From b2d400ccf17b6e04012e3c667baa94626e199728 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Thu, 29 Aug 2024 14:29:25 +1000 Subject: [PATCH 01/30] Mute org.elasticsearch.search.retriever.RankDocRetrieverBuilderIT testRankDocsRetrieverWithCollapse #112254 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 7feefa1255f4..ec2a846f71c4 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -172,6 +172,9 @@ tests: - class: org.elasticsearch.blobcache.shared.SharedBlobCacheServiceTests method: testGetMultiThreaded issue: https://github.com/elastic/elasticsearch/issues/112314 +- class: org.elasticsearch.search.retriever.RankDocRetrieverBuilderIT + method: testRankDocsRetrieverWithCollapse + issue: https://github.com/elastic/elasticsearch/issues/112254 # Examples: # From 633f5f9fe37618e1a998e397cdb006db4af55610 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Thu, 29 Aug 2024 14:41:59 +1000 Subject: [PATCH 02/30] Mute org.elasticsearch.search.ccs.CCSUsageTelemetryIT org.elasticsearch.search.ccs.CCSUsageTelemetryIT #112324 --- muted-tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index ec2a846f71c4..71a347920178 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -175,6 +175,8 @@ tests: - class: org.elasticsearch.search.retriever.RankDocRetrieverBuilderIT method: testRankDocsRetrieverWithCollapse issue: https://github.com/elastic/elasticsearch/issues/112254 +- class: org.elasticsearch.search.ccs.CCSUsageTelemetryIT + issue: https://github.com/elastic/elasticsearch/issues/112324 # Examples: # From 9344f173d32231f1c47e5ef994bffa27b61da876 Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 29 Aug 2024 05:54:13 +0100 Subject: [PATCH 03/30] Add general read/write optional support (#112276) Today `StreamOutput#writeOptionalWriteable` 
allows to write a possibly-null value that implements `Writeable` and therefore carries its own serialization, but sometimes we want to write an optional value and provide a custom `Writer` too. This commit adds `StreamOutput#writeOptional` and a corresponding `StreamInput#readOptional` to support this. --- .../action/bulk/BulkItemRequest.java | 10 +++--- .../action/bulk/BulkItemResponse.java | 32 +++++++------------ .../action/bulk/BulkShardRequest.java | 9 +----- .../action/bulk/BulkShardResponse.java | 2 +- .../common/io/stream/StreamInput.java | 15 +++++++++ .../common/io/stream/StreamOutput.java | 21 ++++++++++++ .../bucket/range/InternalBinaryRange.java | 14 +++----- .../common/io/stream/AbstractStreamTests.java | 11 +++++++ .../core/rollup/job/RollupJobStatus.java | 9 +++--- .../actions/execute/ExecuteWatchRequest.java | 18 +++-------- 10 files changed, 77 insertions(+), 64 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/bulk/BulkItemRequest.java b/server/src/main/java/org/elasticsearch/action/bulk/BulkItemRequest.java index 425461d1f4ba..7c1304f92eef 100644 --- a/server/src/main/java/org/elasticsearch/action/bulk/BulkItemRequest.java +++ b/server/src/main/java/org/elasticsearch/action/bulk/BulkItemRequest.java @@ -101,11 +101,11 @@ public void writeTo(StreamOutput out) throws IOException { out.writeOptionalWriteable(primaryResponse); } - public void writeThin(StreamOutput out) throws IOException { - out.writeVInt(id); - DocWriteRequest.writeDocumentRequestThin(out, request); - out.writeOptionalWriteable(primaryResponse == null ? 
null : primaryResponse::writeThin); - } + public static final Writer THIN_WRITER = (out, item) -> { + out.writeVInt(item.id); + DocWriteRequest.writeDocumentRequestThin(out, item.request); + out.writeOptional(BulkItemResponse.THIN_WRITER, item.primaryResponse); + }; @Override public long ramBytesUsed() { diff --git a/server/src/main/java/org/elasticsearch/action/bulk/BulkItemResponse.java b/server/src/main/java/org/elasticsearch/action/bulk/BulkItemResponse.java index 151e8795d0f8..d3e550eaf05b 100644 --- a/server/src/main/java/org/elasticsearch/action/bulk/BulkItemResponse.java +++ b/server/src/main/java/org/elasticsearch/action/bulk/BulkItemResponse.java @@ -264,7 +264,7 @@ public String toString() { id = in.readVInt(); opType = OpType.fromId(in.readByte()); response = readResponse(shardId, in); - failure = in.readBoolean() ? new Failure(in) : null; + failure = in.readOptionalWriteable(Failure::new); assertConsistent(); } @@ -272,7 +272,7 @@ public String toString() { id = in.readVInt(); opType = OpType.fromId(in.readByte()); response = readResponse(in); - failure = in.readBoolean() ? 
new Failure(in) : null; + failure = in.readOptionalWriteable(Failure::new); assertConsistent(); } @@ -384,31 +384,21 @@ public void writeTo(StreamOutput out) throws IOException { writeResponseType(out); response.writeTo(out); } - if (failure == null) { - out.writeBoolean(false); - } else { - out.writeBoolean(true); - failure.writeTo(out); - } + out.writeOptionalWriteable(failure); } - public void writeThin(StreamOutput out) throws IOException { - out.writeVInt(id); - out.writeByte(opType.getId()); + public static final Writer THIN_WRITER = (out, item) -> { + out.writeVInt(item.id); + out.writeByte(item.opType.getId()); - if (response == null) { + if (item.response == null) { out.writeByte((byte) 2); } else { - writeResponseType(out); - response.writeThin(out); + item.writeResponseType(out); + item.response.writeThin(out); } - if (failure == null) { - out.writeBoolean(false); - } else { - out.writeBoolean(true); - failure.writeTo(out); - } - } + out.writeOptionalWriteable(item.failure); + }; private void writeResponseType(StreamOutput out) throws IOException { if (response instanceof SimulateIndexResponse) { diff --git a/server/src/main/java/org/elasticsearch/action/bulk/BulkShardRequest.java b/server/src/main/java/org/elasticsearch/action/bulk/BulkShardRequest.java index 0d2942e68838..f7860c47d8b7 100644 --- a/server/src/main/java/org/elasticsearch/action/bulk/BulkShardRequest.java +++ b/server/src/main/java/org/elasticsearch/action/bulk/BulkShardRequest.java @@ -130,14 +130,7 @@ public void writeTo(StreamOutput out) throws IOException { throw new IllegalStateException("Inference metadata should have been consumed before writing to the stream"); } super.writeTo(out); - out.writeArray((o, item) -> { - if (item != null) { - o.writeBoolean(true); - item.writeThin(o); - } else { - o.writeBoolean(false); - } - }, items); + out.writeArray((o, item) -> o.writeOptional(BulkItemRequest.THIN_WRITER, item), items); if 
(out.getTransportVersion().onOrAfter(TransportVersions.SIMULATE_VALIDATES_MAPPINGS)) { out.writeBoolean(isSimulated); } diff --git a/server/src/main/java/org/elasticsearch/action/bulk/BulkShardResponse.java b/server/src/main/java/org/elasticsearch/action/bulk/BulkShardResponse.java index 3eeb96546c9b..eb1bb0468c9b 100644 --- a/server/src/main/java/org/elasticsearch/action/bulk/BulkShardResponse.java +++ b/server/src/main/java/org/elasticsearch/action/bulk/BulkShardResponse.java @@ -56,6 +56,6 @@ public void setForcedRefresh(boolean forcedRefresh) { public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); shardId.writeTo(out); - out.writeArray((o, item) -> item.writeThin(o), responses); + out.writeArray(BulkItemResponse.THIN_WRITER, responses); } } diff --git a/server/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java b/server/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java index ec0edb2d07e5..497028ef37c6 100644 --- a/server/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java +++ b/server/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java @@ -1095,8 +1095,23 @@ public T[] readOptionalArray(Writeable.Reader reader, IntFunction ar return readBoolean() ? readArray(reader, arraySupplier) : null; } + /** + * Reads a possibly-null value using the given {@link org.elasticsearch.common.io.stream.Writeable.Reader}. + * + * @see StreamOutput#writeOptionalWriteable + */ + // just an alias for readOptional() since we don't actually care whether T extends Writeable @Nullable public T readOptionalWriteable(Writeable.Reader reader) throws IOException { + return readOptional(reader); + } + + /** + * Reads a possibly-null value using the given {@link org.elasticsearch.common.io.stream.Writeable.Reader}. 
+ * + * @see StreamOutput#writeOptional + */ + public T readOptional(Writeable.Reader reader) throws IOException { if (readBoolean()) { T t = reader.read(this); if (t == null) { diff --git a/server/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java b/server/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java index c65ae2e3463d..5780885473b0 100644 --- a/server/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java +++ b/server/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java @@ -1015,6 +1015,12 @@ public void writeOptionalArray(@Nullable T[] array) throws writeOptionalArray(StreamOutput::writeWriteable, array); } + /** + * Writes a boolean value indicating whether the given object is {@code null}, followed by the object's serialization if it is not + * {@code null}. + * + * @see StreamInput#readOptionalWriteable + */ public void writeOptionalWriteable(@Nullable Writeable writeable) throws IOException { if (writeable != null) { writeBoolean(true); @@ -1024,6 +1030,21 @@ public void writeOptionalWriteable(@Nullable Writeable writeable) throws IOExcep } } + /** + * Writes a boolean value indicating whether the given object is {@code null}, followed by the object's serialization if it is not + * {@code null}. 
+ * + * @see StreamInput#readOptional + */ + public void writeOptional(Writer writer, @Nullable T maybeItem) throws IOException { + if (maybeItem != null) { + writeBoolean(true); + writer.write(this, maybeItem); + } else { + writeBoolean(false); + } + } + /** * This method allow to use a method reference when writing collection elements such as * {@code out.writeMap(map, StreamOutput::writeString, StreamOutput::writeWriteable)} diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalBinaryRange.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalBinaryRange.java index 2b5bcd9931f6..528c37de7a4a 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalBinaryRange.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalBinaryRange.java @@ -72,8 +72,8 @@ private static Bucket createFromStream(StreamInput in, DocValueFormat format, bo String key = in.getTransportVersion().equals(TransportVersions.V_8_0_0) ? in.readString() : in.getTransportVersion().onOrAfter(TransportVersions.V_7_17_1) ? in.readOptionalString() : in.readString(); - BytesRef from = in.readBoolean() ? in.readBytesRef() : null; - BytesRef to = in.readBoolean() ? in.readBytesRef() : null; + BytesRef from = in.readOptional(StreamInput::readBytesRef); + BytesRef to = in.readOptional(StreamInput::readBytesRef); long docCount = in.readLong(); InternalAggregations aggregations = InternalAggregations.readFrom(in); @@ -89,14 +89,8 @@ public void writeTo(StreamOutput out) throws IOException { } else { out.writeString(key == null ? 
generateKey(from, to, format) : key); } - out.writeBoolean(from != null); - if (from != null) { - out.writeBytesRef(from); - } - out.writeBoolean(to != null); - if (to != null) { - out.writeBytesRef(to); - } + out.writeOptional(StreamOutput::writeBytesRef, from); + out.writeOptional(StreamOutput::writeBytesRef, to); out.writeLong(docCount); aggregations.writeTo(out); } diff --git a/server/src/test/java/org/elasticsearch/common/io/stream/AbstractStreamTests.java b/server/src/test/java/org/elasticsearch/common/io/stream/AbstractStreamTests.java index b1104a72400e..ae686afcbb29 100644 --- a/server/src/test/java/org/elasticsearch/common/io/stream/AbstractStreamTests.java +++ b/server/src/test/java/org/elasticsearch/common/io/stream/AbstractStreamTests.java @@ -761,6 +761,17 @@ public void checkZonedDateTimeSerialization(TransportVersion tv) throws IOExcept } } + public void testOptional() throws IOException { + try (var output = new BytesStreamOutput()) { + output.writeOptional(StreamOutput::writeString, "not-null"); + output.writeOptional(StreamOutput::writeString, null); + + final var input = getStreamInput(output.bytes()); + assertEquals("not-null", input.readOptional(StreamInput::readString)); + assertNull(input.readOptional(StreamInput::readString)); + } + } + private void assertSerialization( CheckedConsumer outputAssertions, CheckedConsumer inputAssertions, diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/rollup/job/RollupJobStatus.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/rollup/job/RollupJobStatus.java index 1ba625a507a4..f7ad1f65628b 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/rollup/job/RollupJobStatus.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/rollup/job/RollupJobStatus.java @@ -74,7 +74,7 @@ public RollupJobStatus(IndexerState state, @Nullable Map positio public RollupJobStatus(StreamInput in) throws IOException { state = 
IndexerState.fromStream(in); - currentPosition = in.readBoolean() ? new TreeMap<>(in.readGenericMap()) : null; + currentPosition = in.readOptional(CURRENT_POSITION_READER); if (in.getTransportVersion().before(TransportVersions.V_8_0_0)) { // 7.x nodes serialize `upgradedDocumentID` flag. We don't need it anymore, but // we need to pull it off the stream @@ -83,6 +83,8 @@ public RollupJobStatus(StreamInput in) throws IOException { } } + private static final Reader> CURRENT_POSITION_READER = in -> new TreeMap<>(in.readGenericMap()); + public IndexerState getIndexerState() { return state; } @@ -118,10 +120,7 @@ public String getWriteableName() { @Override public void writeTo(StreamOutput out) throws IOException { state.writeTo(out); - out.writeBoolean(currentPosition != null); - if (currentPosition != null) { - out.writeGenericMap(currentPosition); - } + out.writeOptional(StreamOutput::writeGenericMap, currentPosition); if (out.getTransportVersion().before(TransportVersions.V_8_0_0)) { // 7.x nodes expect a boolean `upgradedDocumentID` flag. 
We don't have it anymore, // but we need to tell them we are upgraded in case there is a mixed cluster diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/watcher/transport/actions/execute/ExecuteWatchRequest.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/watcher/transport/actions/execute/ExecuteWatchRequest.java index 681b004dd1d2..2f2617f956ed 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/watcher/transport/actions/execute/ExecuteWatchRequest.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/watcher/transport/actions/execute/ExecuteWatchRequest.java @@ -59,12 +59,8 @@ public ExecuteWatchRequest(StreamInput in) throws IOException { id = in.readOptionalString(); ignoreCondition = in.readBoolean(); recordExecution = in.readBoolean(); - if (in.readBoolean()) { - alternativeInput = in.readGenericMap(); - } - if (in.readBoolean()) { - triggerData = in.readGenericMap(); - } + alternativeInput = in.readOptional(StreamInput::readGenericMap); + triggerData = in.readOptional(StreamInput::readGenericMap); long actionModesCount = in.readLong(); actionModes = new HashMap<>(); for (int i = 0; i < actionModesCount; i++) { @@ -83,14 +79,8 @@ public void writeTo(StreamOutput out) throws IOException { out.writeOptionalString(id); out.writeBoolean(ignoreCondition); out.writeBoolean(recordExecution); - out.writeBoolean(alternativeInput != null); - if (alternativeInput != null) { - out.writeGenericMap(alternativeInput); - } - out.writeBoolean(triggerData != null); - if (triggerData != null) { - out.writeGenericMap(triggerData); - } + out.writeOptional(StreamOutput::writeGenericMap, alternativeInput); + out.writeOptional(StreamOutput::writeGenericMap, triggerData); out.writeLong(actionModes.size()); for (Map.Entry entry : actionModes.entrySet()) { out.writeString(entry.getKey()); From 59a42ed41b72ea92e62e4522b83d9f9f48955203 Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 29 Aug 2024 
06:03:13 +0100 Subject: [PATCH 04/30] Include network disconnect info in troubleshooting docs (#112323) A misplaced `//end::` tag meant that the docs added in #112271 are only included in the page on fault detection and not the equivalent troubleshooting docs. This commit fixes the problem. --- docs/reference/modules/discovery/fault-detection.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/modules/discovery/fault-detection.asciidoc b/docs/reference/modules/discovery/fault-detection.asciidoc index 89c8a78eccbc..d12985b70597 100644 --- a/docs/reference/modules/discovery/fault-detection.asciidoc +++ b/docs/reference/modules/discovery/fault-detection.asciidoc @@ -300,7 +300,6 @@ To reconstruct the output, base64-decode the data and decompress it using ---- cat shardlock.log | sed -e 's/.*://' | base64 --decode | gzip --decompress ---- -//end::troubleshooting[] [discrete] ===== Diagnosing other network disconnections @@ -345,3 +344,4 @@ packet capture simultaneously from the nodes at both ends of an unstable connection and analyse it alongside the {es} logs from those nodes to determine if traffic between the nodes is being disrupted by another device on the network. 
+//end::troubleshooting[] From aa67bdb5ca8abebcee8a50ebb58e6160d134230c Mon Sep 17 00:00:00 2001 From: Andrei Stefan Date: Thu, 29 Aug 2024 09:53:09 +0300 Subject: [PATCH 05/30] ES|QL: EsqlAsyncSecurityIT workaround for lazy .async-search indexing (#112287) --- muted-tests.yml | 3 -- .../xpack/esql/EsqlAsyncSecurityIT.java | 30 ++++++++++++++----- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index 71a347920178..508403ee6238 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -137,9 +137,6 @@ tests: - class: org.elasticsearch.xpack.ml.integration.MlJobIT method: testDeleteJobAfterMissingIndex issue: https://github.com/elastic/elasticsearch/issues/112088 -- class: org.elasticsearch.xpack.esql.EsqlAsyncSecurityIT - method: testLimitedPrivilege - issue: https://github.com/elastic/elasticsearch/issues/112110 - class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT method: test {stats.ByTwoCalculatedSecondOverwrites SYNC} issue: https://github.com/elastic/elasticsearch/issues/112117 diff --git a/x-pack/plugin/esql/qa/security/src/javaRestTest/java/org/elasticsearch/xpack/esql/EsqlAsyncSecurityIT.java b/x-pack/plugin/esql/qa/security/src/javaRestTest/java/org/elasticsearch/xpack/esql/EsqlAsyncSecurityIT.java index 0806e4118639..f2633dfffb0f 100644 --- a/x-pack/plugin/esql/qa/security/src/javaRestTest/java/org/elasticsearch/xpack/esql/EsqlAsyncSecurityIT.java +++ b/x-pack/plugin/esql/qa/security/src/javaRestTest/java/org/elasticsearch/xpack/esql/EsqlAsyncSecurityIT.java @@ -67,7 +67,7 @@ public void testUnauthorizedIndices() throws IOException { var getResponse = runAsyncGet("user1", id); // sanity assertOK(getResponse); ResponseException error; - error = expectThrows(ResponseException.class, () -> runAsyncGet("user2", id)); + error = expectThrows(ResponseException.class, () -> runAsyncGet("user2", id, true)); // resource not found exception if the authenticated user is not the creator of the original task 
assertThat(error.getResponse().getStatusLine().getStatusCode(), equalTo(404)); @@ -85,7 +85,7 @@ public void testUnauthorizedIndices() throws IOException { var getResponse = runAsyncGet("user2", id); // sanity assertOK(getResponse); ResponseException error; - error = expectThrows(ResponseException.class, () -> runAsyncGet("user1", id)); + error = expectThrows(ResponseException.class, () -> runAsyncGet("user1", id, true)); assertThat(error.getResponse().getStatusLine().getStatusCode(), equalTo(404)); error = expectThrows(ResponseException.class, () -> runAsyncDelete("user1", id)); @@ -117,6 +117,10 @@ private Response runAsync(String user, String command) throws IOException { } private Response runAsyncGet(String user, String id) throws IOException { + return runAsyncGet(user, id, false); + } + + private Response runAsyncGet(String user, String id, boolean isAsyncIdNotFound_Expected) throws IOException { int tries = 0; while (tries < 10) { // Sometimes we get 404s fetching the task status. @@ -129,22 +133,32 @@ private Response runAsyncGet(String user, String id) throws IOException { logResponse(response); return response; } catch (ResponseException e) { - if (e.getResponse().getStatusLine().getStatusCode() == 404 - && EntityUtils.toString(e.getResponse().getEntity()).contains("no such index [.async-search]")) { - /* - * Work around https://github.com/elastic/elasticsearch/issues/110304 - the .async-search - * index may not exist when we try the fetch, but it should exist on next attempt. - */ + var statusCode = e.getResponse().getStatusLine().getStatusCode(); + var message = EntityUtils.toString(e.getResponse().getEntity()); + + if (statusCode == 404 && message.contains("no such index [.async-search]")) { + // Work around https://github.com/elastic/elasticsearch/issues/110304 - the .async-search + // index may not exist when we try the fetch, but it should exist on next attempt. 
logger.warn("async-search index does not exist", e); try { Thread.sleep(1000); } catch (InterruptedException ex) { throw new RuntimeException(ex); } + } else if (statusCode == 404 && false == isAsyncIdNotFound_Expected && message.contains("resource_not_found_exception")) { + // Work around for https://github.com/elastic/elasticsearch/issues/112110 + // The async id is not indexed quickly enough in .async-search index for us to retrieve it. + logger.warn("async id not found", e); + try { + Thread.sleep(500); + } catch (InterruptedException ex) { + throw new RuntimeException(ex); + } } else { throw e; } tries++; + logger.warn("retry [" + tries + "] for GET /_query/async/" + id); } } throw new IllegalStateException("couldn't find task status"); From b9dea69b5ca5b34600d1fc51badc3a9b163107b2 Mon Sep 17 00:00:00 2001 From: weizijun Date: Thu, 29 Aug 2024 15:17:27 +0800 Subject: [PATCH 06/30] [Inference API] Add Docs for AlibabaCloud AI Search Support for the Inference API (#112273) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: István Zoltán Szabó --- docs/changelog/112273.yaml | 5 + .../inference/inference-apis.asciidoc | 1 + .../inference/put-inference.asciidoc | 1 + .../service-alibabacloud-ai-search.asciidoc | 184 ++++++++++++++++++ .../semantic-search-inference.asciidoc | 1 + .../infer-api-ingest-pipeline-widget.asciidoc | 17 ++ .../infer-api-ingest-pipeline.asciidoc | 26 +++ .../infer-api-mapping-widget.asciidoc | 17 ++ .../inference-api/infer-api-mapping.asciidoc | 32 +++ .../infer-api-reindex-widget.asciidoc | 17 ++ .../inference-api/infer-api-reindex.asciidoc | 23 +++ .../infer-api-requirements-widget.asciidoc | 17 ++ .../infer-api-requirements.asciidoc | 6 + .../infer-api-search-widget.asciidoc | 17 ++ .../inference-api/infer-api-search.asciidoc | 65 +++++++ .../infer-api-task-widget.asciidoc | 17 ++ .../inference-api/infer-api-task.asciidoc | 29 +++ 17 files changed, 475 insertions(+) create mode 
100644 docs/changelog/112273.yaml create mode 100644 docs/reference/inference/service-alibabacloud-ai-search.asciidoc diff --git a/docs/changelog/112273.yaml b/docs/changelog/112273.yaml new file mode 100644 index 000000000000..3182a1884a14 --- /dev/null +++ b/docs/changelog/112273.yaml @@ -0,0 +1,5 @@ +pr: 111181 +summary: "[Inference API] Add Docs for AlibabaCloud AI Search Support for the Inference API" +area: Machine Learning +type: enhancement +issues: [ ] diff --git a/docs/reference/inference/inference-apis.asciidoc b/docs/reference/inference/inference-apis.asciidoc index 33db148755d8..8fdf8aecc2ae 100644 --- a/docs/reference/inference/inference-apis.asciidoc +++ b/docs/reference/inference/inference-apis.asciidoc @@ -39,6 +39,7 @@ include::delete-inference.asciidoc[] include::get-inference.asciidoc[] include::post-inference.asciidoc[] include::put-inference.asciidoc[] +include::service-alibabacloud-ai-search.asciidoc[] include::service-amazon-bedrock.asciidoc[] include::service-anthropic.asciidoc[] include::service-azure-ai-studio.asciidoc[] diff --git a/docs/reference/inference/put-inference.asciidoc b/docs/reference/inference/put-inference.asciidoc index 57485e0720cc..ba26a563541f 100644 --- a/docs/reference/inference/put-inference.asciidoc +++ b/docs/reference/inference/put-inference.asciidoc @@ -39,6 +39,7 @@ The create {infer} API enables you to create an {infer} endpoint and configure a The following services are available through the {infer} API, click the links to review the configuration details of the services: +* <> * <> * <> * <> diff --git a/docs/reference/inference/service-alibabacloud-ai-search.asciidoc b/docs/reference/inference/service-alibabacloud-ai-search.asciidoc new file mode 100644 index 000000000000..df5220573d9e --- /dev/null +++ b/docs/reference/inference/service-alibabacloud-ai-search.asciidoc @@ -0,0 +1,184 @@ +[[infer-service-alibabacloud-ai-search]] +=== AlibabaCloud AI Search {infer} service + +Creates an {infer} endpoint to 
perform an {infer} task with the `alibabacloud-ai-search` service. + +[discrete] +[[infer-service-alibabacloud-ai-search-api-request]] +==== {api-request-title} + +`PUT /_inference//` + +[discrete] +[[infer-service-alibabacloud-ai-search-api-path-params]] +==== {api-path-parms-title} + +``:: +(Required, string) +include::inference-shared.asciidoc[tag=inference-id] + +``:: +(Required, string) +include::inference-shared.asciidoc[tag=task-type] ++ +-- +Available task types: + +* `text_embedding`, +* `sparse_embedding`. +* `rerank`. +-- + +[discrete] +[[infer-service-alibabacloud-ai-search-api-request-body]] +==== {api-request-body-title} + +`service`:: +(Required, string) The type of service supported for the specified task type. +In this case, +`alibabacloud-ai-search`. + +`service_settings`:: +(Required, object) +include::inference-shared.asciidoc[tag=service-settings] ++ +-- +These settings are specific to the `alibabacloud-ai-search` service. +-- + +`api_key`::: +(Required, string) +A valid API key for the AlibabaCloud AI Search API. + +`service_id`::: +(Required, string) +The name of the model service to use for the {infer} task. ++ +-- +Available service_ids for the `text_embedding` task: + +* `ops-text-embedding-001` +* `ops-text-embedding-zh-001` +* `ops-text-embedding-en-001` +* `ops-text-embedding-002` + +For the supported `text_embedding` service_ids, refer to the https://help.aliyun.com/zh/open-search/search-platform/developer-reference/text-embedding-api-details[documentation]. + +Available service_id for the `sparse_embedding` task: + +* `ops-text-sparse-embedding-001` + +For the supported `sparse_embedding` service_id, refer to the https://help.aliyun.com/zh/open-search/search-platform/developer-reference/text-sparse-embedding-api-details[documentation]. 
+ +Available service_id for the `rerank` task is: + +* `ops-bge-reranker-larger` + +For the supported `rerank` service_id, refer to the https://help.aliyun.com/zh/open-search/search-platform/developer-reference/ranker-api-details[documentation]. +-- + +`host`::: +(Required, string) +The name of the host address used for the {infer} task. You can find the host address at https://opensearch.console.aliyun.com/cn-shanghai/rag/api-key[ the API keys section] of the documentation. + +`workspace`::: +(Required, string) +The name of the workspace used for the {infer} task. + +`rate_limit`::: +(Optional, object) +By default, the `alibabacloud-ai-search` service sets the number of requests allowed per minute to `1000`. +This helps to minimize the number of rate limit errors returned from AlibabaCloud AI Search. +To modify this, set the `requests_per_minute` setting of this object in your service settings: ++ +-- +include::inference-shared.asciidoc[tag=request-per-minute-example] +-- + + +`task_settings`:: +(Optional, object) +include::inference-shared.asciidoc[tag=task-settings] ++ +.`task_settings` for the `text_embedding` task type +[%collapsible%closed] +===== +`input_type`::: +(Optional, string) +Specifies the type of input passed to the model. +Valid values are: +* `ingest`: for storing document embeddings in a vector database. +* `search`: for storing embeddings of search queries run against a vector database to find relevant documents. +===== ++ +.`task_settings` for the `sparse_embedding` task type +[%collapsible%closed] +===== +`input_type`::: +(Optional, string) +Specifies the type of input passed to the model. +Valid values are: +* `ingest`: for storing document embeddings in a vector database. +* `search`: for storing embeddings of search queries run against a vector database to find relevant documents. + +`return_token`::: +(Optional, boolean) +If `true`, the token name will be returned in the response. 
Defaults to `false` which means only the token ID will be returned in the response. +===== + +[discrete] +[[inference-example-alibabacloud-ai-search]] +==== AlibabaCloud AI Search service examples + +The following example shows how to create an {infer} endpoint called `alibabacloud_ai_search_embeddings` to perform a `text_embedding` task type. + +[source,console] +------------------------------------------------------------ +PUT _inference/text_embedding/alibabacloud_ai_search_embeddings +{ + "service": "alibabacloud-ai-search", + "service_settings": { + "api_key": "", + "service_id": "ops-text-embedding-001", + "host": "default-j01.platform-cn-shanghai.opensearch.aliyuncs.com", + "workspace": "default" + } +} +------------------------------------------------------------ +// TEST[skip:TBD] + +The following example shows how to create an {infer} endpoint called +`alibabacloud_ai_search_sparse` to perform a `sparse_embedding` task type. + +[source,console] +------------------------------------------------------------ +PUT _inference/sparse_embedding/alibabacloud_ai_search_sparse +{ + "service": "alibabacloud-ai-search", + "service_settings": { + "api_key": "", + "service_id": "ops-text-sparse-embedding-001", + "host": "default-j01.platform-cn-shanghai.opensearch.aliyuncs.com", + "workspace": "default" + } +} +------------------------------------------------------------ +// TEST[skip:TBD] + +The next example shows how to create an {infer} endpoint called +`alibabacloud_ai_search_rerank` to perform a `rerank` task type. 
+ +[source,console] +------------------------------------------------------------ +PUT _inference/rerank/alibabacloud_ai_search_rerank +{ + "service": "alibabacloud-ai-search", + "service_settings": { + "api_key": "", + "service_id": "ops-bge-reranker-larger", + "host": "default-j01.platform-cn-shanghai.opensearch.aliyuncs.com", + "workspace": "default" + } +} +------------------------------------------------------------ +// TEST[skip:TBD] diff --git a/docs/reference/search/search-your-data/semantic-search-inference.asciidoc b/docs/reference/search/search-your-data/semantic-search-inference.asciidoc index f74bc65e31bf..719aeb070fc7 100644 --- a/docs/reference/search/search-your-data/semantic-search-inference.asciidoc +++ b/docs/reference/search/search-your-data/semantic-search-inference.asciidoc @@ -17,6 +17,7 @@ Azure based examples use models available through https://ai.azure.com/explore/m or https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models[Azure OpenAI]. Mistral examples use the `mistral-embed` model from https://docs.mistral.ai/getting-started/models/[the Mistral API]. Amazon Bedrock examples use the `amazon.titan-embed-text-v1` model from https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html[the Amazon Bedrock base models]. +AlibabaCloud AI Search examples use the `ops-text-embedding-zh-001` model from https://help.aliyun.com/zh/open-search/search-platform/developer-reference/text-embedding-api-details[the AlibabaCloud AI Search base models]. Click the name of the service you want to use on any of the widgets below to review the corresponding instructions. 
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc index 997dbbe8a20e..3a686e27cf58 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc @@ -49,6 +49,12 @@ id="infer-api-ingest-amazon-bedrock"> Amazon Bedrock +
+
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline.asciidoc index 6adf3d2ebbf4..6678b60fabc4 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline.asciidoc @@ -216,3 +216,29 @@ PUT _ingest/pipeline/amazon_bedrock_embeddings and the `output_field` that will contain the {infer} results. // end::amazon-bedrock[] + +// tag::alibabacloud-ai-search[] + +[source,console] +-------------------------------------------------- +PUT _ingest/pipeline/alibabacloud_ai_search_embeddings +{ + "processors": [ + { + "inference": { + "model_id": "alibabacloud_ai_search_embeddings", <1> + "input_output": { <2> + "input_field": "content", + "output_field": "content_embedding" + } + } + } + ] +} +-------------------------------------------------- +<1> The name of the inference endpoint you created by using the +<>, it's referred to as `inference_id` in that step. +<2> Configuration object that defines the `input_field` for the {infer} process +and the `output_field` that will contain the {infer} results. + +// end::alibabacloud-ai-search[] diff --git a/docs/reference/tab-widgets/inference-api/infer-api-mapping-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-mapping-widget.asciidoc index 4e3a453a7bbe..66b790bdd57a 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-mapping-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-mapping-widget.asciidoc @@ -49,6 +49,12 @@ id="infer-api-mapping-amazon-bedrock"> Amazon Bedrock +
+
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-mapping.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-mapping.asciidoc index abeeb87f03e7..c86538ceb9c8 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-mapping.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-mapping.asciidoc @@ -270,3 +270,35 @@ the {infer} pipeline configuration in the next step. <6> The field type which is text in this example. // end::amazon-bedrock[] + +// tag::alibabacloud-ai-search[] + +[source,console] +-------------------------------------------------- +PUT alibabacloud-ai-search-embeddings +{ + "mappings": { + "properties": { + "content_embedding": { <1> + "type": "dense_vector", <2> + "dims": 1024, <3> + "element_type": "float" + }, + "content": { <4> + "type": "text" <5> + } + } + } +} +-------------------------------------------------- +<1> The name of the field to contain the generated tokens. It must be referenced +in the {infer} pipeline configuration in the next step. +<2> The field to contain the tokens is a `dense_vector` field. +<3> The output dimensions of the model. This value may be different depending on the underlying model used. +See the https://help.aliyun.com/zh/open-search/search-platform/developer-reference/text-embedding-api-details[AlibabaCloud AI Search embedding model] documentation. +<4> The name of the field from which to create the dense vector representation. +In this example, the name of the field is `content`. It must be referenced in +the {infer} pipeline configuration in the next step. +<5> The field type which is text in this example. 
+ +// end::alibabacloud-ai-search[] diff --git a/docs/reference/tab-widgets/inference-api/infer-api-reindex-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-reindex-widget.asciidoc index 45cb9fc51b9f..86f52fee2063 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-reindex-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-reindex-widget.asciidoc @@ -49,6 +49,12 @@ id="infer-api-reindex-amazon-bedrock"> Amazon Bedrock +
+
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-reindex.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-reindex.asciidoc index d961ec8bd39b..25d4023c650c 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-reindex.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-reindex.asciidoc @@ -200,3 +200,26 @@ number makes the update of the reindexing process quicker which enables you to follow the progress closely and detect errors early. // end::amazon-bedrock[] + +// tag::alibabacloud-ai-search[] + +[source,console] +---- +POST _reindex?wait_for_completion=false +{ + "source": { + "index": "test-data", + "size": 50 <1> + }, + "dest": { + "index": "alibabacloud-ai-search-embeddings", + "pipeline": "alibabacloud_ai_search_embeddings" + } +} +---- +// TEST[skip:TBD] +<1> The default batch size for reindexing is 1000. Reducing `size` to a smaller +number makes the update of the reindexing process quicker which enables you to +follow the progress closely and detect errors early. + +// end::alibabacloud-ai-search[] diff --git a/docs/reference/tab-widgets/inference-api/infer-api-requirements-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-requirements-widget.asciidoc index c867b39b88e3..fb686a2d8be1 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-requirements-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-requirements-widget.asciidoc @@ -49,6 +49,12 @@ id="infer-api-requirements-amazon-bedrock"> Amazon Bedrock +
+
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-requirements.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-requirements.asciidoc index 603cd85a8f93..c9e7ca8b80ba 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-requirements.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-requirements.asciidoc @@ -52,3 +52,9 @@ You can apply for access to Azure OpenAI by completing the form at https://aka.m * A pair of access and secret keys used to access Amazon Bedrock // end::amazon-bedrock[] + +// tag::alibabacloud-ai-search[] +* An AlibabaCloud Account with https://console.aliyun.com[AlibabaCloud] access +* An API key generated for your account from the https://opensearch.console.aliyun.com/cn-shanghai/rag/api-key[API keys section] + +// end::alibabacloud-ai-search[] diff --git a/docs/reference/tab-widgets/inference-api/infer-api-search-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-search-widget.asciidoc index fa4a11c59a15..996148d80a4b 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-search-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-search-widget.asciidoc @@ -49,6 +49,12 @@ id="infer-api-search-amazon-bedrock"> Amazon Bedrock +
+
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-search.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-search.asciidoc index f23ed1dfef05..fe1f58b6bd1a 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-search.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-search.asciidoc @@ -531,3 +531,68 @@ query from the `amazon-bedrock-embeddings` index sorted by their proximity to th // NOTCONSOLE // end::amazon-bedrock[] + +// tag::alibabacloud-ai-search[] + +[source,console] +-------------------------------------------------- +GET alibabacloud-ai-search-embeddings/_search +{ + "knn": { + "field": "content_embedding", + "query_vector_builder": { + "text_embedding": { + "model_id": "alibabacloud_ai_search_embeddings", + "model_text": "Calculate fuel cost" + } + }, + "k": 10, + "num_candidates": 100 + }, + "_source": [ + "id", + "content" + ] +} +-------------------------------------------------- +// TEST[skip:TBD] + +As a result, you receive the top 10 documents that are closest in meaning to the +query from the `alibabacloud-ai-search-embeddings` index sorted by their proximity to the query: + +[source,console-result] +-------------------------------------------------- +"hits": [ + { + "_index": "alibabacloud-ai-search-embeddings", + "_id": "DDd5OowBHxQKHyc3TDSC", + "_score": 0.83704096, + "_source": { + "id": 862114, + "body": "How to calculate fuel cost for a road trip. By Tara Baukus Mello • Bankrate.com. Dear Driving for Dollars, My family is considering taking a long road trip to finish off the end of the summer, but I'm a little worried about gas prices and our overall fuel cost.It doesn't seem easy to calculate since we'll be traveling through many states and we are considering several routes.y family is considering taking a long road trip to finish off the end of the summer, but I'm a little worried about gas prices and our overall fuel cost.
It doesn't seem easy to calculate since we'll be traveling through many states and we are considering several routes." + } + }, + { + "_index": "alibabacloud-ai-search-embeddings", + "_id": "ajd5OowBHxQKHyc3TDSC", + "_score": 0.8345704, + "_source": { + "id": 820622, + "body": "Home Heating Calculator. Typically, approximately 50% of the energy consumed in a home annually is for space heating. When deciding on a heating system, many factors will come into play: cost of fuel, installation cost, convenience and life style are all important.This calculator can help you estimate the cost of fuel for different heating appliances.hen deciding on a heating system, many factors will come into play: cost of fuel, installation cost, convenience and life style are all important. This calculator can help you estimate the cost of fuel for different heating appliances." + } + }, + { + "_index": "alibabacloud-ai-search-embeddings", + "_id": "Djd5OowBHxQKHyc3TDSC", + "_score": 0.8327426, + "_source": { + "id": 8202683, + "body": "Fuel is another important cost. This cost will depend on your boat, how far you travel, and how fast you travel. A 33-foot sailboat traveling at 7 knots should be able to travel 300 miles on 50 gallons of diesel fuel.If you are paying $4 per gallon, the trip would cost you $200.Most boats have much larger gas tanks than cars.uel is another important cost. This cost will depend on your boat, how far you travel, and how fast you travel. A 33-foot sailboat traveling at 7 knots should be able to travel 300 miles on 50 gallons of diesel fuel." + } + }, + (...) 
+ ] +-------------------------------------------------- +// NOTCONSOLE + +// end::alibabacloud-ai-search[] diff --git a/docs/reference/tab-widgets/inference-api/infer-api-task-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-task-widget.asciidoc index f12be341d866..1dfa6077553f 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-task-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-task-widget.asciidoc @@ -49,6 +49,12 @@ id="infer-api-task-amazon-bedrock"> Amazon Bedrock +
+
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-task.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-task.asciidoc index b186b2c58ccc..2b4aa1a20010 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-task.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-task.asciidoc @@ -223,3 +223,32 @@ PUT _inference/text_embedding/amazon_bedrock_embeddings <1> <6> The model ID or ARN of the model to use. // end::amazon-bedrock[] + +// tag::alibabacloud-ai-search[] + +[source,console] +------------------------------------------------------------ +PUT _inference/text_embedding/alibabacloud_ai_search_embeddings <1> +{ + "service": "alibabacloud-ai-search", + "service_settings": { + "api_key": "", <2> + "service_id": "", <3> + "host": "", <4> + "workspace": "" <5> + } +} +------------------------------------------------------------ +// TEST[skip:TBD] +<1> The task type is `text_embedding` in the path and the `inference_id` which is the unique identifier of the {infer} endpoint is `alibabacloud_ai_search_embeddings`. +<2> The API key for accessing the AlibabaCloud AI Search API. You can find your API keys in +your AlibabaCloud account under the +https://opensearch.console.aliyun.com/cn-shanghai/rag/api-key[API keys section]. You need to provide +your API key only once. The <> does not return your API +key. +<3> The AlibabaCloud AI Search embeddings model name, for example `ops-text-embedding-zh-001`. +<4> The name of your AlibabaCloud AI Search host address. +<5> The name of your AlibabaCloud AI Search workspace. + +// end::alibabacloud-ai-search[] + From 569184871bc0006ba55c20bcbc4500e98e853aea Mon Sep 17 00:00:00 2001 From: Chris Hegarty <62058229+ChrisHegarty@users.noreply.github.com> Date: Thu, 29 Aug 2024 08:23:34 +0100 Subject: [PATCH 07/30] Add UpdateForV10 annotation (#112281) In preparation for the next major release of Elasticsearch, this commit adds the UpdateForV10 annotation.
--- .../org/elasticsearch/core/UpdateForV10.java | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 libs/core/src/main/java/org/elasticsearch/core/UpdateForV10.java diff --git a/libs/core/src/main/java/org/elasticsearch/core/UpdateForV10.java b/libs/core/src/main/java/org/elasticsearch/core/UpdateForV10.java new file mode 100644 index 000000000000..0fe816bd3721 --- /dev/null +++ b/libs/core/src/main/java/org/elasticsearch/core/UpdateForV10.java @@ -0,0 +1,23 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.core; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Annotation to identify a block of code (a whole class, a method, or a field) that needs to be reviewed (for cleanup, remove or change) + * before releasing 10.0 + */ +@Retention(RetentionPolicy.SOURCE) +@Target({ ElementType.LOCAL_VARIABLE, ElementType.CONSTRUCTOR, ElementType.FIELD, ElementType.METHOD, ElementType.TYPE }) +public @interface UpdateForV10 { +} From 727f1e72c6d930ad763ca307f622eadbbdfff112 Mon Sep 17 00:00:00 2001 From: Dominique Clarke Date: Thu, 29 Aug 2024 03:46:44 -0400 Subject: [PATCH 08/30] [Observability] add .slo-observability.* index privileges to built in editor and viewer roles (#111984) Today, the `editor` and `viewer` roles do not contain the appropriate index privileges for SLO users. This PR updates the index privileges to include the `.slo-observability.*` indices. 
--------- Co-authored-by: Slobodan Adamovic --- .../authz/store/ReservedRolesStore.java | 9 ++++ .../authz/store/ReservedRolesStoreTests.java | 42 +++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java index 4f3d7a245fc8..74434adf61fb 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java @@ -868,6 +868,11 @@ private static RoleDescriptor buildViewerRoleDescriptor() { .indices("/~(([.]|ilm-history-).*)/") .privileges("read", "view_index_metadata") .build(), + // Observability + RoleDescriptor.IndicesPrivileges.builder() + .indices(".slo-observability.*") + .privileges("read", "view_index_metadata") + .build(), // Security RoleDescriptor.IndicesPrivileges.builder() .indices(ReservedRolesStore.ALERTS_LEGACY_INDEX, ReservedRolesStore.LISTS_INDEX, ReservedRolesStore.LISTS_ITEMS_INDEX) @@ -915,6 +920,10 @@ private static RoleDescriptor buildEditorRoleDescriptor() { .indices("observability-annotations") .privileges("read", "view_index_metadata", "write") .build(), + RoleDescriptor.IndicesPrivileges.builder() + .indices(".slo-observability.*") + .privileges("read", "view_index_metadata", "write", "manage") + .build(), // Security RoleDescriptor.IndicesPrivileges.builder() .indices(ReservedRolesStore.ALERTS_LEGACY_INDEX, ReservedRolesStore.LISTS_INDEX, ReservedRolesStore.LISTS_ITEMS_INDEX) diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStoreTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStoreTests.java index f0676f35ae31..0cdf7de63ca9 100644 --- 
a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStoreTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStoreTests.java @@ -28,6 +28,7 @@ import org.elasticsearch.action.admin.indices.get.GetIndexAction; import org.elasticsearch.action.admin.indices.mapping.get.GetFieldMappingsAction; import org.elasticsearch.action.admin.indices.mapping.get.GetMappingsAction; +import org.elasticsearch.action.admin.indices.mapping.put.TransportAutoPutMappingAction; import org.elasticsearch.action.admin.indices.mapping.put.TransportPutMappingAction; import org.elasticsearch.action.admin.indices.recovery.RecoveryAction; import org.elasticsearch.action.admin.indices.resolve.ResolveIndexAction; @@ -3662,6 +3663,9 @@ public void testPredefinedViewerRole() { assertOnlyReadAllowed(role, ".profiling-" + randomIntBetween(0, 5)); assertOnlyReadAllowed(role, randomAlphaOfLength(5)); + assertOnlyReadAllowed(role, ".slo-observability." + randomIntBetween(0, 5)); + assertViewIndexMetadata(role, ".slo-observability." + randomIntBetween(0, 5)); + assertNoAccessAllowed(role, TestRestrictedIndices.SAMPLE_RESTRICTED_NAMES); assertNoAccessAllowed(role, "." + randomAlphaOfLengthBetween(6, 10)); assertNoAccessAllowed(role, "ilm-history-" + randomIntBetween(0, 5)); @@ -3740,6 +3744,9 @@ public void testPredefinedEditorRole() { assertReadWriteDocsAndMaintenanceButNotDeleteIndexAllowed(role, ".preview.alerts-" + randomIntBetween(0, 5)); assertReadWriteDocsAndMaintenanceButNotDeleteIndexAllowed(role, ".internal.preview.alerts-" + randomIntBetween(0, 5)); + assertViewIndexMetadata(role, ".slo-observability." + randomIntBetween(0, 5)); + assertReadWriteAndManage(role, ".slo-observability." + randomIntBetween(0, 5)); + assertNoAccessAllowed(role, TestRestrictedIndices.SAMPLE_RESTRICTED_NAMES); assertNoAccessAllowed(role, "." 
+ randomAlphaOfLengthBetween(6, 10)); assertNoAccessAllowed(role, "ilm-history-" + randomIntBetween(0, 5)); @@ -3865,6 +3872,41 @@ private void assertReadWriteDocsButNotDeleteIndexAllowed(Role role, String index role.indices().allowedIndicesMatcher(TransportDeleteIndexAction.TYPE.name()).test(mockIndexAbstraction(index)), is(false) ); + + assertThat(role.indices().allowedIndicesMatcher(TransportSearchAction.TYPE.name()).test(mockIndexAbstraction(index)), is(true)); + assertThat(role.indices().allowedIndicesMatcher(TransportGetAction.TYPE.name()).test(mockIndexAbstraction(index)), is(true)); + assertThat(role.indices().allowedIndicesMatcher(TransportIndexAction.NAME).test(mockIndexAbstraction(index)), is(true)); + assertThat(role.indices().allowedIndicesMatcher(TransportUpdateAction.NAME).test(mockIndexAbstraction(index)), is(true)); + assertThat(role.indices().allowedIndicesMatcher(TransportDeleteAction.NAME).test(mockIndexAbstraction(index)), is(true)); + assertThat(role.indices().allowedIndicesMatcher(TransportBulkAction.NAME).test(mockIndexAbstraction(index)), is(true)); + } + + private void assertReadWriteAndManage(Role role, String index) { + assertThat( + role.indices().allowedIndicesMatcher(TransportDeleteIndexAction.TYPE.name()).test(mockIndexAbstraction(index)), + is(true) + ); + assertThat( + role.indices().allowedIndicesMatcher(TransportFieldCapabilitiesAction.NAME + "*").test(mockIndexAbstraction(index)), + is(true) + ); + assertThat( + role.indices().allowedIndicesMatcher(TransportCreateIndexAction.TYPE.name()).test(mockIndexAbstraction(index)), + is(true) + ); + assertThat( + role.indices().allowedIndicesMatcher(TransportUpdateSettingsAction.TYPE.name()).test(mockIndexAbstraction(index)), + is(true) + ); + assertThat(role.indices().allowedIndicesMatcher(GetRollupIndexCapsAction.NAME + "*").test(mockIndexAbstraction(index)), is(true)); + assertThat(role.indices().allowedIndicesMatcher("indices:admin/*").test(mockIndexAbstraction(index)), is(true)); + 
assertThat(role.indices().allowedIndicesMatcher("indices:monitor/*").test(mockIndexAbstraction(index)), is(true)); + assertThat( + role.indices().allowedIndicesMatcher(TransportAutoPutMappingAction.TYPE.name()).test(mockIndexAbstraction(index)), + is(true) + ); + assertThat(role.indices().allowedIndicesMatcher(AutoCreateAction.NAME).test(mockIndexAbstraction(index)), is(true)); + assertThat(role.indices().allowedIndicesMatcher(TransportSearchAction.TYPE.name()).test(mockIndexAbstraction(index)), is(true)); assertThat(role.indices().allowedIndicesMatcher(TransportGetAction.TYPE.name()).test(mockIndexAbstraction(index)), is(true)); assertThat(role.indices().allowedIndicesMatcher(TransportIndexAction.NAME).test(mockIndexAbstraction(index)), is(true)); From 55ed03fddfa8c77c354a2db2910593b40d2be890 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Wed, 28 Aug 2024 19:21:00 +0200 Subject: [PATCH 09/30] Remove Scorable#docID implementations This method was removed in https://github.com/apache/lucene/pull/12407 so we also need to remove it in implementations of Scorable. 
--- .../painless/ScriptedMetricAggContextsTests.java | 5 ----- .../join/aggregations/ParentJoinAggregator.java | 5 ----- .../aggregations/bucket/nested/NestedAggregator.java | 7 ------- .../bucket/sampler/BestDocsDeferringCollector.java | 8 -------- .../search/aggregations/MultiBucketCollectorTests.java | 10 ++-------- .../search/query/QueryPhaseCollectorTests.java | 10 ---------- .../search/sort/BucketedSortForFloatsTests.java | 6 ------ 7 files changed, 2 insertions(+), 49 deletions(-) diff --git a/modules/lang-painless/src/test/java/org/elasticsearch/painless/ScriptedMetricAggContextsTests.java b/modules/lang-painless/src/test/java/org/elasticsearch/painless/ScriptedMetricAggContextsTests.java index 2d3f09fc7243..8eae139eb822 100644 --- a/modules/lang-painless/src/test/java/org/elasticsearch/painless/ScriptedMetricAggContextsTests.java +++ b/modules/lang-painless/src/test/java/org/elasticsearch/painless/ScriptedMetricAggContextsTests.java @@ -73,11 +73,6 @@ public void testMapBasic() throws IOException { Map state = new HashMap<>(); Scorable scorer = new Scorable() { - @Override - public int docID() { - return 0; - } - @Override public float score() { return 0.5f; diff --git a/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ParentJoinAggregator.java b/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ParentJoinAggregator.java index 9c6a788ea2f7..ed4dcf2072b8 100644 --- a/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ParentJoinAggregator.java +++ b/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ParentJoinAggregator.java @@ -133,11 +133,6 @@ protected void prepareSubAggs(long[] ordsToCollect) throws IOException { public float score() { return 1f; } - - @Override - public int docID() { - return childDocsIter.docID(); - } }); final Bits liveDocs = ctx.reader().getLiveDocs(); diff --git 
a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java index 39dfd6e4aac3..28e010f541a7 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java @@ -206,19 +206,12 @@ void processBufferedChildBuckets() throws IOException { } private static class CachedScorable extends Scorable { - int doc; float score; @Override public final float score() { return score; } - - @Override - public int docID() { - return doc; - } - } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollector.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollector.java index 1344604a8d39..c72c4b29a478 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollector.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollector.java @@ -213,7 +213,6 @@ class PerSegmentCollects extends Scorable { private final AggregationExecutionContext aggCtx; int maxDocId = Integer.MIN_VALUE; private float currentScore; - private int currentDocId = -1; private Scorable currentScorer; PerSegmentCollects(AggregationExecutionContext aggCtx) throws IOException { @@ -248,7 +247,6 @@ public void replayRelatedMatches(List sd) throws IOException { leafCollector.setScorer(this); currentScore = 0; - currentDocId = -1; if (maxDocId < 0) { return; } @@ -258,7 +256,6 @@ public void replayRelatedMatches(List sd) throws IOException { int rebased = scoreDoc.doc - aggCtx.getLeafReaderContext().docBase; if ((rebased >= 0) && (rebased <= maxDocId)) { currentScore = scoreDoc.score; - currentDocId = rebased; // We stored the bucket ID in Lucene's 
shardIndex property // for convenience. leafCollector.collect(rebased, scoreDoc.shardIndex); @@ -275,11 +272,6 @@ public float score() throws IOException { return currentScore; } - @Override - public int docID() { - return currentDocId; - } - public void collect(int docId, long parentBucket) throws IOException { perBucketSamples = bigArrays.grow(perBucketSamples, parentBucket + 1); PerParentBucketSamples sampler = perBucketSamples.get((int) parentBucket); diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/MultiBucketCollectorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/MultiBucketCollectorTests.java index cfb9c4bb8324..ff4ad059559f 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/MultiBucketCollectorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/MultiBucketCollectorTests.java @@ -32,14 +32,8 @@ import static org.hamcrest.Matchers.equalTo; public class MultiBucketCollectorTests extends ESTestCase { - private static class ScoreAndDoc extends Scorable { + private static class Score extends Scorable { float score; - int doc = -1; - - @Override - public int docID() { - return doc; - } @Override public float score() { @@ -246,7 +240,7 @@ public void testSetScorerAfterCollectionTerminated() throws IOException { collector1 = new TerminateAfterBucketCollector(collector1, 1); collector2 = new TerminateAfterBucketCollector(collector2, 2); - Scorable scorer = new ScoreAndDoc(); + Scorable scorer = new Score(); List collectors = Arrays.asList(collector1, collector2); Collections.shuffle(collectors, random()); diff --git a/server/src/test/java/org/elasticsearch/search/query/QueryPhaseCollectorTests.java b/server/src/test/java/org/elasticsearch/search/query/QueryPhaseCollectorTests.java index f222e697488d..dbfd9d83ee88 100644 --- a/server/src/test/java/org/elasticsearch/search/query/QueryPhaseCollectorTests.java +++ 
b/server/src/test/java/org/elasticsearch/search/query/QueryPhaseCollectorTests.java @@ -1138,11 +1138,6 @@ public void testSetScorerAfterCollectionTerminated() throws IOException { public float score() { return 0; } - - @Override - public int docID() { - return 0; - } }; QueryPhaseCollector queryPhaseCollector = new QueryPhaseCollector( @@ -1472,11 +1467,6 @@ public float score() throws IOException { return 0; } - @Override - public int docID() { - return 0; - } - @Override public void setMinCompetitiveScore(float minScore) { setMinCompetitiveScoreCalled = true; diff --git a/server/src/test/java/org/elasticsearch/search/sort/BucketedSortForFloatsTests.java b/server/src/test/java/org/elasticsearch/search/sort/BucketedSortForFloatsTests.java index 0f088d2948fc..7f136a097e24 100644 --- a/server/src/test/java/org/elasticsearch/search/sort/BucketedSortForFloatsTests.java +++ b/server/src/test/java/org/elasticsearch/search/sort/BucketedSortForFloatsTests.java @@ -120,18 +120,12 @@ public void testScorer() throws IOException { } private class MockScorable extends Scorable { - private int doc; private float score; @Override public float score() throws IOException { return score; } - - @Override - public int docID() { - return doc; - } } /** From 5e455db10ecbb1a31cad58ecb1120a66fc50079f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Thu, 29 Aug 2024 10:04:27 +0200 Subject: [PATCH 10/30] Revert "Remove Scorable#docID implementations" This reverts commit 55ed03fddfa8c77c354a2db2910593b40d2be890. 
--- .../painless/ScriptedMetricAggContextsTests.java | 5 +++++ .../join/aggregations/ParentJoinAggregator.java | 5 +++++ .../aggregations/bucket/nested/NestedAggregator.java | 7 +++++++ .../bucket/sampler/BestDocsDeferringCollector.java | 8 ++++++++ .../search/aggregations/MultiBucketCollectorTests.java | 10 ++++++++-- .../search/query/QueryPhaseCollectorTests.java | 10 ++++++++++ .../search/sort/BucketedSortForFloatsTests.java | 6 ++++++ 7 files changed, 49 insertions(+), 2 deletions(-) diff --git a/modules/lang-painless/src/test/java/org/elasticsearch/painless/ScriptedMetricAggContextsTests.java b/modules/lang-painless/src/test/java/org/elasticsearch/painless/ScriptedMetricAggContextsTests.java index 8eae139eb822..2d3f09fc7243 100644 --- a/modules/lang-painless/src/test/java/org/elasticsearch/painless/ScriptedMetricAggContextsTests.java +++ b/modules/lang-painless/src/test/java/org/elasticsearch/painless/ScriptedMetricAggContextsTests.java @@ -73,6 +73,11 @@ public void testMapBasic() throws IOException { Map state = new HashMap<>(); Scorable scorer = new Scorable() { + @Override + public int docID() { + return 0; + } + @Override public float score() { return 0.5f; diff --git a/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ParentJoinAggregator.java b/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ParentJoinAggregator.java index ed4dcf2072b8..9c6a788ea2f7 100644 --- a/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ParentJoinAggregator.java +++ b/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ParentJoinAggregator.java @@ -133,6 +133,11 @@ protected void prepareSubAggs(long[] ordsToCollect) throws IOException { public float score() { return 1f; } + + @Override + public int docID() { + return childDocsIter.docID(); + } }); final Bits liveDocs = ctx.reader().getLiveDocs(); diff --git 
a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java index 28e010f541a7..39dfd6e4aac3 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java @@ -206,12 +206,19 @@ void processBufferedChildBuckets() throws IOException { } private static class CachedScorable extends Scorable { + int doc; float score; @Override public final float score() { return score; } + + @Override + public int docID() { + return doc; + } + } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollector.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollector.java index c72c4b29a478..1344604a8d39 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollector.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollector.java @@ -213,6 +213,7 @@ class PerSegmentCollects extends Scorable { private final AggregationExecutionContext aggCtx; int maxDocId = Integer.MIN_VALUE; private float currentScore; + private int currentDocId = -1; private Scorable currentScorer; PerSegmentCollects(AggregationExecutionContext aggCtx) throws IOException { @@ -247,6 +248,7 @@ public void replayRelatedMatches(List sd) throws IOException { leafCollector.setScorer(this); currentScore = 0; + currentDocId = -1; if (maxDocId < 0) { return; } @@ -256,6 +258,7 @@ public void replayRelatedMatches(List sd) throws IOException { int rebased = scoreDoc.doc - aggCtx.getLeafReaderContext().docBase; if ((rebased >= 0) && (rebased <= maxDocId)) { currentScore = scoreDoc.score; + currentDocId = rebased; // We stored the bucket ID in Lucene's 
shardIndex property // for convenience. leafCollector.collect(rebased, scoreDoc.shardIndex); @@ -272,6 +275,11 @@ public float score() throws IOException { return currentScore; } + @Override + public int docID() { + return currentDocId; + } + public void collect(int docId, long parentBucket) throws IOException { perBucketSamples = bigArrays.grow(perBucketSamples, parentBucket + 1); PerParentBucketSamples sampler = perBucketSamples.get((int) parentBucket); diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/MultiBucketCollectorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/MultiBucketCollectorTests.java index ff4ad059559f..cfb9c4bb8324 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/MultiBucketCollectorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/MultiBucketCollectorTests.java @@ -32,8 +32,14 @@ import static org.hamcrest.Matchers.equalTo; public class MultiBucketCollectorTests extends ESTestCase { - private static class Score extends Scorable { + private static class ScoreAndDoc extends Scorable { float score; + int doc = -1; + + @Override + public int docID() { + return doc; + } @Override public float score() { @@ -240,7 +246,7 @@ public void testSetScorerAfterCollectionTerminated() throws IOException { collector1 = new TerminateAfterBucketCollector(collector1, 1); collector2 = new TerminateAfterBucketCollector(collector2, 2); - Scorable scorer = new Score(); + Scorable scorer = new ScoreAndDoc(); List collectors = Arrays.asList(collector1, collector2); Collections.shuffle(collectors, random()); diff --git a/server/src/test/java/org/elasticsearch/search/query/QueryPhaseCollectorTests.java b/server/src/test/java/org/elasticsearch/search/query/QueryPhaseCollectorTests.java index dbfd9d83ee88..f222e697488d 100644 --- a/server/src/test/java/org/elasticsearch/search/query/QueryPhaseCollectorTests.java +++ 
b/server/src/test/java/org/elasticsearch/search/query/QueryPhaseCollectorTests.java @@ -1138,6 +1138,11 @@ public void testSetScorerAfterCollectionTerminated() throws IOException { public float score() { return 0; } + + @Override + public int docID() { + return 0; + } }; QueryPhaseCollector queryPhaseCollector = new QueryPhaseCollector( @@ -1467,6 +1472,11 @@ public float score() throws IOException { return 0; } + @Override + public int docID() { + return 0; + } + @Override public void setMinCompetitiveScore(float minScore) { setMinCompetitiveScoreCalled = true; diff --git a/server/src/test/java/org/elasticsearch/search/sort/BucketedSortForFloatsTests.java b/server/src/test/java/org/elasticsearch/search/sort/BucketedSortForFloatsTests.java index 7f136a097e24..0f088d2948fc 100644 --- a/server/src/test/java/org/elasticsearch/search/sort/BucketedSortForFloatsTests.java +++ b/server/src/test/java/org/elasticsearch/search/sort/BucketedSortForFloatsTests.java @@ -120,12 +120,18 @@ public void testScorer() throws IOException { } private class MockScorable extends Scorable { + private int doc; private float score; @Override public float score() throws IOException { return score; } + + @Override + public int docID() { + return doc; + } } /** From aa57a1553e3371158c23faed7a5f7c5833a6e18d Mon Sep 17 00:00:00 2001 From: Liam Thompson <32779855+leemthompo@users.noreply.github.com> Date: Thu, 29 Aug 2024 09:13:30 +0100 Subject: [PATCH 11/30] [DOCS] Rewrite "What is Elasticsearch?" (Part 1) (#112213) --- docs/reference/intro.asciidoc | 132 ++++++++++-------- .../search-your-data/near-real-time.asciidoc | 2 +- 2 files changed, 72 insertions(+), 62 deletions(-) diff --git a/docs/reference/intro.asciidoc b/docs/reference/intro.asciidoc index 3fc23b44994a..cd9c126e7b1f 100644 --- a/docs/reference/intro.asciidoc +++ b/docs/reference/intro.asciidoc @@ -1,42 +1,70 @@ [[elasticsearch-intro]] == What is {es}? 
-_**You know, for search (and analysis)**_ - -{es} is the distributed search and analytics engine at the heart of -the {stack}. {ls} and {beats} facilitate collecting, aggregating, and -enriching your data and storing it in {es}. {kib} enables you to -interactively explore, visualize, and share insights into your data and manage -and monitor the stack. {es} is where the indexing, search, and analysis -magic happens. - -{es} provides near real-time search and analytics for all types of data. Whether you -have structured or unstructured text, numerical data, or geospatial data, -{es} can efficiently store and index it in a way that supports fast searches. -You can go far beyond simple data retrieval and aggregate information to discover -trends and patterns in your data. And as your data and query volume grows, the -distributed nature of {es} enables your deployment to grow seamlessly right -along with it. - -While not _every_ problem is a search problem, {es} offers speed and flexibility -to handle data in a wide variety of use cases: - -* Add a search box to an app or website -* Store and analyze logs, metrics, and security event data -* Use machine learning to automatically model the behavior of your data in real - time -* Use {es} as a vector database to create, store, and search vector embeddings -* Automate business workflows using {es} as a storage engine -* Manage, integrate, and analyze spatial information using {es} as a geographic - information system (GIS) -* Store and process genetic data using {es} as a bioinformatics research tool - -We’re continually amazed by the novel ways people use search. But whether -your use case is similar to one of these, or you're using {es} to tackle a new -problem, the way you work with your data, documents, and indices in {es} is -the same. + +{es-repo}[{es}] is a distributed search and analytics engine, scalable data store, and vector database built on Apache Lucene. 
+It's optimized for speed and relevance on production-scale workloads. +Use {es} to search, index, store, and analyze data of all shapes and sizes in near real time. + +[TIP] +==== +{es} has a lot of features. Explore the full list on the https://www.elastic.co/elasticsearch/features[product webpage^]. +==== + +{es} is the heart of the {estc-welcome-current}/stack-components.html[Elastic Stack] and powers the Elastic https://www.elastic.co/enterprise-search[Search], https://www.elastic.co/observability[Observability] and https://www.elastic.co/security[Security] solutions. + +{es} is used for a wide and growing range of use cases. Here are a few examples: + +* *Monitor log and event data*. Store logs, metrics, and event data for observability and security information and event management (SIEM). +* *Build search applications*. Add search capabilities to apps or websites, or build enterprise search engines over your organization's internal data sources. +* *Vector database*. Store and search vectorized data, and create vector embeddings with built-in and third-party natural language processing (NLP) models. +* *Retrieval augmented generation (RAG)*. Use {es} as a retrieval engine to augment Generative AI models. +* *Application and security monitoring*. Monitor and analyze application performance and security data effectively. +* *Machine learning*. Use {ml} to automatically model the behavior of your data in real time. + +This is just a sample of search, observability, and security use cases enabled by {es}. +Refer to our https://www.elastic.co/customers/success-stories[customer success stories] for concrete examples across a range of industries. +// Link to demos, search labs chatbots + +[discrete] +[[elasticsearch-intro-elastic-stack]] +.What is the Elastic Stack? +******************************* +{es} is the core component of the Elastic Stack, a suite of products for collecting, storing, searching, and visualizing data.
+https://www.elastic.co/guide/en/starting-with-the-elasticsearch-platform-and-its-solutions/current/stack-components.html[Learn more about the Elastic Stack]. +******************************* +// TODO: Remove once we've moved Stack Overview to a subpage? + +[discrete] +[[elasticsearch-intro-deploy]] +=== Deployment options + +To use {es}, you need a running instance of the {es} service. +You can deploy {es} in various ways: + +* <>. Get started quickly with a minimal local Docker setup. +* {cloud}/ec-getting-started-trial.html[*Elastic Cloud*]. {es} is available as part of our hosted Elastic Stack offering, deployed in the cloud with your provider of choice. Sign up for a https://cloud.elastic.co/registration[14-day free trial]. +* {serverless-docs}/general/sign-up-trial[*Elastic Cloud Serverless* (technical preview)]. Create serverless projects for autoscaled and fully managed {es} deployments. Sign up for a https://cloud.elastic.co/serverless-registration[14-day free trial]. + +**Advanced deployment options** + +* <>. Install, configure, and run {es} on your own premises. +* {ece-ref}/Elastic-Cloud-Enterprise-overview.html[*Elastic Cloud Enterprise*]. Deploy Elastic Cloud on public or private clouds, virtual machines, or your own premises. +* {eck-ref}/k8s-overview.html[*Elastic Cloud on Kubernetes*]. Deploy Elastic Cloud on Kubernetes. + +[discrete] +[[elasticsearch-next-steps]] +=== Learn more + +Here are some resources to help you get started: + +* <>. A beginner's guide to deploying your first {es} instance, indexing data, and running queries. +* https://elastic.co/webinars/getting-started-elasticsearch[Webinar: Introduction to {es}]. Register for our live webinars to learn directly from {es} experts. +* https://www.elastic.co/search-labs[Elastic Search Labs]. Tutorials and blogs that explore AI-powered search using the latest {es} features.
+** Follow our tutorial https://www.elastic.co/search-labs/tutorials/search-tutorial/welcome[to build a hybrid search solution in Python]. +** Check out the https://github.com/elastic/elasticsearch-labs?tab=readme-ov-file#elasticsearch-examples--apps[`elasticsearch-labs` repository] for a range of Python notebooks and apps for various use cases. [[documents-indices]] -=== Data in: documents and indices +=== Documents and indices {es} is a distributed document store. Instead of storing information as rows of columnar data, {es} stores complex data structures that have been serialized @@ -65,8 +93,7 @@ behavior makes it easy to index and explore your data--just start indexing documents and {es} will detect and map booleans, floating point and integer values, dates, and strings to the appropriate {es} data types. -Ultimately, however, you know more about your data and how you want to use it -than {es} can. You can define rules to control dynamic mapping and explicitly +You can define rules to control dynamic mapping and explicitly define mappings to take full control of how fields are stored and indexed. Defining your own mappings enables you to: @@ -89,7 +116,7 @@ used at search time. When you query a full-text field, the query text undergoes the same analysis before the terms are looked up in the index. [[search-analyze]] -=== Information out: search and analyze +=== Search and analyze While you can use {es} as a document store and retrieve documents and their metadata, the real power comes from being able to easily access the full suite @@ -160,27 +187,8 @@ size 70 needles, you’re displaying a count of the size 70 needles that match your users' search criteria--for example, all size 70 _non-stick embroidery_ needles. -[discrete] -[[more-features]] -===== But wait, there’s more - -Want to automate the analysis of your time series data? 
You can use -{ml-docs}/ml-ad-overview.html[machine learning] features to create accurate -baselines of normal behavior in your data and identify anomalous patterns. With -machine learning, you can detect: - -* Anomalies related to temporal deviations in values, counts, or frequencies -* Statistical rarity -* Unusual behaviors for a member of a population - -And the best part? You can do this without having to specify algorithms, models, -or other data science-related configurations. - [[scalability]] -=== Scalability and resilience: clusters, nodes, and shards -++++ -Scalability and resilience -++++ +=== Scalability and resilience {es} is built to be always available and to scale with your needs. It does this by being distributed by nature. You can add servers (nodes) to a cluster to @@ -209,7 +217,7 @@ interrupting indexing or query operations. [discrete] [[it-depends]] -==== It depends... +==== Shard size and number of shards There are a number of performance considerations and trade offs with respect to shard size and the number of primary shards configured for an index. The more @@ -237,7 +245,7 @@ testing with your own data and queries]. [discrete] [[disaster-ccr]] -==== In case of disaster +==== Disaster recovery A cluster's nodes need good, reliable connections to each other. To provide better connections, you typically co-locate the nodes in the same data center or @@ -257,7 +265,7 @@ secondary clusters are read-only followers. [discrete] [[admin]] -==== Care and feeding +==== Security, management, and monitoring As with any enterprise system, you need tools to secure, manage, and monitor your {es} clusters. Security, monitoring, and administrative features @@ -265,3 +273,5 @@ that are integrated into {es} enable you to use {kibana-ref}/introduction.html[{ as a control center for managing a cluster. Features like <> and <> help you intelligently manage your data over time. + +Refer to <> for more information. 
\ No newline at end of file diff --git a/docs/reference/search/search-your-data/near-real-time.asciidoc b/docs/reference/search/search-your-data/near-real-time.asciidoc index 46a996c237c3..47618ecd9fd7 100644 --- a/docs/reference/search/search-your-data/near-real-time.asciidoc +++ b/docs/reference/search/search-your-data/near-real-time.asciidoc @@ -2,7 +2,7 @@ [[near-real-time]] === Near real-time search -The overview of <> indicates that when a document is stored in {es}, it is indexed and fully searchable in _near real-time_--within 1 second. What defines near real-time search? +When a document is stored in {es}, it is indexed and fully searchable in _near real-time_--within 1 second. What defines near real-time search? Lucene, the Java libraries on which {es} is based, introduced the concept of per-segment search. A _segment_ is similar to an inverted index, but the word _index_ in Lucene means "a collection of segments plus a commit point". After a commit, a new segment is added to the commit point and the buffer is cleared. 
From 320ccbc24748809feecc42df1f7bab6c4d6fd4cc Mon Sep 17 00:00:00 2001 From: Kostas Krikellas <131142368+kkrik-es@users.noreply.github.com> Date: Thu, 29 Aug 2024 11:25:04 +0300 Subject: [PATCH 12/30] Reduce load for stress test to avoid oom (#112331) Fixes #112326 --- .../logsdb/datageneration/DataGeneratorTests.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorTests.java b/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorTests.java index db3b81891e87..4a4ffca0f37a 100644 --- a/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorTests.java +++ b/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorTests.java @@ -113,13 +113,13 @@ protected Collection getPlugins() { } public void testDataGeneratorStressTest() throws IOException { - // Let's generate 1000000 fields to test an extreme case (2 levels of objects + 1 leaf level with 100 fields per object). + // Let's generate 125000 fields to test an extreme case (2 levels of objects + 1 leaf level with 50 fields per object). var testChildFieldGenerator = new DataSourceResponse.ChildFieldGenerator() { private int generatedFields = 0; @Override public int generateChildFieldCount() { - return 100; + return 50; } @Override From 2c29a3ae0a6e743c2df72df5895e90aa56dd2683 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Thu, 29 Aug 2024 12:43:10 +0200 Subject: [PATCH 13/30] [DOCS] Highlights auto-chunking in intro of semantic text. 
(#111836) --- docs/reference/mapping/types/semantic-text.asciidoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/reference/mapping/types/semantic-text.asciidoc b/docs/reference/mapping/types/semantic-text.asciidoc index 522a0c54c8aa..a006f288dc66 100644 --- a/docs/reference/mapping/types/semantic-text.asciidoc +++ b/docs/reference/mapping/types/semantic-text.asciidoc @@ -7,8 +7,8 @@ beta[] -The `semantic_text` field type automatically generates embeddings for text -content using an inference endpoint. +The `semantic_text` field type automatically generates embeddings for text content using an inference endpoint. +Long passages are <> to smaller sections to enable the processing of larger corpuses of text. The `semantic_text` field type specifies an inference endpoint identifier that will be used to generate embeddings. You can create the inference endpoint by using the <>. From 35fe3a9c47500ab21735f7c40f7184fb7d724f9c Mon Sep 17 00:00:00 2001 From: weizijun Date: Thu, 29 Aug 2024 19:46:58 +0800 Subject: [PATCH 14/30] some fixed (#112332) --- .../inference/service-alibabacloud-ai-search.asciidoc | 2 +- docs/reference/inference/service-amazon-bedrock.asciidoc | 8 -------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/docs/reference/inference/service-alibabacloud-ai-search.asciidoc b/docs/reference/inference/service-alibabacloud-ai-search.asciidoc index df5220573d9e..23a3d532635a 100644 --- a/docs/reference/inference/service-alibabacloud-ai-search.asciidoc +++ b/docs/reference/inference/service-alibabacloud-ai-search.asciidoc @@ -25,7 +25,7 @@ include::inference-shared.asciidoc[tag=task-type] Available task types: * `text_embedding`, -* `sparse_embedding`. +* `sparse_embedding`, * `rerank`. 
-- diff --git a/docs/reference/inference/service-amazon-bedrock.asciidoc b/docs/reference/inference/service-amazon-bedrock.asciidoc index 4ffa368613a0..dbffd5c26fbc 100644 --- a/docs/reference/inference/service-amazon-bedrock.asciidoc +++ b/docs/reference/inference/service-amazon-bedrock.asciidoc @@ -122,14 +122,6 @@ Only available for `anthropic`, `cohere`, and `mistral` providers. Alternative to `temperature`. Limits samples to the top-K most likely words, balancing coherence and variability. Should not be used if `temperature` is specified. -===== -+ -.`task_settings` for the `text_embedding` task type -[%collapsible%closed] -===== - -There are no `task_settings` available for the `text_embedding` task type. - ===== [discrete] From b4c8fa362dc88d9d1220c7466ec2c0219a258433 Mon Sep 17 00:00:00 2001 From: Bogdan Pintea Date: Thu, 29 Aug 2024 13:50:24 +0200 Subject: [PATCH 15/30] Reenable 26_aggs_bucket EsqlClientYamlIT (#112343) Reenable 26_aggs_bucket EsqlClientYamlIT, fixed in #111897. Fixes #111901, fixes #111902. 
--- muted-tests.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index 508403ee6238..e4c2f62d2617 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -122,12 +122,6 @@ tests: - class: org.elasticsearch.xpack.restart.CoreFullClusterRestartIT method: testSnapshotRestore {cluster=UPGRADED} issue: https://github.com/elastic/elasticsearch/issues/111799 -- class: org.elasticsearch.xpack.esql.qa.mixed.EsqlClientYamlIT - method: "test {p0=esql/26_aggs_bucket/friendlier BUCKET interval hourly: #110916}" - issue: https://github.com/elastic/elasticsearch/issues/111901 -- class: org.elasticsearch.xpack.esql.qa.mixed.EsqlClientYamlIT - method: "test {p0=esql/26_aggs_bucket/friendlier BUCKET interval: monthly #110916}" - issue: https://github.com/elastic/elasticsearch/issues/111902 - class: org.elasticsearch.xpack.esql.qa.mixed.FieldExtractorIT method: testScaledFloat issue: https://github.com/elastic/elasticsearch/issues/112003 From a97b0e226e3d7ea5e27eb565ae05d01ca22b06a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20FOUCRET?= Date: Thu, 29 Aug 2024 13:59:16 +0200 Subject: [PATCH 16/30] Fix test failures in ScriptScoreQueryTests (#112334) --- muted-tests.yml | 6 ------ .../elasticsearch/search/query/ScriptScoreQueryTests.java | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index e4c2f62d2617..e80a39040a4e 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -151,12 +151,6 @@ tests: - class: org.elasticsearch.xpack.ml.integration.MlJobIT method: testDeleteJobAsync issue: https://github.com/elastic/elasticsearch/issues/112212 -- class: org.elasticsearch.search.query.ScriptScoreQueryTests - method: testScriptTermStatsAvailable - issue: https://github.com/elastic/elasticsearch/issues/112278 -- class: org.elasticsearch.search.query.ScriptScoreQueryTests - method: testScriptTermStatsNotAvailable - issue: https://github.com/elastic/elasticsearch/issues/112290 - class: 
org.elasticsearch.search.retriever.rankdoc.RankDocsSortBuilderTests method: testEqualsAndHashcode issue: https://github.com/elastic/elasticsearch/issues/112312 diff --git a/server/src/test/java/org/elasticsearch/search/query/ScriptScoreQueryTests.java b/server/src/test/java/org/elasticsearch/search/query/ScriptScoreQueryTests.java index d6b1da9f76b4..177968b9a132 100644 --- a/server/src/test/java/org/elasticsearch/search/query/ScriptScoreQueryTests.java +++ b/server/src/test/java/org/elasticsearch/search/query/ScriptScoreQueryTests.java @@ -72,7 +72,7 @@ public void initSearcher() throws IOException { w.commit(); reader = DirectoryReader.open(w); searcher = newSearcher(reader); - leafReaderContext = reader.leaves().get(0); + leafReaderContext = searcher.getTopReaderContext().leaves().get(0); } @After From a69f8e19ed4513d552b24a655f45b38098336b26 Mon Sep 17 00:00:00 2001 From: Albert Zaharovits Date: Thu, 29 Aug 2024 15:09:28 +0300 Subject: [PATCH 17/30] Avoid redundant cluster state build (#112340) Avoid redundant cluster state build when creating index --- .../cluster/metadata/MetadataCreateIndexService.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java index b5ee0ebd7e38..b1a19d99dcb1 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java @@ -1249,11 +1249,10 @@ static ClusterState clusterStateCreateIndex( ClusterBlocks.Builder blocks = createClusterBlocksBuilder(currentState, indexName, clusterBlocks); blocks.updateBlocks(indexMetadata); - ClusterState updatedState = ClusterState.builder(currentState).blocks(blocks).metadata(newMetadata).build(); + RoutingTable.Builder routingTableBuilder = 
RoutingTable.builder(shardRoutingRoleStrategy, currentState.routingTable()) + .addAsNew(newMetadata.index(indexName)); - RoutingTable.Builder routingTableBuilder = RoutingTable.builder(shardRoutingRoleStrategy, updatedState.routingTable()) - .addAsNew(updatedState.metadata().index(indexName)); - return ClusterState.builder(updatedState).routingTable(routingTableBuilder.build()).build(); + return ClusterState.builder(currentState).blocks(blocks).metadata(newMetadata).routingTable(routingTableBuilder).build(); } static IndexMetadata buildIndexMetadata( From cefe358b4197332aca6b4d15d440851033134d61 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Thu, 29 Aug 2024 14:15:29 +0200 Subject: [PATCH 18/30] Fix DLS using runtime fields and synthetic source (#112341) Somewhat of a tortured test but applying the same fix from #112260 to synthetic source which was running into the same bug as a stored field source. --- docs/changelog/112341.yaml | 5 +++ .../lookup/SyntheticSourceProvider.java | 36 +++++------------- .../DocumentLevelSecurityRandomTests.java | 38 ++++++++++++++++++- 3 files changed, 52 insertions(+), 27 deletions(-) create mode 100644 docs/changelog/112341.yaml diff --git a/docs/changelog/112341.yaml b/docs/changelog/112341.yaml new file mode 100644 index 000000000000..8f44b53ad999 --- /dev/null +++ b/docs/changelog/112341.yaml @@ -0,0 +1,5 @@ +pr: 112341 +summary: Fix DLS using runtime fields and synthetic source +area: Authorization +type: bug +issues: [] diff --git a/server/src/main/java/org/elasticsearch/search/lookup/SyntheticSourceProvider.java b/server/src/main/java/org/elasticsearch/search/lookup/SyntheticSourceProvider.java index bccfc22dc7e9..a4549f0814a0 100644 --- a/server/src/main/java/org/elasticsearch/search/lookup/SyntheticSourceProvider.java +++ b/server/src/main/java/org/elasticsearch/search/lookup/SyntheticSourceProvider.java @@ -8,13 +8,14 @@ package org.elasticsearch.search.lookup; -import org.apache.lucene.index.IndexReaderContext; import 
org.apache.lucene.index.LeafReaderContext; +import org.elasticsearch.common.util.concurrent.ConcurrentCollections; import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader; import org.elasticsearch.index.fieldvisitor.StoredFieldLoader; import org.elasticsearch.index.mapper.SourceLoader; import java.io.IOException; +import java.util.Map; // NB This is written under the assumption that individual segments are accessed by a single // thread, even if separate segments may be searched concurrently. If we ever implement @@ -22,7 +23,7 @@ class SyntheticSourceProvider implements SourceProvider { private final SourceLoader sourceLoader; - private volatile SyntheticSourceLeafLoader[] leafLoaders; + private final Map leaves = ConcurrentCollections.newConcurrentMap(); SyntheticSourceProvider(SourceLoader sourceLoader) { this.sourceLoader = sourceLoader; @@ -30,31 +31,14 @@ class SyntheticSourceProvider implements SourceProvider { @Override public Source getSource(LeafReaderContext ctx, int doc) throws IOException { - maybeInit(ctx); - if (leafLoaders[ctx.ord] == null) { - // individual segments are currently only accessed on one thread so there's no need - // for locking here. 
- leafLoaders[ctx.ord] = new SyntheticSourceLeafLoader(ctx); + final Object id = ctx.id(); + var provider = leaves.get(id); + if (provider == null) { + provider = new SyntheticSourceLeafLoader(ctx); + var existing = leaves.put(id, provider); + assert existing == null : "unexpected source provider [" + existing + "]"; } - return leafLoaders[ctx.ord].getSource(doc); - } - - private void maybeInit(LeafReaderContext ctx) { - if (leafLoaders == null) { - synchronized (this) { - if (leafLoaders == null) { - leafLoaders = new SyntheticSourceLeafLoader[findParentContext(ctx).leaves().size()]; - } - } - } - } - - private IndexReaderContext findParentContext(LeafReaderContext ctx) { - if (ctx.parent != null) { - return ctx.parent; - } - assert ctx.isTopLevel; - return ctx; + return provider.getSource(doc); } private class SyntheticSourceLeafLoader { diff --git a/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DocumentLevelSecurityRandomTests.java b/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DocumentLevelSecurityRandomTests.java index fb7463197081..1bf7d8934775 100644 --- a/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DocumentLevelSecurityRandomTests.java +++ b/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DocumentLevelSecurityRandomTests.java @@ -144,6 +144,43 @@ public void testWithRuntimeFields() throws Exception { .endObject() ) ); + doTestWithRuntimeFieldsInTestIndex(); + } + + public void testWithRuntimeFieldsAndSyntheticSource() throws Exception { + assertAcked( + indicesAdmin().prepareCreate("test") + .setMapping( + XContentFactory.jsonBuilder() + .startObject() + .startObject("_source") + .field("mode", "synthetic") + .endObject() + .startObject("runtime") + .startObject("field1") + .field("type", "keyword") + .endObject() + .startObject("field2") + .field("type", "keyword") + .endObject() + .endObject() + 
.startObject("properties") + .startObject("field1") + .field("type", "text") + .field("store", true) + .endObject() + .startObject("field2") + .field("type", "text") + .field("store", true) + .endObject() + .endObject() + .endObject() + ) + ); + doTestWithRuntimeFieldsInTestIndex(); + } + + private void doTestWithRuntimeFieldsInTestIndex() { List requests = new ArrayList<>(47); for (int i = 1; i <= 42; i++) { requests.add(prepareIndex("test").setSource("field1", "value1", "field2", "foo" + i)); @@ -158,5 +195,4 @@ public void testWithRuntimeFields() throws Exception { 42L ); } - } From 9387ce335757194da1986722a98f95338a45a873 Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 29 Aug 2024 13:16:37 +0100 Subject: [PATCH 19/30] Deduplicate unstable-cluster troubleshooting docs (#112333) We duplicated these docs in order to avoid breaking older links, but this makes it confusing and hard to link to the right copy of the information. This commit removes the duplication by replacing the docs at the old locations with stubs that link to the new locations. --- .../discovery/fault-detection.asciidoc | 295 +--------------- .../troubleshooting-unstable-cluster.asciidoc | 314 +++++++++++++++++- .../common/reference-docs-links.json | 4 +- 3 files changed, 321 insertions(+), 292 deletions(-) diff --git a/docs/reference/modules/discovery/fault-detection.asciidoc b/docs/reference/modules/discovery/fault-detection.asciidoc index d12985b70597..21f4ae2317e6 100644 --- a/docs/reference/modules/discovery/fault-detection.asciidoc +++ b/docs/reference/modules/discovery/fault-detection.asciidoc @@ -35,313 +35,30 @@ starting from the beginning of the cluster state update. Refer to [[cluster-fault-detection-troubleshooting]] ==== Troubleshooting an unstable cluster -//tag::troubleshooting[] -Normally, a node will only leave a cluster if deliberately shut down. If a node -leaves the cluster unexpectedly, it's important to address the cause. 
A cluster -in which nodes leave unexpectedly is unstable and can create several issues. -For instance: -* The cluster health may be yellow or red. - -* Some shards will be initializing and other shards may be failing. - -* Search, indexing, and monitoring operations may fail and report exceptions in -logs. - -* The `.security` index may be unavailable, blocking access to the cluster. - -* The master may appear busy due to frequent cluster state updates. - -To troubleshoot a cluster in this state, first ensure the cluster has a -<>. Next, focus on the nodes -unexpectedly leaving the cluster ahead of all other issues. It will not be -possible to solve other issues until the cluster has a stable master node and -stable node membership. - -Diagnostics and statistics are usually not useful in an unstable cluster. These -tools only offer a view of the state of the cluster at a single point in time. -Instead, look at the cluster logs to see the pattern of behaviour over time. -Focus particularly on logs from the elected master. When a node leaves the -cluster, logs for the elected master include a message like this (with line -breaks added to make it easier to read): - -[source,text] ----- -[2022-03-21T11:02:35,513][INFO ][o.e.c.c.NodeLeftExecutor] [instance-0000000000] - node-left: [{instance-0000000004}{bfcMDTiDRkietFb9v_di7w}{aNlyORLASam1ammv2DzYXA}{172.27.47.21}{172.27.47.21:19054}{m}] - with reason [disconnected] ----- - -This message says that the `NodeLeftExecutor` on the elected master -(`instance-0000000000`) processed a `node-left` task, identifying the node that -was removed and the reason for its removal. 
When the node joins the cluster -again, logs for the elected master will include a message like this (with line -breaks added to make it easier to read): - -[source,text] ----- -[2022-03-21T11:02:59,892][INFO ][o.e.c.c.NodeJoinExecutor] [instance-0000000000] - node-join: [{instance-0000000004}{bfcMDTiDRkietFb9v_di7w}{UNw_RuazQCSBskWZV8ID_w}{172.27.47.21}{172.27.47.21:19054}{m}] - with reason [joining after restart, removed [24s] ago with reason [disconnected]] ----- - -This message says that the `NodeJoinExecutor` on the elected master -(`instance-0000000000`) processed a `node-join` task, identifying the node that -was added to the cluster and the reason for the task. - -Other nodes may log similar messages, but report fewer details: - -[source,text] ----- -[2020-01-29T11:02:36,985][INFO ][o.e.c.s.ClusterApplierService] - [instance-0000000001] removed { - {instance-0000000004}{bfcMDTiDRkietFb9v_di7w}{aNlyORLASam1ammv2DzYXA}{172.27.47.21}{172.27.47.21:19054}{m} - {tiebreaker-0000000003}{UNw_RuazQCSBskWZV8ID_w}{bltyVOQ-RNu20OQfTHSLtA}{172.27.161.154}{172.27.161.154:19251}{mv} - }, term: 14, version: 1653415, reason: Publication{term=14, version=1653415} ----- - -These messages are not especially useful for troubleshooting, so focus on the -ones from the `NodeLeftExecutor` and `NodeJoinExecutor` which are only emitted -on the elected master and which contain more details. If you don't see the -messages from the `NodeLeftExecutor` and `NodeJoinExecutor`, check that: - -* You're looking at the logs for the elected master node. - -* The logs cover the correct time period. - -* Logging is enabled at `INFO` level. - -Nodes will also log a message containing `master node changed` whenever they -start or stop following the elected master. You can use these messages to -determine each node's view of the state of the master over time. - -If a node restarts, it will leave the cluster and then join the cluster again. 
-When it rejoins, the `NodeJoinExecutor` will log that it processed a -`node-join` task indicating that the node is `joining after restart`. If a node -is unexpectedly restarting, look at the node's logs to see why it is shutting -down. - -The <> API on the affected node will also provide some useful -information about the situation. - -If the node did not restart then you should look at the reason for its -departure more closely. Each reason has different troubleshooting steps, -described below. There are three possible reasons: - -* `disconnected`: The connection from the master node to the removed node was -closed. - -* `lagging`: The master published a cluster state update, but the removed node -did not apply it within the permitted timeout. By default, this timeout is 2 -minutes. Refer to <> for information about the -settings which control this mechanism. - -* `followers check retry count exceeded`: The master sent a number of -consecutive health checks to the removed node. These checks were rejected or -timed out. By default, each health check times out after 10 seconds and {es} -removes the node removed after three consecutively failed health checks. Refer -to <> for information about the settings which -control this mechanism. +See <>. [discrete] ===== Diagnosing `disconnected` nodes -Nodes typically leave the cluster with reason `disconnected` when they shut -down, but if they rejoin the cluster without restarting then there is some -other problem. - -{es} is designed to run on a fairly reliable network. It opens a number of TCP -connections between nodes and expects these connections to remain open -<>. If a connection is closed then {es} will -try and reconnect, so the occasional blip may fail some in-flight operations -but should otherwise have limited impact on the cluster. In contrast, -repeatedly-dropped connections will severely affect its operation. 
- -The connections from the elected master node to every other node in the cluster -are particularly important. The elected master never spontaneously closes its -outbound connections to other nodes. Similarly, once an inbound connection is -fully established, a node never spontaneously it unless the node is shutting -down. - -If you see a node unexpectedly leave the cluster with the `disconnected` -reason, something other than {es} likely caused the connection to close. A -common cause is a misconfigured firewall with an improper timeout or another -policy that's <>. It could also -be caused by general connectivity issues, such as packet loss due to faulty -hardware or network congestion. If you're an advanced user, configure the -following loggers to get more detailed information about network exceptions: - -[source,yaml] ----- -logger.org.elasticsearch.transport.TcpTransport: DEBUG -logger.org.elasticsearch.xpack.core.security.transport.netty4.SecurityNetty4Transport: DEBUG ----- - -If these logs do not show enough information to diagnose the problem, obtain a -packet capture simultaneously from the nodes at both ends of an unstable -connection and analyse it alongside the {es} logs from those nodes to determine -if traffic between the nodes is being disrupted by another device on the -network. +See <>. [discrete] ===== Diagnosing `lagging` nodes -{es} needs every node to process cluster state updates reasonably quickly. If a -node takes too long to process a cluster state update, it can be harmful to the -cluster. The master will remove these nodes with the `lagging` reason. Refer to -<> for information about the settings which control -this mechanism. - -Lagging is typically caused by performance issues on the removed node. However, -a node may also lag due to severe network delays. To rule out network delays, -ensure that `net.ipv4.tcp_retries2` is <>. Log messages that contain `warn threshold` may provide more -information about the root cause. 
- -If you're an advanced user, you can get more detailed information about what -the node was doing when it was removed by configuring the following logger: - -[source,yaml] ----- -logger.org.elasticsearch.cluster.coordination.LagDetector: DEBUG ----- - -When this logger is enabled, {es} will attempt to run the -<> API on the faulty node and report the results in -the logs on the elected master. The results are compressed, encoded, and split -into chunks to avoid truncation: - -[source,text] ----- -[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] [part 1]: H4sIAAAAAAAA/x... -[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] [part 2]: p7x3w1hmOQVtuV... -[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] [part 3]: v7uTboMGDbyOy+... -[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] [part 4]: 4tse0RnPnLeDNN... -[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] (gzip compressed, base64-encoded, and split into 4 parts on preceding log lines) ----- - -To reconstruct the output, base64-decode the data and decompress it using -`gzip`. For instance, on Unix-like systems: - -[source,sh] ----- -cat lagdetector.log | sed -e 's/.*://' | base64 --decode | gzip --decompress ----- +See <>. 
[discrete] ===== Diagnosing `follower check retry count exceeded` nodes -Nodes sometimes leave the cluster with reason `follower check retry count -exceeded` when they shut down, but if they rejoin the cluster without -restarting then there is some other problem. - -{es} needs every node to respond to network messages successfully and -reasonably quickly. If a node rejects requests or does not respond at all then -it can be harmful to the cluster. If enough consecutive checks fail then the -master will remove the node with reason `follower check retry count exceeded` -and will indicate in the `node-left` message how many of the consecutive -unsuccessful checks failed and how many of them timed out. Refer to -<> for information about the settings which control -this mechanism. - -Timeouts and failures may be due to network delays or performance problems on -the affected nodes. Ensure that `net.ipv4.tcp_retries2` is -<> to eliminate network delays as -a possible cause for this kind of instability. Log messages containing -`warn threshold` may give further clues about the cause of the instability. - -If the last check failed with an exception then the exception is reported, and -typically indicates the problem that needs to be addressed. If any of the -checks timed out then narrow down the problem as follows. - -include::../../troubleshooting/network-timeouts.asciidoc[tag=troubleshooting-network-timeouts-gc-vm] - -include::../../troubleshooting/network-timeouts.asciidoc[tag=troubleshooting-network-timeouts-packet-capture-fault-detection] - -include::../../troubleshooting/network-timeouts.asciidoc[tag=troubleshooting-network-timeouts-threads] - -By default the follower checks will time out after 30s, so if node departures -are unpredictable then capture stack dumps every 15s to be sure that at least -one stack dump was taken at the right time. +See <>. 
[discrete] ===== Diagnosing `ShardLockObtainFailedException` failures -If a node leaves and rejoins the cluster then {es} will usually shut down and -re-initialize its shards. If the shards do not shut down quickly enough then -{es} may fail to re-initialize them due to a `ShardLockObtainFailedException`. - -To gather more information about the reason for shards shutting down slowly, -configure the following logger: - -[source,yaml] ----- -logger.org.elasticsearch.env.NodeEnvironment: DEBUG ----- - -When this logger is enabled, {es} will attempt to run the -<> API whenever it encounters a -`ShardLockObtainFailedException`. The results are compressed, encoded, and -split into chunks to avoid truncation: - -[source,text] ----- -[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 1]: H4sIAAAAAAAA/x... -[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 2]: p7x3w1hmOQVtuV... -[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 3]: v7uTboMGDbyOy+... -[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 4]: 4tse0RnPnLeDNN... -[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] (gzip compressed, base64-encoded, and split into 4 parts on preceding log lines) ----- - -To reconstruct the output, base64-decode the data and decompress it using -`gzip`. For instance, on Unix-like systems: - -[source,sh] ----- -cat shardlock.log | sed -e 's/.*://' | base64 --decode | gzip --decompress ----- +See <>. [discrete] ===== Diagnosing other network disconnections -{es} is designed to run on a fairly reliable network. It opens a number of TCP -connections between nodes and expects these connections to remain open -<>. 
If a connection is closed then {es} will -try and reconnect, so the occasional blip may fail some in-flight operations -but should otherwise have limited impact on the cluster. In contrast, -repeatedly-dropped connections will severely affect its operation. - -{es} nodes will only actively close an outbound connection to another node if -the other node leaves the cluster. See -<> for further information about -identifying and troubleshooting this situation. If an outbound connection -closes for some other reason, nodes will log a message such as the following: - -[source,text] ----- -[INFO ][o.e.t.ClusterConnectionManager] [node-1] transport connection to [{node-2}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] closed by remote ----- - -Similarly, once an inbound connection is fully established, a node never -spontaneously closes it unless the node is shutting down. - -Therefore if you see a node report that a connection to another node closed -unexpectedly, something other than {es} likely caused the connection to close. -A common cause is a misconfigured firewall with an improper timeout or another -policy that's <>. It could also -be caused by general connectivity issues, such as packet loss due to faulty -hardware or network congestion. If you're an advanced user, configure the -following loggers to get more detailed information about network exceptions: - -[source,yaml] ----- -logger.org.elasticsearch.transport.TcpTransport: DEBUG -logger.org.elasticsearch.xpack.core.security.transport.netty4.SecurityNetty4Transport: DEBUG ----- - -If these logs do not show enough information to diagnose the problem, obtain a -packet capture simultaneously from the nodes at both ends of an unstable -connection and analyse it alongside the {es} logs from those nodes to determine -if traffic between the nodes is being disrupted by another device on the -network. -//end::troubleshooting[] +See <>. 
diff --git a/docs/reference/troubleshooting/troubleshooting-unstable-cluster.asciidoc b/docs/reference/troubleshooting/troubleshooting-unstable-cluster.asciidoc index 387ebcdcd43c..cbb35f773103 100644 --- a/docs/reference/troubleshooting/troubleshooting-unstable-cluster.asciidoc +++ b/docs/reference/troubleshooting/troubleshooting-unstable-cluster.asciidoc @@ -1,4 +1,316 @@ [[troubleshooting-unstable-cluster]] == Troubleshooting an unstable cluster -include::../modules/discovery/fault-detection.asciidoc[tag=troubleshooting,leveloffset=-2] \ No newline at end of file +Normally, a node will only leave a cluster if deliberately shut down. If a node +leaves the cluster unexpectedly, it's important to address the cause. A cluster +in which nodes leave unexpectedly is unstable and can create several issues. +For instance: + +* The cluster health may be yellow or red. + +* Some shards will be initializing and other shards may be failing. + +* Search, indexing, and monitoring operations may fail and report exceptions in +logs. + +* The `.security` index may be unavailable, blocking access to the cluster. + +* The master may appear busy due to frequent cluster state updates. + +To troubleshoot a cluster in this state, first ensure the cluster has a +<>. Next, focus on the nodes +unexpectedly leaving the cluster ahead of all other issues. It will not be +possible to solve other issues until the cluster has a stable master node and +stable node membership. + +Diagnostics and statistics are usually not useful in an unstable cluster. These +tools only offer a view of the state of the cluster at a single point in time. +Instead, look at the cluster logs to see the pattern of behaviour over time. +Focus particularly on logs from the elected master. 
When a node leaves the +cluster, logs for the elected master include a message like this (with line +breaks added to make it easier to read): + +[source,text] +---- +[2022-03-21T11:02:35,513][INFO ][o.e.c.c.NodeLeftExecutor] [instance-0000000000] + node-left: [{instance-0000000004}{bfcMDTiDRkietFb9v_di7w}{aNlyORLASam1ammv2DzYXA}{172.27.47.21}{172.27.47.21:19054}{m}] + with reason [disconnected] +---- + +This message says that the `NodeLeftExecutor` on the elected master +(`instance-0000000000`) processed a `node-left` task, identifying the node that +was removed and the reason for its removal. When the node joins the cluster +again, logs for the elected master will include a message like this (with line +breaks added to make it easier to read): + +[source,text] +---- +[2022-03-21T11:02:59,892][INFO ][o.e.c.c.NodeJoinExecutor] [instance-0000000000] + node-join: [{instance-0000000004}{bfcMDTiDRkietFb9v_di7w}{UNw_RuazQCSBskWZV8ID_w}{172.27.47.21}{172.27.47.21:19054}{m}] + with reason [joining after restart, removed [24s] ago with reason [disconnected]] +---- + +This message says that the `NodeJoinExecutor` on the elected master +(`instance-0000000000`) processed a `node-join` task, identifying the node that +was added to the cluster and the reason for the task. + +Other nodes may log similar messages, but report fewer details: + +[source,text] +---- +[2020-01-29T11:02:36,985][INFO ][o.e.c.s.ClusterApplierService] + [instance-0000000001] removed { + {instance-0000000004}{bfcMDTiDRkietFb9v_di7w}{aNlyORLASam1ammv2DzYXA}{172.27.47.21}{172.27.47.21:19054}{m} + {tiebreaker-0000000003}{UNw_RuazQCSBskWZV8ID_w}{bltyVOQ-RNu20OQfTHSLtA}{172.27.161.154}{172.27.161.154:19251}{mv} + }, term: 14, version: 1653415, reason: Publication{term=14, version=1653415} +---- + +These messages are not especially useful for troubleshooting, so focus on the +ones from the `NodeLeftExecutor` and `NodeJoinExecutor` which are only emitted +on the elected master and which contain more details. 
If you don't see the
+messages from the `NodeLeftExecutor` and `NodeJoinExecutor`, check that:
+
+* You're looking at the logs for the elected master node.
+
+* The logs cover the correct time period.
+
+* Logging is enabled at `INFO` level.
+
+Nodes will also log a message containing `master node changed` whenever they
+start or stop following the elected master. You can use these messages to
+determine each node's view of the state of the master over time.
+
+If a node restarts, it will leave the cluster and then join the cluster again.
+When it rejoins, the `NodeJoinExecutor` will log that it processed a
+`node-join` task indicating that the node is `joining after restart`. If a node
+is unexpectedly restarting, look at the node's logs to see why it is shutting
+down.
+
+The <> API on the affected node will also provide some useful
+information about the situation.
+
+If the node did not restart then you should look at the reason for its
+departure more closely. Each reason has different troubleshooting steps,
+described below. There are three possible reasons:
+
+* `disconnected`: The connection from the master node to the removed node was
+closed.
+
+* `lagging`: The master published a cluster state update, but the removed node
+did not apply it within the permitted timeout. By default, this timeout is 2
+minutes. Refer to <> for information about the
+settings which control this mechanism.
+
+* `followers check retry count exceeded`: The master sent a number of
+consecutive health checks to the removed node. These checks were rejected or
+timed out. By default, each health check times out after 10 seconds and {es}
+removes the node after three consecutively failed health checks. Refer
+to <> for information about the settings which
+control this mechanism. 
+
+[discrete]
+[[troubleshooting-unstable-cluster-disconnected]]
+=== Diagnosing `disconnected` nodes
+
+Nodes typically leave the cluster with reason `disconnected` when they shut
+down, but if they rejoin the cluster without restarting then there is some
+other problem.
+
+{es} is designed to run on a fairly reliable network. It opens a number of TCP
+connections between nodes and expects these connections to remain open
+<>. If a connection is closed then {es} will
+try and reconnect, so the occasional blip may fail some in-flight operations
+but should otherwise have limited impact on the cluster. In contrast,
+repeatedly-dropped connections will severely affect its operation.
+
+The connections from the elected master node to every other node in the cluster
+are particularly important. The elected master never spontaneously closes its
+outbound connections to other nodes. Similarly, once an inbound connection is
+fully established, a node never spontaneously closes it unless the node is
+shutting down.
+
+If you see a node unexpectedly leave the cluster with the `disconnected`
+reason, something other than {es} likely caused the connection to close. A
+common cause is a misconfigured firewall with an improper timeout or another
+policy that's <>. It could also
+be caused by general connectivity issues, such as packet loss due to faulty
+hardware or network congestion. 
If you're an advanced user, configure the +following loggers to get more detailed information about network exceptions: + +[source,yaml] +---- +logger.org.elasticsearch.transport.TcpTransport: DEBUG +logger.org.elasticsearch.xpack.core.security.transport.netty4.SecurityNetty4Transport: DEBUG +---- + +If these logs do not show enough information to diagnose the problem, obtain a +packet capture simultaneously from the nodes at both ends of an unstable +connection and analyse it alongside the {es} logs from those nodes to determine +if traffic between the nodes is being disrupted by another device on the +network. + +[discrete] +[[troubleshooting-unstable-cluster-lagging]] +=== Diagnosing `lagging` nodes + +{es} needs every node to process cluster state updates reasonably quickly. If a +node takes too long to process a cluster state update, it can be harmful to the +cluster. The master will remove these nodes with the `lagging` reason. Refer to +<> for information about the settings which control +this mechanism. + +Lagging is typically caused by performance issues on the removed node. However, +a node may also lag due to severe network delays. To rule out network delays, +ensure that `net.ipv4.tcp_retries2` is <>. Log messages that contain `warn threshold` may provide more +information about the root cause. + +If you're an advanced user, you can get more detailed information about what +the node was doing when it was removed by configuring the following logger: + +[source,yaml] +---- +logger.org.elasticsearch.cluster.coordination.LagDetector: DEBUG +---- + +When this logger is enabled, {es} will attempt to run the +<> API on the faulty node and report the results in +the logs on the elected master. 
The results are compressed, encoded, and split +into chunks to avoid truncation: + +[source,text] +---- +[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] [part 1]: H4sIAAAAAAAA/x... +[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] [part 2]: p7x3w1hmOQVtuV... +[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] [part 3]: v7uTboMGDbyOy+... +[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] [part 4]: 4tse0RnPnLeDNN... +[DEBUG][o.e.c.c.LagDetector ] [master] hot threads from node [{node}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] lagging at version [183619] despite commit of cluster state version [183620] (gzip compressed, base64-encoded, and split into 4 parts on preceding log lines) +---- + +To reconstruct the output, base64-decode the data and decompress it using +`gzip`. For instance, on Unix-like systems: + +[source,sh] +---- +cat lagdetector.log | sed -e 's/.*://' | base64 --decode | gzip --decompress +---- + +[discrete] +[[troubleshooting-unstable-cluster-follower-check]] +=== Diagnosing `follower check retry count exceeded` nodes + +Nodes sometimes leave the cluster with reason `follower check retry count +exceeded` when they shut down, but if they rejoin the cluster without +restarting then there is some other problem. + +{es} needs every node to respond to network messages successfully and +reasonably quickly. If a node rejects requests or does not respond at all then +it can be harmful to the cluster. 
If enough consecutive checks fail then the +master will remove the node with reason `follower check retry count exceeded` +and will indicate in the `node-left` message how many of the consecutive +unsuccessful checks failed and how many of them timed out. Refer to +<> for information about the settings which control +this mechanism. + +Timeouts and failures may be due to network delays or performance problems on +the affected nodes. Ensure that `net.ipv4.tcp_retries2` is +<> to eliminate network delays as +a possible cause for this kind of instability. Log messages containing +`warn threshold` may give further clues about the cause of the instability. + +If the last check failed with an exception then the exception is reported, and +typically indicates the problem that needs to be addressed. If any of the +checks timed out then narrow down the problem as follows. + +include::network-timeouts.asciidoc[tag=troubleshooting-network-timeouts-gc-vm] + +include::network-timeouts.asciidoc[tag=troubleshooting-network-timeouts-packet-capture-fault-detection] + +include::network-timeouts.asciidoc[tag=troubleshooting-network-timeouts-threads] + +By default the follower checks will time out after 30s, so if node departures +are unpredictable then capture stack dumps every 15s to be sure that at least +one stack dump was taken at the right time. + +[discrete] +[[troubleshooting-unstable-cluster-shardlockobtainfailedexception]] +=== Diagnosing `ShardLockObtainFailedException` failures + +If a node leaves and rejoins the cluster then {es} will usually shut down and +re-initialize its shards. If the shards do not shut down quickly enough then +{es} may fail to re-initialize them due to a `ShardLockObtainFailedException`. 
+ +To gather more information about the reason for shards shutting down slowly, +configure the following logger: + +[source,yaml] +---- +logger.org.elasticsearch.env.NodeEnvironment: DEBUG +---- + +When this logger is enabled, {es} will attempt to run the +<> API whenever it encounters a +`ShardLockObtainFailedException`. The results are compressed, encoded, and +split into chunks to avoid truncation: + +[source,text] +---- +[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 1]: H4sIAAAAAAAA/x... +[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 2]: p7x3w1hmOQVtuV... +[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 3]: v7uTboMGDbyOy+... +[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] [part 4]: 4tse0RnPnLeDNN... +[DEBUG][o.e.e.NodeEnvironment ] [master] hot threads while failing to obtain shard lock for [index][0] (gzip compressed, base64-encoded, and split into 4 parts on preceding log lines) +---- + +To reconstruct the output, base64-decode the data and decompress it using +`gzip`. For instance, on Unix-like systems: + +[source,sh] +---- +cat shardlock.log | sed -e 's/.*://' | base64 --decode | gzip --decompress +---- + +[discrete] +[[troubleshooting-unstable-cluster-network]] +=== Diagnosing other network disconnections + +{es} is designed to run on a fairly reliable network. It opens a number of TCP +connections between nodes and expects these connections to remain open +<>. If a connection is closed then {es} will +try and reconnect, so the occasional blip may fail some in-flight operations +but should otherwise have limited impact on the cluster. In contrast, +repeatedly-dropped connections will severely affect its operation. 
+ +{es} nodes will only actively close an outbound connection to another node if +the other node leaves the cluster. See +<> for further information about +identifying and troubleshooting this situation. If an outbound connection +closes for some other reason, nodes will log a message such as the following: + +[source,text] +---- +[INFO ][o.e.t.ClusterConnectionManager] [node-1] transport connection to [{node-2}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] closed by remote +---- + +Similarly, once an inbound connection is fully established, a node never +spontaneously closes it unless the node is shutting down. + +Therefore if you see a node report that a connection to another node closed +unexpectedly, something other than {es} likely caused the connection to close. +A common cause is a misconfigured firewall with an improper timeout or another +policy that's <>. It could also +be caused by general connectivity issues, such as packet loss due to faulty +hardware or network congestion. If you're an advanced user, configure the +following loggers to get more detailed information about network exceptions: + +[source,yaml] +---- +logger.org.elasticsearch.transport.TcpTransport: DEBUG +logger.org.elasticsearch.xpack.core.security.transport.netty4.SecurityNetty4Transport: DEBUG +---- + +If these logs do not show enough information to diagnose the problem, obtain a +packet capture simultaneously from the nodes at both ends of an unstable +connection and analyse it alongside the {es} logs from those nodes to determine +if traffic between the nodes is being disrupted by another device on the +network. 
diff --git a/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json b/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json index 3eb8939c22a6..cc0bc5e2257c 100644 --- a/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json +++ b/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json @@ -2,8 +2,8 @@ "INITIAL_MASTER_NODES": "important-settings.html#initial_master_nodes", "DISCOVERY_TROUBLESHOOTING": "discovery-troubleshooting.html", "UNSTABLE_CLUSTER_TROUBLESHOOTING": "troubleshooting-unstable-cluster.html", - "LAGGING_NODE_TROUBLESHOOTING": "troubleshooting-unstable-cluster.html#_diagnosing_lagging_nodes_2", - "SHARD_LOCK_TROUBLESHOOTING": "troubleshooting-unstable-cluster.html#_diagnosing_shardlockobtainfailedexception_failures_2", + "LAGGING_NODE_TROUBLESHOOTING": "troubleshooting-unstable-cluster.html#troubleshooting-unstable-cluster-lagging", + "SHARD_LOCK_TROUBLESHOOTING": "troubleshooting-unstable-cluster.html#troubleshooting-unstable-cluster-shardlockobtainfailedexception", "CONCURRENT_REPOSITORY_WRITERS": "diagnosing-corrupted-repositories.html", "ARCHIVE_INDICES": "archive-indices.html", "HTTP_TRACER": "modules-network.html#http-rest-request-tracer", From 5ac4d8c71e06880624a9a91bfec4ae310d9cab2f Mon Sep 17 00:00:00 2001 From: Craig Taverner Date: Thu, 29 Aug 2024 14:48:15 +0200 Subject: [PATCH 20/30] Fix union-types where one index is missing the field (#111932) * Fix union-types where one index is missing the field When none of the indexes has the field, a validation error is correctly thrown, and when all indexes have the field, union-types works as normal. But when some indexes have the field and some do not, we were getting and internal error. We treat this case similarly to when some documents are missing the field, in which case `null` values are produced. 
So now a multi-index query where some indexes are missing the field will produce nulls for the documents coming from those indexes. * Update docs/changelog/111932.yaml * Added capability for this fix (missing-field) --- docs/changelog/111932.yaml | 6 ++ .../xpack/esql/CsvTestsDataLoader.java | 6 ++ .../mapping-missing_ip_sample_data.json | 13 ++++ .../main/resources/missing_ip_sample_data.csv | 8 +++ .../src/main/resources/union_types.csv-spec | 68 +++++++++++++++++++ .../xpack/esql/action/EsqlCapabilities.java | 5 ++ .../planner/EsPhysicalOperationProviders.java | 4 +- 7 files changed, 109 insertions(+), 1 deletion(-) create mode 100644 docs/changelog/111932.yaml create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-missing_ip_sample_data.json create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/missing_ip_sample_data.csv diff --git a/docs/changelog/111932.yaml b/docs/changelog/111932.yaml new file mode 100644 index 000000000000..ce840ecebcff --- /dev/null +++ b/docs/changelog/111932.yaml @@ -0,0 +1,6 @@ +pr: 111932 +summary: Fix union-types where one index is missing the field +area: ES|QL +type: bug +issues: + - 111912 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index b20e3bb0d540..9ee22113a424 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -68,6 +68,11 @@ public class CsvTestsDataLoader { "mapping-sample_data_ts_long.json", "sample_data_ts_long.csv" ); + private static final TestsDataset MISSING_IP_SAMPLE_DATA = new TestsDataset( + "missing_ip_sample_data", + "mapping-missing_ip_sample_data.json", + "missing_ip_sample_data.csv" + ); private static final TestsDataset 
CLIENT_IPS = new TestsDataset("clientips", "mapping-clientips.json", "clientips.csv"); private static final TestsDataset CLIENT_CIDR = new TestsDataset("client_cidr", "mapping-client_cidr.json", "client_cidr.csv"); private static final TestsDataset AGES = new TestsDataset("ages", "mapping-ages.json", "ages.csv"); @@ -112,6 +117,7 @@ public class CsvTestsDataLoader { Map.entry(ALERTS.indexName, ALERTS), Map.entry(SAMPLE_DATA_STR.indexName, SAMPLE_DATA_STR), Map.entry(SAMPLE_DATA_TS_LONG.indexName, SAMPLE_DATA_TS_LONG), + Map.entry(MISSING_IP_SAMPLE_DATA.indexName, MISSING_IP_SAMPLE_DATA), Map.entry(CLIENT_IPS.indexName, CLIENT_IPS), Map.entry(CLIENT_CIDR.indexName, CLIENT_CIDR), Map.entry(AGES.indexName, AGES), diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-missing_ip_sample_data.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-missing_ip_sample_data.json new file mode 100644 index 000000000000..6f3796dd7715 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-missing_ip_sample_data.json @@ -0,0 +1,13 @@ +{ + "properties": { + "@timestamp": { + "type": "date" + }, + "event_duration": { + "type": "long" + }, + "message": { + "type": "keyword" + } + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/missing_ip_sample_data.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/missing_ip_sample_data.csv new file mode 100644 index 000000000000..e8e9ddcaee83 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/missing_ip_sample_data.csv @@ -0,0 +1,8 @@ +@timestamp:date,event_duration:long,message:keyword +2023-10-23T13:55:01.543Z,1756467,Connected to 10.1.0.1 +2023-10-23T13:53:55.832Z,5033755,Connection error +2023-10-23T13:52:55.015Z,8268153,Connection error +2023-10-23T13:51:54.732Z,725448,Connection error +2023-10-23T13:33:34.937Z,1232382,Disconnected +2023-10-23T12:27:28.948Z,2764889,Connected to 10.1.0.2 
+2023-10-23T12:15:03.360Z,3450233,Connected to 10.1.0.3 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec index 6819727be013..c6a2d47a78dc 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec @@ -405,6 +405,74 @@ count:long | message:keyword 2 | Connected to 10.1.0.3 ; +multiIndexMissingIpToString +required_capability: union_types +required_capability: union_types_missing_field + +FROM sample_data, sample_data_str, missing_ip_sample_data METADATA _index +| EVAL client_ip = TO_STRING(client_ip) +| KEEP _index, @timestamp, client_ip, event_duration, message +| SORT _index ASC, @timestamp DESC +; + +_index:keyword | @timestamp:date | client_ip:keyword | event_duration:long | message:keyword +missing_ip_sample_data | 2023-10-23T13:55:01.543Z | null | 1756467 | Connected to 10.1.0.1 +missing_ip_sample_data | 2023-10-23T13:53:55.832Z | null | 5033755 | Connection error +missing_ip_sample_data | 2023-10-23T13:52:55.015Z | null | 8268153 | Connection error +missing_ip_sample_data | 2023-10-23T13:51:54.732Z | null | 725448 | Connection error +missing_ip_sample_data | 2023-10-23T13:33:34.937Z | null | 1232382 | Disconnected +missing_ip_sample_data | 2023-10-23T12:27:28.948Z | null | 2764889 | Connected to 10.1.0.2 +missing_ip_sample_data | 2023-10-23T12:15:03.360Z | null | 3450233 | Connected to 10.1.0.3 +sample_data | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data | 
2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +sample_data_str | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data_str | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data_str | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data_str | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data_str | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data_str | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data_str | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +; + +multiIndexMissingIpToIp +required_capability: union_types +required_capability: union_types_missing_field + +FROM sample_data, sample_data_str, missing_ip_sample_data METADATA _index +| EVAL client_ip = TO_IP(client_ip) +| KEEP _index, @timestamp, client_ip, event_duration, message +| SORT _index ASC, @timestamp DESC +; + +_index:keyword | @timestamp:date | client_ip:ip | event_duration:long | message:keyword +missing_ip_sample_data | 2023-10-23T13:55:01.543Z | null | 1756467 | Connected to 10.1.0.1 +missing_ip_sample_data | 2023-10-23T13:53:55.832Z | null | 5033755 | Connection error +missing_ip_sample_data | 2023-10-23T13:52:55.015Z | null | 8268153 | Connection error +missing_ip_sample_data | 2023-10-23T13:51:54.732Z | null | 725448 | Connection error +missing_ip_sample_data | 2023-10-23T13:33:34.937Z | null | 1232382 | Disconnected +missing_ip_sample_data | 2023-10-23T12:27:28.948Z | null | 2764889 | Connected to 10.1.0.2 +missing_ip_sample_data | 2023-10-23T12:15:03.360Z | null | 3450233 | Connected to 10.1.0.3 +sample_data | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error 
+sample_data | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +sample_data_str | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data_str | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data_str | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data_str | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data_str | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data_str | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data_str | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +; + multiIndexTsLong required_capability: union_types required_capability: metadata_fields diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 81b2ba71b880..120323ebeb7a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -183,6 +183,11 @@ public enum Cap { */ UNION_TYPES_FIX_RENAME_RESOLUTION, + /** + * Fix for union-types when some indexes are missing the required field. Done in #111932. + */ + UNION_TYPES_MISSING_FIELD, + /** * Fix a parsing issue where numbers below Long.MIN_VALUE threw an exception instead of parsing as doubles. 
* see Parsing large numbers is inconsistent #104323 diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java index 8fddb7407a02..04be73148426 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java @@ -138,7 +138,9 @@ private BlockLoader getBlockLoaderFor( if (unionTypes != null) { String indexName = shardContext.ctx.index().getName(); Expression conversion = unionTypes.getConversionExpressionForIndex(indexName); - return new TypeConvertingBlockLoader(blockLoader, (AbstractConvertFunction) conversion); + return conversion == null + ? BlockLoader.CONSTANT_NULLS + : new TypeConvertingBlockLoader(blockLoader, (AbstractConvertFunction) conversion); } return blockLoader; } From 5c200afb9e3a02400cfdf45c4469c30bd1417223 Mon Sep 17 00:00:00 2001 From: "Mark J. 
Hoy" Date: Thu, 29 Aug 2024 09:12:03 -0400 Subject: [PATCH 21/30] [ML] Adds Explain Functionality to LTR Rescoring (#112155) --- .../integration/LearningToRankRescorerIT.java | 433 ++++++++++-------- .../inference/ltr/LearningToRankRescorer.java | 55 ++- 2 files changed, 303 insertions(+), 185 deletions(-) diff --git a/x-pack/plugin/ml/qa/single-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/LearningToRankRescorerIT.java b/x-pack/plugin/ml/qa/single-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/LearningToRankRescorerIT.java index b2a0b60aed7b..4a703117c655 100644 --- a/x-pack/plugin/ml/qa/single-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/LearningToRankRescorerIT.java +++ b/x-pack/plugin/ml/qa/single-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/LearningToRankRescorerIT.java @@ -16,7 +16,9 @@ import org.junit.Before; import java.io.IOException; +import java.util.ArrayList; import java.util.List; +import java.util.Map; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; @@ -28,189 +30,11 @@ public class LearningToRankRescorerIT extends InferenceTestCase { @Before public void setupModelAndData() throws IOException { - putRegressionModel(MODEL_ID, """ - { - "description": "super complex model for tests", - "inference_config": { - "learning_to_rank": { - "feature_extractors": [ - { - "query_extractor": { - "feature_name": "cost", - "query": {"script_score": {"query": {"match_all":{}}, "script": {"source": "return doc['cost'].value;"}}} - } - }, - { - "query_extractor": { - "feature_name": "type_tv", - "query": {"constant_score": {"filter": {"term": { "product": "TV" }}, "boost": 1.0}} - } - }, - { - "query_extractor": { - "feature_name": "type_vcr", - "query": {"constant_score": {"filter": {"term": { "product": "VCR" }}, "boost": 1.0}} - } - }, - { - "query_extractor": { - "feature_name": "type_laptop", - "query": 
{"constant_score": {"filter": {"term": { "product": "Laptop" }}, "boost": 1.0}} - } - }, - { - "query_extractor": { - "feature_name": "two", - "query": { "script_score": { "query": { "match_all": {} }, "script": { "source": "return 2.0;" } } } - } - }, - { - "query_extractor": { - "feature_name": "product_bm25", - "query": { "term": { "product": "{{keyword}}" } } - } - } - ] - } - }, - "definition": { - "trained_model": { - "ensemble": { - "feature_names": ["cost", "type_tv", "type_vcr", "type_laptop", "two", "product_bm25"], - "target_type": "regression", - "trained_models": [ - { - "tree": { - "feature_names": [ - "cost" - ], - "tree_structure": [ - { - "node_index": 0, - "split_feature": 0, - "split_gain": 12, - "threshold": 400, - "decision_type": "lte", - "default_left": true, - "left_child": 1, - "right_child": 2 - }, - { - "node_index": 1, - "leaf_value": 5.0 - }, - { - "node_index": 2, - "leaf_value": 2.0 - } - ], - "target_type": "regression" - } - }, - { - "tree": { - "feature_names": [ - "type_tv" - ], - "tree_structure": [ - { - "node_index": 0, - "split_feature": 0, - "split_gain": 12, - "threshold": 1, - "decision_type": "lt", - "default_left": true, - "left_child": 1, - "right_child": 2 - }, - { - "node_index": 1, - "leaf_value": 1.0 - }, - { - "node_index": 2, - "leaf_value": 12.0 - } - ], - "target_type": "regression" - } - }, - { - "tree": { - "feature_names": [ - "two" - ], - "tree_structure": [ - { - "node_index": 0, - "split_feature": 0, - "split_gain": 12, - "threshold": 1, - "decision_type": "lt", - "default_left": true, - "left_child": 1, - "right_child": 2 - }, - { - "node_index": 1, - "leaf_value": 1.0 - }, - { - "node_index": 2, - "leaf_value": 2.0 - } - ], - "target_type": "regression" - } - }, - { - "tree": { - "feature_names": [ - "product_bm25" - ], - "tree_structure": [ - { - "node_index": 0, - "split_feature": 0, - "split_gain": 12, - "threshold": 1, - "decision_type": "lt", - "default_left": true, - "left_child": 1, - 
"right_child": 2 - }, - { - "node_index": 1, - "leaf_value": 1.0 - }, - { - "node_index": 2, - "leaf_value": 4.0 - } - ], - "target_type": "regression" - } - } - ] - } - } - } - } - """); - createIndex(INDEX_NAME, Settings.EMPTY, """ - "properties":{ - "product":{"type": "keyword"}, - "cost":{"type": "integer"} - }"""); - indexData("{ \"product\": \"TV\", \"cost\": 300}"); - indexData("{ \"product\": \"TV\", \"cost\": 400}"); - indexData("{ \"product\": \"TV\", \"cost\": 600}"); - indexData("{ \"product\": \"VCR\", \"cost\": 15}"); - indexData("{ \"product\": \"VCR\", \"cost\": 350}"); - indexData("{ \"product\": \"VCR\", \"cost\": 580}"); - indexData("{ \"product\": \"Laptop\", \"cost\": 100}"); - indexData("{ \"product\": \"Laptop\", \"cost\": 300}"); - indexData("{ \"product\": \"Laptop\", \"cost\": 500}"); + putRegressionModel(MODEL_ID, testRegressionModel); + createIndex(INDEX_NAME, Settings.EMPTY, testIndexDefinition); + for (String testDataItem : testIndexData) { + indexData(testDataItem); + } adminClient().performRequest(new Request("POST", INDEX_NAME + "/_refresh")); } @@ -249,6 +73,19 @@ public void testLearningToRankRescore() throws Exception { assertHitScores(client().performRequest(request), List.of(9.0, 9.0, 6.0)); } + public void testLearningToRankRescoreWithExplain() throws Exception { + Request request = new Request("GET", "store/_search?size=3&explain=true&error_trace"); + request.setJsonEntity(""" + { + "rescore": { + "window_size": 10, + "learning_to_rank": { "model_id": "ltr-model" } + } + }"""); + var response = client().performRequest(request); + assertExplainExtractedFeatures(response, List.of("type_tv", "cost", "two")); + } + public void testLearningToRankRescoreSmallWindow() throws Exception { Request request = new Request("GET", "store/_search?size=5"); request.setJsonEntity(""" @@ -336,4 +173,234 @@ private void indexData(String data) throws IOException { private static void assertHitScores(Response response, List expectedScores) throws 
IOException { assertThat((List) XContentMapValues.extractValue("hits.hits._score", responseAsMap(response)), equalTo(expectedScores)); } + + @SuppressWarnings("unchecked") + private static void assertExplainExtractedFeatures(Response response, List expectedFeatures) throws IOException { + var explainValues = (ArrayList>) XContentMapValues.extractValue( + "hits.hits._explanation", + responseAsMap(response) + ); + + assertThat(explainValues.size(), equalTo(3)); + for (Map hit : explainValues) { + assertThat(hit.get("description"), equalTo("rescored using LTR model ltr-model")); + + var queryDetails = (ArrayList>) hit.get("details"); + assertThat(queryDetails.size(), equalTo(2)); + + assertThat(queryDetails.get(0).get("description"), equalTo("first pass query score")); + assertThat(queryDetails.get(1).get("description"), equalTo("extracted features")); + + var featureDetails = new ArrayList<>((ArrayList>) queryDetails.get(1).get("details")); + assertThat(featureDetails.size(), equalTo(3)); + + var missingKeys = new ArrayList(); + for (String expectedFeature : expectedFeatures) { + var expectedDescription = Strings.format("feature value for [%s]", expectedFeature); + + var wasFound = false; + for (Map detailItem : featureDetails) { + if (detailItem.get("description").equals(expectedDescription)) { + featureDetails.remove(detailItem); + wasFound = true; + break; + } + } + + if (wasFound == false) { + missingKeys.add(expectedFeature); + } + } + + assertThat(Strings.format("Could not find features: [%s]", String.join(", ", missingKeys)), featureDetails.size(), equalTo(0)); + } + } + + private static String testIndexDefinition = """ + "properties":{ + "product":{"type": "keyword"}, + "cost":{"type": "integer"} + }"""; + + private static List testIndexData = List.of( + "{ \"product\": \"TV\", \"cost\": 300}", + "{ \"product\": \"TV\", \"cost\": 400}", + "{ \"product\": \"TV\", \"cost\": 600}", + "{ \"product\": \"VCR\", \"cost\": 15}", + "{ \"product\": \"VCR\", \"cost\": 
350}", + "{ \"product\": \"VCR\", \"cost\": 580}", + "{ \"product\": \"Laptop\", \"cost\": 100}", + "{ \"product\": \"Laptop\", \"cost\": 300}", + "{ \"product\": \"Laptop\", \"cost\": 500}" + ); + + private static String testRegressionModel = """ + { + "description": "super complex model for tests", + "inference_config": { + "learning_to_rank": { + "feature_extractors": [ + { + "query_extractor": { + "feature_name": "cost", + "query": {"script_score": {"query": {"match_all":{}}, "script": {"source": "return doc['cost'].value;"}}} + } + }, + { + "query_extractor": { + "feature_name": "type_tv", + "query": {"constant_score": {"filter": {"term": { "product": "TV" }}, "boost": 1.0}} + } + }, + { + "query_extractor": { + "feature_name": "type_vcr", + "query": {"constant_score": {"filter": {"term": { "product": "VCR" }}, "boost": 1.0}} + } + }, + { + "query_extractor": { + "feature_name": "type_laptop", + "query": {"constant_score": {"filter": {"term": { "product": "Laptop" }}, "boost": 1.0}} + } + }, + { + "query_extractor": { + "feature_name": "two", + "query": { "script_score": { "query": { "match_all": {} }, "script": { "source": "return 2.0;" } } } + } + }, + { + "query_extractor": { + "feature_name": "product_bm25", + "query": { "term": { "product": "{{keyword}}" } } + } + } + ] + } + }, + "definition": { + "trained_model": { + "ensemble": { + "feature_names": ["cost", "type_tv", "type_vcr", "type_laptop", "two", "product_bm25"], + "target_type": "regression", + "trained_models": [ + { + "tree": { + "feature_names": [ + "cost" + ], + "tree_structure": [ + { + "node_index": 0, + "split_feature": 0, + "split_gain": 12, + "threshold": 400, + "decision_type": "lte", + "default_left": true, + "left_child": 1, + "right_child": 2 + }, + { + "node_index": 1, + "leaf_value": 5.0 + }, + { + "node_index": 2, + "leaf_value": 2.0 + } + ], + "target_type": "regression" + } + }, + { + "tree": { + "feature_names": [ + "type_tv" + ], + "tree_structure": [ + { + "node_index": 0, + 
"split_feature": 0, + "split_gain": 12, + "threshold": 1, + "decision_type": "lt", + "default_left": true, + "left_child": 1, + "right_child": 2 + }, + { + "node_index": 1, + "leaf_value": 1.0 + }, + { + "node_index": 2, + "leaf_value": 12.0 + } + ], + "target_type": "regression" + } + }, + { + "tree": { + "feature_names": [ + "two" + ], + "tree_structure": [ + { + "node_index": 0, + "split_feature": 0, + "split_gain": 12, + "threshold": 1, + "decision_type": "lt", + "default_left": true, + "left_child": 1, + "right_child": 2 + }, + { + "node_index": 1, + "leaf_value": 1.0 + }, + { + "node_index": 2, + "leaf_value": 2.0 + } + ], + "target_type": "regression" + } + }, + { + "tree": { + "feature_names": [ + "product_bm25" + ], + "tree_structure": [ + { + "node_index": 0, + "split_feature": 0, + "split_gain": 12, + "threshold": 1, + "decision_type": "lt", + "default_left": true, + "left_child": 1, + "right_child": 2 + }, + { + "node_index": 1, + "leaf_value": 1.0 + }, + { + "node_index": 2, + "leaf_value": 4.0 + } + ], + "target_type": "regression" + } + } + ] + } + } + } + } + """; } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/ltr/LearningToRankRescorer.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/ltr/LearningToRankRescorer.java index 8a310ba2719f..70d0b980bb3b 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/ltr/LearningToRankRescorer.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/ltr/LearningToRankRescorer.java @@ -28,6 +28,7 @@ import java.util.Comparator; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; import static java.util.stream.Collectors.toUnmodifiableSet; @@ -129,8 +130,58 @@ public TopDocs rescore(TopDocs topDocs, IndexSearcher searcher, RescoreContext r @Override public Explanation explain(int topLevelDocId, IndexSearcher searcher, RescoreContext rescoreContext, Explanation 
sourceExplanation) throws IOException { - // TODO: Call infer again but with individual feature importance values and explaining the model (which features are used, etc.) - return null; + if (sourceExplanation == null) { + return Explanation.noMatch("no match found"); + } + + LearningToRankRescorerContext ltrContext = (LearningToRankRescorerContext) rescoreContext; + LocalModel localModelDefinition = ltrContext.regressionModelDefinition; + + if (localModelDefinition == null) { + throw new IllegalStateException("local model reference is null, missing rewriteAndFetch before rescore phase?"); + } + + List leaves = ltrContext.executionContext.searcher().getIndexReader().leaves(); + + int endDoc = 0; + int readerUpto = -1; + LeafReaderContext currentSegment = null; + + while (topLevelDocId >= endDoc) { + readerUpto++; + currentSegment = leaves.get(readerUpto); + endDoc = currentSegment.docBase + currentSegment.reader().maxDoc(); + } + + assert currentSegment != null : "Unexpected null segment"; + + int targetDoc = topLevelDocId - currentSegment.docBase; + + List featureExtractors = ltrContext.buildFeatureExtractors(searcher); + int featureSize = featureExtractors.stream().mapToInt(fe -> fe.featureNames().size()).sum(); + + Map features = Maps.newMapWithExpectedSize(featureSize); + + for (FeatureExtractor featureExtractor : featureExtractors) { + featureExtractor.setNextReader(currentSegment); + featureExtractor.addFeatures(features, targetDoc); + } + + // Predicting the value + var ltrScore = ((Number) localModelDefinition.inferLtr(features, ltrContext.learningToRankConfig).predictedValue()).floatValue(); + + List featureExplanations = new ArrayList<>(); + for (String featureName : features.keySet()) { + Number featureValue = Objects.requireNonNullElse((Number) features.get(featureName), 0); + featureExplanations.add(Explanation.match(featureValue, "feature value for [" + featureName + "]")); + } + + return Explanation.match( + ltrScore, + "rescored using LTR model " + 
ltrContext.regressionModelDefinition.getModelId(), + Explanation.match(sourceExplanation.getValue(), "first pass query score", sourceExplanation), + Explanation.match(0f, "extracted features", featureExplanations) + ); } /** Returns a new {@link TopDocs} with the topN from the incoming one, or the same TopDocs if the number of hits is already <= From e36b5551ab418860e9db84cdaa96dc2e0df30d6b Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Thu, 29 Aug 2024 09:44:15 -0400 Subject: [PATCH 22/30] ESQL: Method to convert BooleanBlock to a "mask" (#112253) This adds a method, `BooleanBlock#toMask` to convert `BooleanBlock`s into a "mask" for use with `keepMask`. --- .../compute/data/BooleanArrayBlock.java | 21 ++++++ .../compute/data/BooleanBigArrayBlock.java | 21 ++++++ .../compute/data/BooleanBlock.java | 7 ++ .../compute/data/BooleanVectorBlock.java | 6 ++ .../compute/data/ConstantNullBlock.java | 5 ++ .../elasticsearch/compute/data/ToMask.java | 22 +++++++ .../compute/data/X-ArrayBlock.java.st | 22 +++++++ .../compute/data/X-BigArrayBlock.java.st | 23 +++++++ .../compute/data/X-Block.java.st | 10 ++- .../compute/data/X-Vector.java.st | 2 +- .../compute/data/X-VectorBlock.java.st | 9 ++- .../compute/data/BasicBlockTests.java | 41 +++++++++++- .../data/BigArrayBlockBuilderTests.java | 66 +++++++++++++++++++ .../compute/data/BigArrayVectorTests.java | 6 ++ .../compute/data/BlockMultiValuedTests.java | 49 ++++++++++++++ 15 files changed, 306 insertions(+), 4 deletions(-) create mode 100644 x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ToMask.java diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanArrayBlock.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanArrayBlock.java index 14f6c9591ed1..3d600bec1bd6 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanArrayBlock.java +++ 
b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanArrayBlock.java @@ -85,6 +85,27 @@ public BooleanVector asVector() { return null; } + @Override + public ToMask toMask() { + if (getPositionCount() == 0) { + return new ToMask(blockFactory().newConstantBooleanVector(false, 0), false); + } + try (BooleanVector.FixedBuilder builder = blockFactory().newBooleanVectorFixedBuilder(getPositionCount())) { + boolean hasMv = false; + for (int p = 0; p < getPositionCount(); p++) { + builder.appendBoolean(switch (getValueCount(p)) { + case 0 -> false; + case 1 -> getBoolean(getFirstValueIndex(p)); + default -> { + hasMv = true; + yield false; + } + }); + } + return new ToMask(builder.build(), hasMv); + } + } + @Override public boolean getBoolean(int valueIndex) { return vector.getBoolean(valueIndex); diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBigArrayBlock.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBigArrayBlock.java index 5342728af4fe..f353512eb93b 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBigArrayBlock.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBigArrayBlock.java @@ -86,6 +86,27 @@ public BooleanVector asVector() { return null; } + @Override + public ToMask toMask() { + if (getPositionCount() == 0) { + return new ToMask(blockFactory().newConstantBooleanVector(false, 0), false); + } + try (BooleanVector.FixedBuilder builder = blockFactory().newBooleanVectorFixedBuilder(getPositionCount())) { + boolean hasMv = false; + for (int p = 0; p < getPositionCount(); p++) { + builder.appendBoolean(switch (getValueCount(p)) { + case 0 -> false; + case 1 -> getBoolean(getFirstValueIndex(p)); + default -> { + hasMv = true; + yield false; + } + }); + } + return new ToMask(builder.build(), hasMv); + } + } + @Override public boolean 
getBoolean(int valueIndex) { return vector.getBoolean(valueIndex); diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBlock.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBlock.java index 566b8fbed445..5d2d6c97a11f 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBlock.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanBlock.java @@ -37,6 +37,13 @@ public sealed interface BooleanBlock extends Block permits BooleanArrayBlock, Bo @Override BooleanVector asVector(); + /** + * Convert this to a {@link BooleanVector "mask"} that's appropriate for + * passing to {@link #keepMask}. Null and multivalued positions will be + * converted to {@code false}. + */ + ToMask toMask(); + @Override BooleanBlock filter(int... positions); diff --git a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanVectorBlock.java b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanVectorBlock.java index ca2fc58bf0bb..1544cc3355cd 100644 --- a/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanVectorBlock.java +++ b/x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanVectorBlock.java @@ -31,6 +31,12 @@ public BooleanVector asVector() { return vector; } + @Override + public ToMask toMask() { + vector.incRef(); + return new ToMask(vector, false); + } + @Override public boolean getBoolean(int valueIndex) { return vector.getBoolean(valueIndex); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java index fc4cdc1d41f4..3d61613ba70e 100644 --- 
a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java @@ -48,6 +48,11 @@ public OrdinalBytesRefBlock asOrdinals() { return null; } + @Override + public ToMask toMask() { + return new ToMask(blockFactory.newConstantBooleanVector(false, positionCount), false); + } + @Override public boolean isNull(int position) { return true; diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ToMask.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ToMask.java new file mode 100644 index 000000000000..5b71679048e2 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ToMask.java @@ -0,0 +1,22 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.data; + +import org.elasticsearch.core.Releasable; + +/** + * Result from calling {@link BooleanBlock#toMask}. {@link #close closing} this will + * close the contained {@link #mask()}. If you want to keep a reference to it then you'll + * have to {@link Block#incRef()} it. 
+ */ +public record ToMask(BooleanVector mask, boolean hadMultivaluedFields) implements Releasable { + @Override + public void close() { + mask.close(); + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ArrayBlock.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ArrayBlock.java.st index 750de95e7b8d..e855e6d6296d 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ArrayBlock.java.st +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ArrayBlock.java.st @@ -101,6 +101,28 @@ $if(BytesRef)$ public OrdinalBytesRefBlock asOrdinals() { return null; } + +$elseif(boolean)$ + @Override + public ToMask toMask() { + if (getPositionCount() == 0) { + return new ToMask(blockFactory().newConstantBooleanVector(false, 0), false); + } + try (BooleanVector.FixedBuilder builder = blockFactory().newBooleanVectorFixedBuilder(getPositionCount())) { + boolean hasMv = false; + for (int p = 0; p < getPositionCount(); p++) { + builder.appendBoolean(switch (getValueCount(p)) { + case 0 -> false; + case 1 -> getBoolean(getFirstValueIndex(p)); + default -> { + hasMv = true; + yield false; + } + }); + } + return new ToMask(builder.build(), hasMv); + } + } $endif$ @Override diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-BigArrayBlock.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-BigArrayBlock.java.st index bf9e6fec1872..23632bf41349 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-BigArrayBlock.java.st +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-BigArrayBlock.java.st @@ -86,6 +86,29 @@ public final class $Type$BigArrayBlock extends AbstractArrayBlock implements $Ty return null; } +$if(boolean)$ + @Override + public ToMask toMask() { + if (getPositionCount() == 0) { + return new 
ToMask(blockFactory().newConstantBooleanVector(false, 0), false); + } + try (BooleanVector.FixedBuilder builder = blockFactory().newBooleanVectorFixedBuilder(getPositionCount())) { + boolean hasMv = false; + for (int p = 0; p < getPositionCount(); p++) { + builder.appendBoolean(switch (getValueCount(p)) { + case 0 -> false; + case 1 -> getBoolean(getFirstValueIndex(p)); + default -> { + hasMv = true; + yield false; + } + }); + } + return new ToMask(builder.build(), hasMv); + } + } +$endif$ + @Override public $type$ get$Type$(int valueIndex) { return vector.get$Type$(valueIndex); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Block.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Block.java.st index da0769af2d18..67e4ac4bb334 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Block.java.st +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Block.java.st @@ -63,8 +63,16 @@ $if(BytesRef)$ * returns null. Callers must not release the returned block as no extra reference is retained by this method. */ OrdinalBytesRefBlock asOrdinals(); -$endif$ +$elseif(boolean)$ + /** + * Convert this to a {@link BooleanVector "mask"} that's appropriate for + * passing to {@link #keepMask}. Null and multivalued positions will be + * converted to {@code false}. + */ + ToMask toMask(); + +$endif$ @Override $Type$Block filter(int... positions); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Vector.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Vector.java.st index 09f11f350439..e19c1788cdb6 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Vector.java.st +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-Vector.java.st @@ -51,8 +51,8 @@ $if(BytesRef)$ * returns null. 
Callers must not release the returned vector as no extra reference is retained by this method. */ OrdinalBytesRefVector asOrdinals(); -$endif$ +$endif$ @Override $Type$Vector filter(int... positions); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-VectorBlock.java.st b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-VectorBlock.java.st index eec75f62f22f..d4c6859e64b2 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-VectorBlock.java.st +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-VectorBlock.java.st @@ -44,8 +44,15 @@ $if(BytesRef)$ return null; } } -$endif$ +$elseif(boolean)$ + @Override + public ToMask toMask() { + vector.incRef(); + return new ToMask(vector, false); + } + +$endif$ @Override $if(BytesRef)$ public BytesRef getBytesRef(int valueIndex, BytesRef dest) { diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BasicBlockTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BasicBlockTests.java index e8401048af01..ad372da47d6b 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BasicBlockTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BasicBlockTests.java @@ -800,6 +800,12 @@ public void testBooleanBlock() { } assertLookup(block, positions(blockFactory, positionCount + 1000), singletonList(null)); assertEmptyLookup(blockFactory, block); + try (ToMask mask = block.toMask()) { + assertThat(mask.hadMultivaluedFields(), equalTo(false)); + for (int p = 0; p < positionCount; p++) { + assertThat(mask.mask().getBoolean(p), equalTo(p % 10 == 0)); + } + } try (BooleanBlock.Builder blockBuilder = blockFactory.newBooleanBlockBuilder(1)) { BooleanBlock copy = blockBuilder.copyFrom(block, 0, block.getPositionCount()).build(); @@ -826,6 +832,7 @@ public void testBooleanBlock() { IntStream.range(0, 
positionCount).mapToObj(ii -> randomBoolean()).forEach(vectorBuilder::appendBoolean); BooleanVector vector = vectorBuilder.build(); assertSingleValueDenseBlock(vector.asBlock()); + assertToMask(vector); releaseAndAssertBreaker(vector.asBlock()); } } @@ -1358,6 +1365,19 @@ void assertNullValues( assertTrue(block.isNull(randomNullPosition)); assertFalse(block.isNull(randomNonNullPosition)); releaseAndAssertBreaker(block); + if (block instanceof BooleanBlock bb) { + try (ToMask mask = bb.toMask()) { + assertThat(mask.hadMultivaluedFields(), equalTo(false)); + for (int p = 0; p < positionCount; p++) { + assertThat(mask.mask().getBoolean(p), equalTo(nullsMask.get(p) == false && p % 10 == 0)); + } + } + } + } + + void assertZeroPositionsAndRelease(BooleanBlock block) { + assertToMaskZeroPositions(block); + assertZeroPositionsAndRelease((Block) block); } void assertZeroPositionsAndRelease(Block block) { @@ -1366,6 +1386,11 @@ void assertZeroPositionsAndRelease(Block block) { releaseAndAssertBreaker(block); } + void assertZeroPositionsAndRelease(BooleanVector vector) { + assertToMask(vector); + assertZeroPositionsAndRelease((Vector) vector); + } + void assertZeroPositionsAndRelease(Vector vector) { assertThat(vector.getPositionCount(), is(0)); assertKeepMaskEmpty(vector); @@ -1386,6 +1411,20 @@ static void assertKeepMaskEmpty(Vector vector) { } } + static void assertToMaskZeroPositions(BooleanBlock block) { + try (ToMask mask = block.toMask()) { + assertThat(mask.mask().getPositionCount(), equalTo(0)); + assertThat(mask.hadMultivaluedFields(), equalTo(false)); + } + } + + static void assertToMask(BooleanVector vector) { + try (ToMask mask = vector.asBlock().toMask()) { + assertThat(mask.mask(), sameInstance(vector)); + assertThat(mask.hadMultivaluedFields(), equalTo(false)); + } + } + void releaseAndAssertBreaker(Block... 
blocks) { assertThat(breaker.getUsed(), greaterThan(0L)); Page[] pages = Arrays.stream(blocks).map(Page::new).toArray(Page[]::new); @@ -1836,7 +1875,7 @@ static void assertKeepMask(Block block) { /** * Build a random valid "mask" of single valued boolean fields that. */ - private static BooleanVector randomMask(int positions) { + static BooleanVector randomMask(int positions) { try (BooleanVector.Builder builder = TestBlockFactory.getNonBreakingInstance().newBooleanVectorFixedBuilder(positions)) { for (int i = 0; i < positions; i++) { builder.appendBoolean(randomBoolean()); diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayBlockBuilderTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayBlockBuilderTests.java index df32dcaddd92..34d591cd87d8 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayBlockBuilderTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayBlockBuilderTests.java @@ -164,6 +164,12 @@ public void testBooleanVector() throws IOException { assertThat(block.getBoolean(i), equalTo(elements[i])); } assertKeepMask(block); + try (ToMask mask = block.toMask()) { + assertThat(mask.hadMultivaluedFields(), equalTo(false)); + for (int p = 0; p < elements.length; p++) { + assertThat(mask.mask().getBoolean(p), equalTo(elements[p])); + } + } try (var copy = serializeDeserializeBlock(block)) { assertThat(copy, instanceOf(BooleanVectorBlock.class)); assertThat(block.asVector(), instanceOf(BooleanArrayVector.class)); @@ -224,6 +230,12 @@ public void testBooleanBlock() throws IOException { assertThat(block.getBoolean(i), equalTo(elements[i])); } assertKeepMask(block); + try (ToMask mask = block.toMask()) { + assertThat(mask.hadMultivaluedFields(), equalTo(true)); + for (int p = 0; p < elements.length; p++) { + assertThat(mask.mask().getBoolean(p), equalTo(false)); + } + } try (var copy = 
serializeDeserializeBlock(block)) { assertThat(copy, instanceOf(BooleanArrayBlock.class)); assertNull(copy.asVector()); @@ -253,6 +265,12 @@ public void testBooleanBlock() throws IOException { assertThat(block.getBoolean(i), equalTo(elements[i])); } assertKeepMask(block); + try (ToMask mask = block.toMask()) { + assertThat(mask.hadMultivaluedFields(), equalTo(true)); + for (int p = 0; p < elements.length; p++) { + assertThat(mask.mask().getBoolean(p), equalTo(false)); + } + } try (var copy = serializeDeserializeBlock(block)) { assertThat(copy, instanceOf(BooleanBigArrayBlock.class)); assertNull(block.asVector()); @@ -266,4 +284,52 @@ public void testBooleanBlock() throws IOException { } assertThat(blockFactory.breaker().getUsed(), equalTo(0L)); } + + /** + * Tests a block with one value being multivalued and the rest are single valued. + */ + public void testBooleanBlockOneMv() { + int mvCount = between(2, 10); + int positionCount = randomIntBetween(1000, 5000); + blockFactory = new BlockFactory(blockFactory.breaker(), blockFactory.bigArrays(), ByteSizeValue.ofBytes(1)); + try (var builder = blockFactory.newBooleanBlockBuilder(between(1, mvCount + positionCount))) { + boolean[] elements = new boolean[positionCount + mvCount]; + builder.beginPositionEntry(); + for (int i = 0; i < mvCount; i++) { + elements[i] = randomBoolean(); + builder.appendBoolean(elements[i]); + } + builder.endPositionEntry(); + for (int p = 1; p < positionCount; p++) { + elements[mvCount + p] = randomBoolean(); + builder.appendBoolean(elements[mvCount + p]); + } + try (var block = builder.build()) { + assertThat(block, instanceOf(BooleanBigArrayBlock.class)); + assertNull(block.asVector()); + assertThat(block.getPositionCount(), equalTo(positionCount)); + assertThat(block.getValueCount(0), equalTo(mvCount)); + for (int i = 0; i < mvCount; i++) { + assertThat(block.getBoolean(block.getFirstValueIndex(0) + i), equalTo(elements[i])); + } + for (int p = 1; p < positionCount; p++) { + 
assertThat(block.getValueCount(p), equalTo(1)); + assertThat(block.getBoolean(block.getFirstValueIndex(p)), equalTo(elements[mvCount + p])); + } + assertKeepMask(block); + try (ToMask mask = block.toMask()) { + /* + * NOTE: this test is customized to the layout above where we don't make + * any fields with 0 values. + */ + assertThat(mask.hadMultivaluedFields(), equalTo(true)); + assertThat(mask.mask().getBoolean(0), equalTo(false)); + for (int p = 1; p < positionCount; p++) { + assertThat(mask.mask().getBoolean(p), equalTo(elements[mvCount + p])); + } + } + } + } + assertThat(blockFactory.breaker().getUsed(), equalTo(0L)); + } } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayVectorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayVectorTests.java index af4c643a9062..aab8b86f9b79 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayVectorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayVectorTests.java @@ -72,6 +72,12 @@ public void testBoolean() throws IOException { assertEmptyLookup(blockFactory, vector.asBlock()); assertSerialization(block); assertThat(vector.toString(), containsString("BooleanBigArrayVector[positions=" + positionCount)); + try (ToMask mask = block.toMask()) { + assertThat(mask.hadMultivaluedFields(), equalTo(false)); + for (int p = 0; p < values.length; p++) { + assertThat(mask.mask().getBoolean(p), equalTo(values[p])); + } + } } } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockMultiValuedTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockMultiValuedTests.java index c5e130726844..e37b2638b56f 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockMultiValuedTests.java +++ 
b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BlockMultiValuedTests.java @@ -31,6 +31,7 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.nullValue; public class BlockMultiValuedTests extends ESTestCase { @ParametersFactory @@ -122,6 +123,54 @@ public void testLookupFromSingleManyPages() { assertLookup(ByteSizeValue.ofBytes(1), between(1, 32), p -> 1); } + public void testToMask() { + if (elementType != ElementType.BOOLEAN) { + return; + } + int positionCount = randomIntBetween(1, 16 * 1024); + var b = BasicBlockTests.randomBlock(blockFactory(), elementType, positionCount, nullAllowed, 2, 10, 0, 0); + try (ToMask mask = ((BooleanBlock) b.block()).toMask()) { + assertThat(mask.hadMultivaluedFields(), equalTo(true)); + for (int p = 0; p < b.values().size(); p++) { + List v = b.values().get(p); + if (v == null) { + assertThat(mask.mask().getBoolean(p), equalTo(false)); + continue; + } + if (v.size() != 1) { + assertThat(mask.mask().getBoolean(p), equalTo(false)); + continue; + } + assertThat(mask.mask().getBoolean(p), equalTo(v.get(0))); + } + } finally { + b.block().close(); + } + } + + public void testMask() { + int positionCount = randomIntBetween(1, 16 * 1024); + var b = BasicBlockTests.randomBlock(blockFactory(), elementType, positionCount, nullAllowed, 0, 10, 0, 0); + try ( + BooleanVector mask = BasicBlockTests.randomMask(b.values().size() + between(0, 1000)); + Block masked = b.block().keepMask(mask) + ) { + for (int p = 0; p < b.values().size(); p++) { + List inputValues = b.values().get(p); + List valuesAtPosition = BasicBlockTests.valuesAtPositions(masked, p, p + 1).get(0); + if (inputValues == null || mask.getBoolean(p) == false) { + assertThat(masked.isNull(p), equalTo(true)); + assertThat(valuesAtPosition, nullValue()); + continue; + } + assertThat(masked.isNull(p), equalTo(false)); + assertThat(valuesAtPosition, equalTo(inputValues)); 
+ } + } finally { + b.block().close(); + } + } + private void assertFiltered(boolean all, boolean shuffled) { int positionCount = randomIntBetween(1, 16 * 1024); var b = BasicBlockTests.randomBlock(blockFactory(), elementType, positionCount, nullAllowed, 0, 10, 0, 0); From 68b211e025f2222704e50b2f6b6890dbf8f94515 Mon Sep 17 00:00:00 2001 From: Salvatore Campagna <93581129+salvatore-campagna@users.noreply.github.com> Date: Thu, 29 Aug 2024 16:01:56 +0200 Subject: [PATCH 23/30] Store original source for keywords using a normalizer (#112151) Using a normalizer for a keyword field might result in not being able to reconstruct the original source when using synthetic source. Here if synthetic source is enabled and a normalizer is configured we store the original value in a stored field which is later used at document reconstruction time to reconstruct the field value as it was in the original document. We use the same fallback solution we use in other places like `ignore_malformed`. --- docs/changelog/112151.yaml | 5 ++ .../test/mget/90_synthetic_source.yml | 88 +++++++++++++++++++ .../index/mapper/KeywordFieldMapper.java | 19 ++-- .../index/mapper/MapperFeatures.java | 1 + .../KeywordFieldSyntheticSourceSupport.java | 9 +- 5 files changed, 105 insertions(+), 17 deletions(-) create mode 100644 docs/changelog/112151.yaml diff --git a/docs/changelog/112151.yaml b/docs/changelog/112151.yaml new file mode 100644 index 000000000000..f5cbfd8da07c --- /dev/null +++ b/docs/changelog/112151.yaml @@ -0,0 +1,5 @@ +pr: 112151 +summary: Store original source for keywords using a normalizer +area: Logs +type: enhancement +issues: [] diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mget/90_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mget/90_synthetic_source.yml index 2935c0c1c41b..ff17a92ed0fc 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mget/90_synthetic_source.yml +++ 
b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mget/90_synthetic_source.yml @@ -46,6 +46,94 @@ keyword: docs.1._source: kwd: bar +--- +keyword with normalizer: + - requires: + cluster_features: [ "mapper.keyword_normalizer_synthetic_source" ] + reason: support for normalizer on keyword fields + - do: + indices.create: + index: test-keyword-with-normalizer + body: + settings: + analysis: + normalizer: + lowercase: + type: custom + filter: + - lowercase + mappings: + _source: + mode: synthetic + properties: + keyword: + type: keyword + normalizer: lowercase + keyword_with_ignore_above: + type: keyword + normalizer: lowercase + ignore_above: 10 + keyword_without_doc_values: + type: keyword + normalizer: lowercase + doc_values: false + + - do: + index: + index: test-keyword-with-normalizer + id: 1 + body: + keyword: "the Quick Brown Fox jumps over the lazy Dog" + keyword_with_ignore_above: "the Quick Brown Fox jumps over the lazy Dog" + keyword_without_doc_values: "the Quick Brown Fox jumps over the lazy Dog" + + - do: + index: + index: test-keyword-with-normalizer + id: 2 + body: + keyword: "The five BOXING wizards jump Quickly" + keyword_with_ignore_above: "The five BOXING wizards jump Quickly" + keyword_without_doc_values: "The five BOXING wizards jump Quickly" + + - do: + index: + index: test-keyword-with-normalizer + id: 3 + body: + keyword: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ] + keyword_with_ignore_above: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ] + keyword_without_doc_values: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ] + + - do: + mget: + index: test-keyword-with-normalizer + body: + ids: [ 1, 2, 3 ] + - match: { docs.0._index: "test-keyword-with-normalizer" } + - match: { docs.0._id: "1" } + - match: + docs.0._source: + keyword: "the Quick Brown Fox jumps over the lazy Dog" + keyword_with_ignore_above: "the Quick Brown Fox jumps over the lazy Dog" + 
keyword_without_doc_values: "the Quick Brown Fox jumps over the lazy Dog" + + - match: { docs.1._index: "test-keyword-with-normalizer" } + - match: { docs.1._id: "2" } + - match: + docs.1._source: + keyword: "The five BOXING wizards jump Quickly" + keyword_with_ignore_above: "The five BOXING wizards jump Quickly" + keyword_without_doc_values: "The five BOXING wizards jump Quickly" + + - match: { docs.2._index: "test-keyword-with-normalizer" } + - match: { docs.2._id: "3" } + - match: + docs.2._source: + keyword: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ] + keyword_with_ignore_above: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ] + keyword_without_doc_values: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ] + --- stored text: - requires: diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index 9645b4397df4..d130f37c3e8e 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -89,6 +89,7 @@ public final class KeywordFieldMapper extends FieldMapper { public static final String CONTENT_TYPE = "keyword"; static final NodeFeature KEYWORD_DIMENSION_IGNORE_ABOVE = new NodeFeature("mapper.keyword_dimension_ignore_above"); + static final NodeFeature KEYWORD_NORMALIZER_SYNTHETIC_SOURCE = new NodeFeature("mapper.keyword_normalizer_synthetic_source"); public static class Defaults { public static final FieldType FIELD_TYPE; @@ -856,7 +857,7 @@ public boolean hasNormalizer() { private final Script script; private final ScriptCompiler scriptCompiler; private final IndexVersion indexCreatedVersion; - private final boolean storeIgnored; + private final boolean isSyntheticSource; private final IndexAnalyzers indexAnalyzers; @@ -866,7 +867,7 @@ private KeywordFieldMapper( KeywordFieldType 
mappedFieldType, MultiFields multiFields, CopyTo copyTo, - boolean storeIgnored, + boolean isSyntheticSource, Builder builder ) { super(simpleName, mappedFieldType, multiFields, copyTo, builder.script.get() != null, builder.onScriptError.getValue()); @@ -881,7 +882,7 @@ private KeywordFieldMapper( this.indexAnalyzers = builder.indexAnalyzers; this.scriptCompiler = builder.scriptCompiler; this.indexCreatedVersion = builder.indexCreatedVersion; - this.storeIgnored = storeIgnored; + this.isSyntheticSource = isSyntheticSource; } @Override @@ -916,7 +917,7 @@ private void indexValue(DocumentParserContext context, String value) { if (value.length() > fieldType().ignoreAbove()) { context.addIgnoredField(fullPath()); - if (storeIgnored) { + if (isSyntheticSource) { // Save a copy of the field so synthetic source can load it context.doc().add(new StoredField(originalName(), new BytesRef(value))); } @@ -1026,6 +1027,11 @@ private String originalName() { @Override protected SyntheticSourceMode syntheticSourceMode() { + if (hasNormalizer()) { + // NOTE: no matter if we have doc values or not we use a stored field to reconstruct the original value + // whose doc values would be altered by the normalizer + return SyntheticSourceMode.FALLBACK; + } if (fieldType.stored() || hasDocValues) { return SyntheticSourceMode.NATIVE; } @@ -1047,11 +1053,6 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader(String simpleName) "field [" + fullPath() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to" ); } - if (hasNormalizer()) { - throw new IllegalArgumentException( - "field [" + fullPath() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares a normalizer" - ); - } if (syntheticSourceMode() != SyntheticSourceMode.NATIVE) { return super.syntheticFieldLoader(); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java 
b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java index 6dce9d6c7b86..63bbef061c61 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -33,6 +33,7 @@ public Set getFeatures() { NodeMappingStats.SEGMENT_LEVEL_FIELDS_STATS, BooleanFieldMapper.BOOLEAN_DIMENSION, ObjectMapper.SUBOBJECTS_AUTO, + KeywordFieldMapper.KEYWORD_NORMALIZER_SYNTHETIC_SOURCE, SourceFieldMapper.SYNTHETIC_SOURCE_STORED_FIELDS_ADVANCE_FIX ); } diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/KeywordFieldSyntheticSourceSupport.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/KeywordFieldSyntheticSourceSupport.java index 6abe92385131..2f452161b10c 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/KeywordFieldSyntheticSourceSupport.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/KeywordFieldSyntheticSourceSupport.java @@ -21,8 +21,6 @@ import java.util.stream.Collectors; import java.util.stream.Stream; -import static org.hamcrest.Matchers.equalTo; - public class KeywordFieldSyntheticSourceSupport implements MapperTestCase.SyntheticSourceSupport { private final Integer ignoreAbove; private final boolean allIgnored; @@ -128,11 +126,6 @@ private void mapping(XContentBuilder b) throws IOException { @Override public List invalidExample() throws IOException { - return List.of( - new MapperTestCase.SyntheticSourceInvalidExample( - equalTo("field [field] of type [keyword] doesn't support synthetic source because it declares a normalizer"), - b -> b.field("type", "keyword").field("normalizer", "lowercase") - ) - ); + return List.of(); } } From 1be4f65da2ac35d971626353b8a076aa75a7b693 Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 29 Aug 2024 15:20:03 +0100 Subject: [PATCH 24/30] Add constants for UUID lengths (#112353) Our UUID strings have fixed lengths (depending on the type of 
UUID). Sometimes we might want code to rely on knowing these lengths rather than doing some other string manipulations to look for a boundary. This commit exposes constants for these things. --- .../common/RandomBasedUUIDGenerator.java | 4 +- .../common/TimeBasedUUIDGenerator.java | 4 +- .../java/org/elasticsearch/common/UUIDs.java | 40 +++++++++++++++---- .../org/elasticsearch/common/UUIDTests.java | 16 ++++++++ .../blobstore/RepositoryFileType.java | 5 ++- 5 files changed, 57 insertions(+), 12 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/common/RandomBasedUUIDGenerator.java b/server/src/main/java/org/elasticsearch/common/RandomBasedUUIDGenerator.java index e731cf3bc58b..58c23ab9aa39 100644 --- a/server/src/main/java/org/elasticsearch/common/RandomBasedUUIDGenerator.java +++ b/server/src/main/java/org/elasticsearch/common/RandomBasedUUIDGenerator.java @@ -56,8 +56,10 @@ public static String getBase64UUID(Random random) { return Base64.getUrlEncoder().withoutPadding().encodeToString(getUUIDBytes(random)); } + static final int SIZE_IN_BYTES = 16; + private static byte[] getUUIDBytes(Random random) { - final byte[] randomBytes = new byte[16]; + final byte[] randomBytes = new byte[SIZE_IN_BYTES]; random.nextBytes(randomBytes); /* Set the version to version 4 (see http://www.ietf.org/rfc/rfc4122.txt) * The randomly or pseudo-randomly generated version. 
diff --git a/server/src/main/java/org/elasticsearch/common/TimeBasedUUIDGenerator.java b/server/src/main/java/org/elasticsearch/common/TimeBasedUUIDGenerator.java index f7f7f520fec9..d66b0f579ce3 100644 --- a/server/src/main/java/org/elasticsearch/common/TimeBasedUUIDGenerator.java +++ b/server/src/main/java/org/elasticsearch/common/TimeBasedUUIDGenerator.java @@ -47,6 +47,8 @@ protected byte[] macAddress() { return SECURE_MUNGED_ADDRESS; } + static final int SIZE_IN_BYTES = 15; + @Override public String getBase64UUID() { final int sequenceId = sequenceNumber.incrementAndGet() & 0xffffff; @@ -61,7 +63,7 @@ public String getBase64UUID() { sequenceId == 0 ? (lastTimestamp, currentTimeMillis) -> Math.max(lastTimestamp, currentTimeMillis) + 1 : Math::max ); - final byte[] uuidBytes = new byte[15]; + final byte[] uuidBytes = new byte[SIZE_IN_BYTES]; int i = 0; // We have auto-generated ids, which are usually used for append-only workloads. diff --git a/server/src/main/java/org/elasticsearch/common/UUIDs.java b/server/src/main/java/org/elasticsearch/common/UUIDs.java index 43a232e82510..ebc0978f38d4 100644 --- a/server/src/main/java/org/elasticsearch/common/UUIDs.java +++ b/server/src/main/java/org/elasticsearch/common/UUIDs.java @@ -17,26 +17,50 @@ public class UUIDs { private static final RandomBasedUUIDGenerator RANDOM_UUID_GENERATOR = new RandomBasedUUIDGenerator(); private static final UUIDGenerator TIME_UUID_GENERATOR = new TimeBasedUUIDGenerator(); - /** Generates a time-based UUID (similar to Flake IDs), which is preferred when generating an ID to be indexed into a Lucene index as - * primary key. The id is opaque and the implementation is free to change at any time! */ + /** + * The length of a UUID string generated by {@link #base64UUID}. + */ + // A 15-byte time-based UUID is base64-encoded as 5 3-byte chunks (each becoming 4 chars after encoding). 
+ public static final int TIME_BASED_UUID_STRING_LENGTH = 20; + + /** + * Generates a time-based UUID (similar to Flake IDs), which is preferred when generating an ID to be indexed into a Lucene index as + * primary key. The id is opaque and the implementation is free to change at any time! + * The resulting string has length {@link #TIME_BASED_UUID_STRING_LENGTH}. + */ public static String base64UUID() { return TIME_UUID_GENERATOR.getBase64UUID(); } - /** Returns a Base64 encoded version of a Version 4.0 compatible UUID as defined here: http://www.ietf.org/rfc/rfc4122.txt, using the - * provided {@code Random} instance */ + /** + * The length of a UUID string generated by {@link #randomBase64UUID} and {@link #randomBase64UUIDSecureString}. + */ + // A 16-byte v4 UUID is base64-encoded as 5 3-byte chunks (each becoming 4 chars after encoding) plus another byte (becomes 2 chars). + public static final int RANDOM_BASED_UUID_STRING_LENGTH = 22; + + /** + * Returns a Base64 encoded string representing a RFC4122 version 4 UUID, using the + * provided {@code Random} instance. + * The resulting string has length {@link #RANDOM_BASED_UUID_STRING_LENGTH}. + */ public static String randomBase64UUID(Random random) { return RandomBasedUUIDGenerator.getBase64UUID(random); } - /** Returns a Base64 encoded version of a Version 4.0 compatible UUID as defined here: http://www.ietf.org/rfc/rfc4122.txt, using a - * private {@code SecureRandom} instance */ + /** + * Returns a Base64 encoded string representing a RFC4122 version 4 UUID, using a + * private {@code SecureRandom} instance. + * The resulting string has length {@link #RANDOM_BASED_UUID_STRING_LENGTH}. 
+ */ public static String randomBase64UUID() { return RANDOM_UUID_GENERATOR.getBase64UUID(); } - /** Returns a Base64 encoded {@link SecureString} of a Version 4.0 compatible UUID as defined here: http://www.ietf.org/rfc/rfc4122.txt, - * using a private {@code SecureRandom} instance */ + /** + * Returns a Base64 encoded {@link SecureString} representing a RFC4122 version 4 + * UUID, using a private {@code SecureRandom} instance. + * The resulting string has length {@link #RANDOM_BASED_UUID_STRING_LENGTH}. + */ public static SecureString randomBase64UUIDSecureString() { return RandomBasedUUIDGenerator.getBase64UUIDSecureString(); } diff --git a/server/src/test/java/org/elasticsearch/common/UUIDTests.java b/server/src/test/java/org/elasticsearch/common/UUIDTests.java index 5af036ce0648..3229049b67b4 100644 --- a/server/src/test/java/org/elasticsearch/common/UUIDTests.java +++ b/server/src/test/java/org/elasticsearch/common/UUIDTests.java @@ -176,4 +176,20 @@ protected byte[] macAddress() { ); return bytesPerDoc; } + + public void testStringLength() { + assertEquals(UUIDs.RANDOM_BASED_UUID_STRING_LENGTH, getUnpaddedBase64StringLength(RandomBasedUUIDGenerator.SIZE_IN_BYTES)); + assertEquals(UUIDs.RANDOM_BASED_UUID_STRING_LENGTH, UUIDs.randomBase64UUID().length()); + assertEquals(UUIDs.RANDOM_BASED_UUID_STRING_LENGTH, UUIDs.randomBase64UUID(random()).length()); + try (var secureString = UUIDs.randomBase64UUIDSecureString()) { + assertEquals(UUIDs.RANDOM_BASED_UUID_STRING_LENGTH, secureString.toString().length()); + } + + assertEquals(UUIDs.TIME_BASED_UUID_STRING_LENGTH, getUnpaddedBase64StringLength(TimeBasedUUIDGenerator.SIZE_IN_BYTES)); + assertEquals(UUIDs.TIME_BASED_UUID_STRING_LENGTH, UUIDs.base64UUID().length()); + } + + private static int getUnpaddedBase64StringLength(int sizeInBytes) { + return (int) Math.ceil(sizeInBytes * 4.0 / 3.0); + } } diff --git a/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/RepositoryFileType.java 
b/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/RepositoryFileType.java index 014cbcd2bcc3..8d20cce33bbb 100644 --- a/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/RepositoryFileType.java +++ b/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/RepositoryFileType.java @@ -9,6 +9,7 @@ package org.elasticsearch.repositories.blobstore; import org.elasticsearch.common.Strings; +import org.elasticsearch.common.UUIDs; import java.nio.file.Path; import java.util.regex.Pattern; @@ -38,9 +39,9 @@ public enum RepositoryFileType { // decimal numbers .replace("NUM", "(0|[1-9][0-9]*)") // 15-byte UUIDS from TimeBasedUUIDGenerator - .replace("SHORTUUID", "[0-9a-zA-Z_-]{20}") + .replace("SHORTUUID", "[0-9a-zA-Z_-]{" + UUIDs.TIME_BASED_UUID_STRING_LENGTH + "}") // 16-byte UUIDs from RandomBasedUUIDGenerator - .replace("UUID", "[0-9a-zA-Z_-]{22}") + .replace("UUID", "[0-9a-zA-Z_-]{" + UUIDs.RANDOM_BASED_UUID_STRING_LENGTH + "}") + ")$" ); } From 9e01181f0dd9bf6ea919164f8929f0874ad6e26d Mon Sep 17 00:00:00 2001 From: Albert Zaharovits Date: Thu, 29 Aug 2024 17:33:05 +0300 Subject: [PATCH 25/30] Remove unused cluster blocks in create index (#112352) Remove unused cluster blocks in create index --- .../CreateIndexClusterStateUpdateRequest.java | 9 ------- .../metadata/MetadataCreateIndexService.java | 24 ++++--------------- .../MetadataCreateIndexServiceTests.java | 10 +------- 3 files changed, 6 insertions(+), 37 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/create/CreateIndexClusterStateUpdateRequest.java b/server/src/main/java/org/elasticsearch/action/admin/indices/create/CreateIndexClusterStateUpdateRequest.java index 8a46daa45e73..948199fbe74f 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/create/CreateIndexClusterStateUpdateRequest.java +++ 
b/server/src/main/java/org/elasticsearch/action/admin/indices/create/CreateIndexClusterStateUpdateRequest.java @@ -12,7 +12,6 @@ import org.elasticsearch.action.admin.indices.shrink.ResizeType; import org.elasticsearch.action.support.ActiveShardCount; import org.elasticsearch.cluster.ack.ClusterStateUpdateRequest; -import org.elasticsearch.cluster.block.ClusterBlock; import org.elasticsearch.cluster.metadata.ComposableIndexTemplate; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.common.settings.Settings; @@ -43,8 +42,6 @@ public class CreateIndexClusterStateUpdateRequest extends ClusterStateUpdateRequ private final Set aliases = new HashSet<>(); - private final Set blocks = new HashSet<>(); - private ActiveShardCount waitForActiveShards = ActiveShardCount.DEFAULT; private boolean performReroute = true; @@ -125,10 +122,6 @@ public Set aliases() { return aliases; } - public Set blocks() { - return blocks; - } - public Index recoverFrom() { return recoverFrom; } @@ -229,8 +222,6 @@ public String toString() { + settings + ", aliases=" + aliases - + ", blocks=" - + blocks + ", waitForActiveShards=" + waitForActiveShards + ", systemDataStreamDescriptor=" diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java index b1a19d99dcb1..07dcb7baf077 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java @@ -25,7 +25,6 @@ import org.elasticsearch.cluster.AckedClusterStateUpdateTask; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.ClusterStateUpdateTask; -import org.elasticsearch.cluster.block.ClusterBlock; import org.elasticsearch.cluster.block.ClusterBlockLevel; import org.elasticsearch.cluster.block.ClusterBlocks; import 
org.elasticsearch.cluster.node.DiscoveryNodes; @@ -514,7 +513,6 @@ private ClusterState applyCreateIndexWithTemporaryService( ClusterState updated = clusterStateCreateIndex( currentState, - request.blocks(), indexMetadata, metadataTransformer, allocationService.getShardRoutingRoleStrategy() @@ -1231,7 +1229,6 @@ public static List resolveAndValidateAliases( */ static ClusterState clusterStateCreateIndex( ClusterState currentState, - Set clusterBlocks, IndexMetadata indexMetadata, BiConsumer metadataTransformer, ShardRoutingRoleStrategy shardRoutingRoleStrategy @@ -1245,14 +1242,13 @@ static ClusterState clusterStateCreateIndex( newMetadata = currentState.metadata().withAddedIndex(indexMetadata); } - String indexName = indexMetadata.getIndex().getName(); - ClusterBlocks.Builder blocks = createClusterBlocksBuilder(currentState, indexName, clusterBlocks); - blocks.updateBlocks(indexMetadata); + var blocksBuilder = ClusterBlocks.builder().blocks(currentState.blocks()); + blocksBuilder.updateBlocks(indexMetadata); - RoutingTable.Builder routingTableBuilder = RoutingTable.builder(shardRoutingRoleStrategy, currentState.routingTable()) - .addAsNew(newMetadata.index(indexName)); + var routingTableBuilder = RoutingTable.builder(shardRoutingRoleStrategy, currentState.routingTable()) + .addAsNew(newMetadata.index(indexMetadata.getIndex().getName())); - return ClusterState.builder(currentState).blocks(blocks).metadata(newMetadata).routingTable(routingTableBuilder).build(); + return ClusterState.builder(currentState).blocks(blocksBuilder).metadata(newMetadata).routingTable(routingTableBuilder).build(); } static IndexMetadata buildIndexMetadata( @@ -1325,16 +1321,6 @@ private static IndexMetadata.Builder createIndexMetadataBuilder( return builder; } - private static ClusterBlocks.Builder createClusterBlocksBuilder(ClusterState currentState, String index, Set blocks) { - ClusterBlocks.Builder blocksBuilder = ClusterBlocks.builder().blocks(currentState.blocks()); - if 
(blocks.isEmpty() == false) { - for (ClusterBlock block : blocks) { - blocksBuilder.addIndexBlock(index, block); - } - } - return blocksBuilder; - } - private static void updateIndexMappingsAndBuildSortOrder( IndexService indexService, CreateIndexClusterStateUpdateRequest request, diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexServiceTests.java index 8a487e565362..f7d343b43b29 100644 --- a/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexServiceTests.java @@ -967,13 +967,7 @@ public void testClusterStateCreateIndexThrowsWriteIndexValidationException() thr assertThat( expectThrows( IllegalStateException.class, - () -> clusterStateCreateIndex( - currentClusterState, - Set.of(), - newIndex, - null, - TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY - ) + () -> clusterStateCreateIndex(currentClusterState, newIndex, null, TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY) ).getMessage(), startsWith("alias [alias1] has more than one write index [") ); @@ -991,7 +985,6 @@ public void testClusterStateCreateIndex() { ClusterState updatedClusterState = clusterStateCreateIndex( currentClusterState, - Set.of(INDEX_READ_ONLY_BLOCK), newIndexMetadata, null, TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY @@ -1037,7 +1030,6 @@ public void testClusterStateCreateIndexWithMetadataTransaction() { ClusterState updatedClusterState = clusterStateCreateIndex( currentClusterState, - Set.of(INDEX_READ_ONLY_BLOCK), newIndexMetadata, metadataTransformer, TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY From cf0e18872878cce9332722c491b5cc7749106ae4 Mon Sep 17 00:00:00 2001 From: Stanislav Malyshev Date: Thu, 29 Aug 2024 08:56:00 -0600 Subject: [PATCH 26/30] Add isAsync() to SearcTask and eliminate code for async detection from 
TransportSearchAction (#112311) --- .../action/search/SearchTask.java | 7 +++++ .../action/search/TransportSearchAction.java | 30 +------------------ .../xpack/search/AsyncSearchTask.java | 5 ++++ 3 files changed, 13 insertions(+), 29 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchTask.java b/server/src/main/java/org/elasticsearch/action/search/SearchTask.java index 3bf72313c4c2..cc5d60ad0b0c 100644 --- a/server/src/main/java/org/elasticsearch/action/search/SearchTask.java +++ b/server/src/main/java/org/elasticsearch/action/search/SearchTask.java @@ -69,4 +69,11 @@ public Supplier getSearchResponseMergerSupplier() { public void setSearchResponseMergerSupplier(Supplier supplier) { this.searchResponseMergerSupplier = supplier; } + + /** + * Is this async search? + */ + public boolean isAsync() { + return false; + } } diff --git a/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java b/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java index 6e1645c1ed71..32ee9c331295 100644 --- a/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java @@ -369,7 +369,7 @@ void executeRequest( } else { if ((listener instanceof TelemetryListener tl) && CCS_TELEMETRY_FEATURE_FLAG.isEnabled()) { tl.setRemotes(resolvedIndices.getRemoteClusterIndices().size()); - if (isAsyncSearchTask(task)) { + if (task.isAsync()) { tl.setFeature(CCSUsageTelemetry.ASYNC_FEATURE); } String client = task.getHeader(Task.X_ELASTIC_PRODUCT_ORIGIN_HTTP_HEADER); @@ -1514,34 +1514,6 @@ public SearchPhase newSearchPhase( } } - /** - * TransportSearchAction cannot access async-search code, so can't check whether this the Task - * is an instance of AsyncSearchTask, so this roundabout method is used - * @param searchTask SearchTask to analyze - * @return true if this is an async search task; false if a synchronous 
search task - */ - private boolean isAsyncSearchTask(SearchTask searchTask) { - assert assertAsyncSearchTaskListener(searchTask) : "AsyncSearchTask SearchProgressListener is not one of the expected types"; - // AsyncSearchTask will not return SearchProgressListener.NOOP, since it uses its own progress listener - // which delegates to CCSSingleCoordinatorSearchProgressListener when minimizing roundtrips. - // Only synchronous SearchTask uses SearchProgressListener.NOOP or CCSSingleCoordinatorSearchProgressListener directly - return searchTask.getProgressListener() != SearchProgressListener.NOOP - && searchTask.getProgressListener() instanceof CCSSingleCoordinatorSearchProgressListener == false; - } - - /** - * @param searchTask SearchTask to analyze - * @return true if AsyncSearchTask still uses its own special listener, not one of the two that synchronous SearchTask uses - */ - private boolean assertAsyncSearchTaskListener(SearchTask searchTask) { - if (searchTask.getClass().getSimpleName().contains("AsyncSearchTask")) { - SearchProgressListener progressListener = searchTask.getProgressListener(); - return progressListener != SearchProgressListener.NOOP - && progressListener instanceof CCSSingleCoordinatorSearchProgressListener == false; - } - return true; - } - private static void validateAndResolveWaitForCheckpoint( ClusterState clusterState, IndexNameExpressionResolver resolver, diff --git a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java index c0305f873327..5068ac69e462 100644 --- a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java +++ b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java @@ -545,4 +545,9 @@ public void onFailure(Exception exc) { executeCompletionListeners(); } } + + @Override + public boolean isAsync() { + return true; + } 
} From 8f526098dbfcb109b8e5b01ee436e09491169025 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Thu, 29 Aug 2024 16:57:51 +0200 Subject: [PATCH 27/30] Fix SearchServiceTests not waiting for scroll clear (#111547) We were not waiting on the response here but assume the scrolls are cleared in the following lines. This worked as long as the transport action wasn't forking but is broken now that we fork to generic. Fixed by just waiting. closes #111529 --- .../test/java/org/elasticsearch/search/SearchServiceTests.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/test/java/org/elasticsearch/search/SearchServiceTests.java b/server/src/test/java/org/elasticsearch/search/SearchServiceTests.java index bdddea58b713..2617f82b09f0 100644 --- a/server/src/test/java/org/elasticsearch/search/SearchServiceTests.java +++ b/server/src/test/java/org/elasticsearch/search/SearchServiceTests.java @@ -1548,7 +1548,7 @@ public void testMaxOpenScrollContexts() throws Exception { ClearScrollRequest clearScrollRequest = new ClearScrollRequest(); clearScrollRequest.setScrollIds(clearScrollIds); - client().clearScroll(clearScrollRequest); + client().clearScroll(clearScrollRequest).get(); for (int i = 0; i < clearScrollIds.size(); i++) { client().prepareSearch("index").setSize(1).setScroll(TimeValue.timeValueMinutes(1)).get().decRef(); From e966d0d9da74cb24c97a17d174f1b65324411e2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20FOUCRET?= Date: Thu, 29 Aug 2024 17:30:16 +0200 Subject: [PATCH 28/30] Removing the feature flag mechanism for LTR. 
(#112358) --- .../ml/DefaultMachineLearningExtension.java | 5 ----- .../elasticsearch/xpack/ml/MachineLearning.java | 14 +++++--------- .../xpack/ml/MachineLearningExtension.java | 4 ---- .../ml/LocalStateMachineLearningAdOnly.java | 2 +- .../ml/LocalStateMachineLearningDfaOnly.java | 2 +- .../ml/LocalStateMachineLearningNlpOnly.java | 2 +- .../MachineLearningInfoTransportActionTests.java | 9 +-------- .../xpack/ml/MachineLearningTests.java | 16 ++++------------ 8 files changed, 13 insertions(+), 41 deletions(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/DefaultMachineLearningExtension.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/DefaultMachineLearningExtension.java index 66f4797ef707..fa94bf96c116 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/DefaultMachineLearningExtension.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/DefaultMachineLearningExtension.java @@ -51,11 +51,6 @@ public boolean isNlpEnabled() { return true; } - @Override - public boolean isLearningToRankEnabled() { - return true; - } - @Override public String[] getAnalyticsDestIndexAllowedSettings() { return ANALYTICS_DEST_INDEX_ALLOWED_SETTINGS; diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java index c4bf92401be9..5876836185ba 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java @@ -902,7 +902,7 @@ private static void reportClashingNodeAttribute(String attrName) { @Override public List> getRescorers() { - if (enabled && machineLearningExtension.get().isLearningToRankEnabled()) { + if (enabled) { return List.of( new RescorerSpec<>( LearningToRankRescorerBuilder.NAME, @@ -1864,10 +1864,8 @@ public List getNamedXContent() { ) ); namedXContent.addAll(new 
CorrelationNamedContentProvider().getNamedXContentParsers()); - // LTR Combine with Inference named content provider when feature flag is removed - if (machineLearningExtension.get().isLearningToRankEnabled()) { - namedXContent.addAll(new MlLTRNamedXContentProvider().getNamedXContentParsers()); - } + namedXContent.addAll(new MlLTRNamedXContentProvider().getNamedXContentParsers()); + return namedXContent; } @@ -1958,10 +1956,8 @@ public List getNamedWriteables() { namedWriteables.addAll(MlAutoscalingNamedWritableProvider.getNamedWriteables()); namedWriteables.addAll(new CorrelationNamedContentProvider().getNamedWriteables()); namedWriteables.addAll(new ChangePointNamedContentProvider().getNamedWriteables()); - // LTR Combine with Inference named content provider when feature flag is removed - if (machineLearningExtension.get().isLearningToRankEnabled()) { - namedWriteables.addAll(new MlLTRNamedXContentProvider().getNamedWriteables()); - } + namedWriteables.addAll(new MlLTRNamedXContentProvider().getNamedWriteables()); + return namedWriteables; } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearningExtension.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearningExtension.java index 528883439ef2..f46652978753 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearningExtension.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearningExtension.java @@ -25,10 +25,6 @@ default void configure(Settings settings) {} boolean isNlpEnabled(); - default boolean isLearningToRankEnabled() { - return true; - } - default boolean disableInferenceProcessCache() { return false; } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningAdOnly.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningAdOnly.java index 175a035a70f7..3ff3a4a404f9 100644 --- 
a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningAdOnly.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningAdOnly.java @@ -14,6 +14,6 @@ public class LocalStateMachineLearningAdOnly extends LocalStateMachineLearning { public LocalStateMachineLearningAdOnly(final Settings settings, final Path configPath) { - super(settings, configPath, new MlTestExtensionLoader(new MlTestExtension(true, true, true, false, false, false))); + super(settings, configPath, new MlTestExtensionLoader(new MlTestExtension(true, true, true, false, false))); } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningDfaOnly.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningDfaOnly.java index f054e52dc29e..1a72f27865d8 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningDfaOnly.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningDfaOnly.java @@ -14,6 +14,6 @@ public class LocalStateMachineLearningDfaOnly extends LocalStateMachineLearning { public LocalStateMachineLearningDfaOnly(final Settings settings, final Path configPath) { - super(settings, configPath, new MlTestExtensionLoader(new MlTestExtension(true, true, false, true, false, false))); + super(settings, configPath, new MlTestExtensionLoader(new MlTestExtension(true, true, false, true, false))); } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningNlpOnly.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningNlpOnly.java index a3d684011e93..0f11e8033b83 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningNlpOnly.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearningNlpOnly.java @@ -14,6 +14,6 @@ public class 
LocalStateMachineLearningNlpOnly extends LocalStateMachineLearning { public LocalStateMachineLearningNlpOnly(final Settings settings, final Path configPath) { - super(settings, configPath, new MlTestExtensionLoader(new MlTestExtension(true, true, false, false, true, false))); + super(settings, configPath, new MlTestExtensionLoader(new MlTestExtension(true, true, false, false, true))); } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java index afa372fb9452..e5575abfeb02 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java @@ -160,14 +160,7 @@ private MachineLearningUsageTransportAction newUsageAction( licenseState, jobManagerHolder, new MachineLearningExtensionHolder( - new MachineLearningTests.MlTestExtension( - true, - true, - isAnomalyDetectionEnabled, - isDataFrameAnalyticsEnabled, - isNlpEnabled, - true - ) + new MachineLearningTests.MlTestExtension(true, true, isAnomalyDetectionEnabled, isDataFrameAnalyticsEnabled, isNlpEnabled) ) ); } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningTests.java index c35b9da7b2bd..8a05537917ab 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningTests.java @@ -220,7 +220,7 @@ public void testNoAttributes_givenClash() throws IOException { public void testAnomalyDetectionOnly() throws IOException { Settings settings = Settings.builder().put("path.home", createTempDir()).build(); - MlTestExtensionLoader loader = new MlTestExtensionLoader(new 
MlTestExtension(false, false, true, false, false, false)); + MlTestExtensionLoader loader = new MlTestExtensionLoader(new MlTestExtension(false, false, true, false, false)); try (MachineLearning machineLearning = createTrialLicensedMachineLearning(settings, loader)) { List restHandlers = machineLearning.getRestHandlers(settings, null, null, null, null, null, null, null, null); assertThat(restHandlers, hasItem(instanceOf(RestMlInfoAction.class))); @@ -240,7 +240,7 @@ public void testAnomalyDetectionOnly() throws IOException { public void testDataFrameAnalyticsOnly() throws IOException { Settings settings = Settings.builder().put("path.home", createTempDir()).build(); - MlTestExtensionLoader loader = new MlTestExtensionLoader(new MlTestExtension(false, false, false, true, false, false)); + MlTestExtensionLoader loader = new MlTestExtensionLoader(new MlTestExtension(false, false, false, true, false)); try (MachineLearning machineLearning = createTrialLicensedMachineLearning(settings, loader)) { List restHandlers = machineLearning.getRestHandlers(settings, null, null, null, null, null, null, null, null); assertThat(restHandlers, hasItem(instanceOf(RestMlInfoAction.class))); @@ -260,7 +260,7 @@ public void testDataFrameAnalyticsOnly() throws IOException { public void testNlpOnly() throws IOException { Settings settings = Settings.builder().put("path.home", createTempDir()).build(); - MlTestExtensionLoader loader = new MlTestExtensionLoader(new MlTestExtension(false, false, false, false, true, false)); + MlTestExtensionLoader loader = new MlTestExtensionLoader(new MlTestExtension(false, false, false, false, true)); try (MachineLearning machineLearning = createTrialLicensedMachineLearning(settings, loader)) { List restHandlers = machineLearning.getRestHandlers(settings, null, null, null, null, null, null, null, null); assertThat(restHandlers, hasItem(instanceOf(RestMlInfoAction.class))); @@ -287,22 +287,19 @@ public static class MlTestExtension implements 
MachineLearningExtension { private final boolean isAnomalyDetectionEnabled; private final boolean isDataFrameAnalyticsEnabled; private final boolean isNlpEnabled; - private final boolean isLearningToRankEnabled; MlTestExtension( boolean useIlm, boolean includeNodeInfo, boolean isAnomalyDetectionEnabled, boolean isDataFrameAnalyticsEnabled, - boolean isNlpEnabled, - boolean isLearningToRankEnabled + boolean isNlpEnabled ) { this.useIlm = useIlm; this.includeNodeInfo = includeNodeInfo; this.isAnomalyDetectionEnabled = isAnomalyDetectionEnabled; this.isDataFrameAnalyticsEnabled = isDataFrameAnalyticsEnabled; this.isNlpEnabled = isNlpEnabled; - this.isLearningToRankEnabled = isLearningToRankEnabled; } @Override @@ -330,11 +327,6 @@ public boolean isNlpEnabled() { return isNlpEnabled; } - @Override - public boolean isLearningToRankEnabled() { - return isLearningToRankEnabled; - } - @Override public String[] getAnalyticsDestIndexAllowedSettings() { return ANALYTICS_DEST_INDEX_ALLOWED_SETTINGS; From 592858281400cfa608080aca5dac8331d3ea1f5c Mon Sep 17 00:00:00 2001 From: Fang Xing <155562079+fang-xing-esql@users.noreply.github.com> Date: Thu, 29 Aug 2024 12:03:03 -0400 Subject: [PATCH 29/30] [ES|QL] Combine 3 commonTypes into one (#112220) Combine 3 commonTypes into one. 
--- .../esql/core/type/DataTypeConverter.java | 80 --------- .../predicate/operator/arithmetic/Add.java | 1 - .../arithmetic/ArithmeticOperation.java | 7 +- .../BinaryComparisonInversible.java | 2 +- .../predicate/operator/arithmetic/Div.java | 1 - .../arithmetic/EsqlArithmeticOperation.java | 5 +- .../predicate/operator/arithmetic/Mul.java | 1 - .../predicate/operator/arithmetic/Sub.java | 1 - .../comparison/EsqlBinaryComparison.java | 4 +- .../predicate/operator/comparison/In.java | 4 +- .../rules/SimplifyComparisonsArithmetics.java | 4 +- .../esql/type/EsqlDataTypeConverter.java | 77 +++++++-- .../xpack/esql/type/EsqlDataTypeRegistry.java | 24 --- .../esql/type/DataTypeConversionTests.java | 20 --- .../esql/type/EsqlDataTypeConverterTests.java | 158 ++++++++++++++++++ 15 files changed, 234 insertions(+), 155 deletions(-) rename x-pack/plugin/{esql-core/src/main/java/org/elasticsearch/xpack/esql/core => esql/src/main/java/org/elasticsearch/xpack/esql}/expression/predicate/operator/arithmetic/ArithmeticOperation.java (80%) rename x-pack/plugin/{esql-core/src/main/java/org/elasticsearch/xpack/esql/core => esql/src/main/java/org/elasticsearch/xpack/esql}/expression/predicate/operator/arithmetic/BinaryComparisonInversible.java (91%) diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataTypeConverter.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataTypeConverter.java index 1e68d63ef7bb..78b395503e70 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataTypeConverter.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataTypeConverter.java @@ -38,7 +38,6 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION; import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTime; -import static 
org.elasticsearch.xpack.esql.core.type.DataType.isPrimitiveAndSupported; import static org.elasticsearch.xpack.esql.core.type.DataType.isString; import static org.elasticsearch.xpack.esql.core.util.NumericUtils.UNSIGNED_LONG_MAX; import static org.elasticsearch.xpack.esql.core.util.NumericUtils.inUnsignedLongRange; @@ -51,85 +50,6 @@ public final class DataTypeConverter { private DataTypeConverter() {} - /** - * Returns the type compatible with both left and right types - *

- * If one of the types is null - returns another type - * If both types are numeric - returns type with the highest precision int < long < float < double - * If one of the types is string and another numeric - returns numeric - */ - public static DataType commonType(DataType left, DataType right) { - if (left == right) { - return left; - } - if (left == NULL) { - return right; - } - if (right == NULL) { - return left; - } - if (isString(left) && isString(right)) { - if (left == TEXT || right == TEXT) { - return TEXT; - } - if (left == KEYWORD) { - return KEYWORD; - } - return right; - } - if (left.isNumeric() && right.isNumeric()) { - int lsize = left.estimatedSize().orElseThrow(); - int rsize = right.estimatedSize().orElseThrow(); - // if one is int - if (left.isWholeNumber()) { - // promote the highest int - if (right.isWholeNumber()) { - if (left == UNSIGNED_LONG || right == UNSIGNED_LONG) { - return UNSIGNED_LONG; - } - return lsize > rsize ? left : right; - } - // promote the rational - return right; - } - // try the other side - if (right.isWholeNumber()) { - return left; - } - // promote the highest rational - return lsize > rsize ? left : right; - } - if (isString(left)) { - if (right.isNumeric()) { - return right; - } - } - if (isString(right)) { - if (left.isNumeric()) { - return left; - } - } - - if (isDateTime(left) && isDateTime(right)) { - return DATETIME; - } - - // none found - return null; - } - - /** - * Returns true if the from type can be converted to the to type, false - otherwise - */ - public static boolean canConvert(DataType from, DataType to) { - // Special handling for nulls and if conversion is not requires - if (from == to || from == NULL) { - return true; - } - // only primitives are supported so far - return isPrimitiveAndSupported(from) && isPrimitiveAndSupported(to) && converterFor(from, to) != null; - } - /** * Get the conversion from one type to another. 
*/ diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Add.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Add.java index b6ec9b6fd0e2..8f8d885ee379 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Add.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Add.java @@ -12,7 +12,6 @@ import org.elasticsearch.compute.ann.Evaluator; import org.elasticsearch.compute.ann.Fixed; import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.BinaryComparisonInversible; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.util.NumericUtils; diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/operator/arithmetic/ArithmeticOperation.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/ArithmeticOperation.java similarity index 80% rename from x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/operator/arithmetic/ArithmeticOperation.java rename to x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/ArithmeticOperation.java index 8dc0f5808317..cb7e7c4643fb 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/operator/arithmetic/ArithmeticOperation.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/ArithmeticOperation.java @@ -4,16 +4,17 @@ * 2.0; you may not use this file except in compliance with the Elastic License * 2.0. 
*/ -package org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic; +package org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal; import org.elasticsearch.xpack.esql.core.expression.predicate.BinaryOperator; +import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.BinaryArithmeticOperation; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; -import org.elasticsearch.xpack.esql.core.type.DataTypeConverter; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNumeric; +import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.commonType; public abstract class ArithmeticOperation extends BinaryOperator { @@ -36,7 +37,7 @@ public ArithmeticOperation swapLeftAndRight() { @Override public DataType dataType() { if (dataType == null) { - dataType = DataTypeConverter.commonType(left().dataType(), right().dataType()); + dataType = commonType(left().dataType(), right().dataType()); } return dataType; } diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/operator/arithmetic/BinaryComparisonInversible.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/BinaryComparisonInversible.java similarity index 91% rename from x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/operator/arithmetic/BinaryComparisonInversible.java rename to x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/BinaryComparisonInversible.java index 358ad59ec635..b0ab4c48d970 100644 --- 
a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/operator/arithmetic/BinaryComparisonInversible.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/BinaryComparisonInversible.java @@ -5,7 +5,7 @@ * 2.0. */ -package org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic; +package org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.tree.Source; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Div.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Div.java index 0e4c506a90d8..f1e197cf350b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Div.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Div.java @@ -11,7 +11,6 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.compute.ann.Evaluator; import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.BinaryComparisonInversible; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/EsqlArithmeticOperation.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/EsqlArithmeticOperation.java index 647071c44cfd..400e70b64111 100644 --- 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/EsqlArithmeticOperation.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/EsqlArithmeticOperation.java @@ -13,14 +13,12 @@ import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator; import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException; import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.ArithmeticOperation; import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.BinaryArithmeticOperation; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.evaluator.mapper.EvaluatorMapper; import org.elasticsearch.xpack.esql.expression.function.scalar.math.Cast; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; -import org.elasticsearch.xpack.esql.type.EsqlDataTypeRegistry; import java.io.IOException; import java.util.List; @@ -31,6 +29,7 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.INTEGER; import static org.elasticsearch.xpack.esql.core.type.DataType.LONG; import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; +import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.commonType; public abstract class EsqlArithmeticOperation extends ArithmeticOperation implements EvaluatorMapper { public static List getNamedWriteables() { @@ -133,7 +132,7 @@ public Object fold() { public DataType dataType() { if (dataType == null) { - dataType = EsqlDataTypeRegistry.INSTANCE.commonType(left().dataType(), right().dataType()); + dataType = commonType(left().dataType(), right().dataType()); } return dataType; } diff --git 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Mul.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Mul.java index a73562ff153b..03981a821f52 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Mul.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Mul.java @@ -11,7 +11,6 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.compute.ann.Evaluator; import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.BinaryComparisonInversible; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.util.NumericUtils; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Sub.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Sub.java index ee2ccc3b7107..27f5579129cc 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Sub.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Sub.java @@ -12,7 +12,6 @@ import org.elasticsearch.compute.ann.Evaluator; import org.elasticsearch.compute.ann.Fixed; import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.BinaryComparisonInversible; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; diff --git 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/EsqlBinaryComparison.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/EsqlBinaryComparison.java index 52d4c111b2ea..b50d70e69819 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/EsqlBinaryComparison.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/EsqlBinaryComparison.java @@ -22,7 +22,6 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.math.Cast; import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.EsqlArithmeticOperation; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; -import org.elasticsearch.xpack.esql.type.EsqlDataTypeRegistry; import java.io.IOException; import java.time.ZoneId; @@ -32,6 +31,7 @@ import static org.elasticsearch.common.logging.LoggerMessageFormat.format; import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; +import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.commonType; public abstract class EsqlBinaryComparison extends BinaryComparison implements EvaluatorMapper { public static List getNamedWriteables() { @@ -172,7 +172,7 @@ public EvalOperator.ExpressionEvaluator.Factory toEvaluator( Function toEvaluator ) { // Our type is always boolean, so figure out the evaluator type from the inputs - DataType commonType = EsqlDataTypeRegistry.INSTANCE.commonType(left().dataType(), right().dataType()); + DataType commonType = commonType(left().dataType(), right().dataType()); EvalOperator.ExpressionEvaluator.Factory lhs; EvalOperator.ExpressionEvaluator.Factory rhs; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/In.java 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/In.java index 636b31fcc691..333f32e82c57 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/In.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/comparison/In.java @@ -27,7 +27,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction; import org.elasticsearch.xpack.esql.expression.function.scalar.math.Cast; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; -import org.elasticsearch.xpack.esql.type.EsqlDataTypeRegistry; +import org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter; import java.io.IOException; import java.util.BitSet; @@ -269,7 +269,7 @@ private DataType commonType() { break; } } - commonType = EsqlDataTypeRegistry.INSTANCE.commonType(commonType, e.dataType()); + commonType = EsqlDataTypeConverter.commonType(commonType, e.dataType()); } return commonType; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/SimplifyComparisonsArithmetics.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/SimplifyComparisonsArithmetics.java index 4ef069ea16d0..fe83aeb647bf 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/SimplifyComparisonsArithmetics.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/SimplifyComparisonsArithmetics.java @@ -9,10 +9,10 @@ import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.Literal; -import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.ArithmeticOperation; -import org.elasticsearch.xpack.esql.core.expression.predicate.operator.arithmetic.BinaryComparisonInversible; import 
org.elasticsearch.xpack.esql.core.expression.predicate.operator.comparison.BinaryComparison; import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.ArithmeticOperation; +import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.BinaryComparisonInversible; import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Neg; import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Sub; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java index 1572f8950e0a..b090708a64ad 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java @@ -58,6 +58,7 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_POINT; import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_SHAPE; import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME; +import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_PERIOD; import static org.elasticsearch.xpack.esql.core.type.DataType.DOUBLE; import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_POINT; import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_SHAPE; @@ -67,9 +68,14 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.LONG; import static org.elasticsearch.xpack.esql.core.type.DataType.NULL; import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT; +import static org.elasticsearch.xpack.esql.core.type.DataType.TIME_DURATION; import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION; -import static 
org.elasticsearch.xpack.esql.core.type.DataType.isPrimitiveAndSupported; +import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTime; +import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTimeOrTemporal; +import static org.elasticsearch.xpack.esql.core.type.DataType.isNullOrDatePeriod; +import static org.elasticsearch.xpack.esql.core.type.DataType.isNullOrTemporalAmount; +import static org.elasticsearch.xpack.esql.core.type.DataType.isNullOrTimeDuration; import static org.elasticsearch.xpack.esql.core.type.DataType.isString; import static org.elasticsearch.xpack.esql.core.type.DataTypeConverter.safeDoubleToLong; import static org.elasticsearch.xpack.esql.core.type.DataTypeConverter.safeToInt; @@ -107,18 +113,6 @@ public class EsqlDataTypeConverter { entry(VERSION, ToVersion::new) ); - /** - * Returns true if the from type can be converted to the to type, false - otherwise - */ - public static boolean canConvert(DataType from, DataType to) { - // Special handling for nulls and if conversion is not requires - if (from == to || from == NULL) { - return true; - } - // only primitives are supported so far - return isPrimitiveAndSupported(from) && isPrimitiveAndSupported(to) && converterFor(from, to) != null; - } - public static Converter converterFor(DataType from, DataType to) { // TODO move EXPRESSION_TO_LONG here if there is no regression if (isString(from)) { @@ -230,8 +224,63 @@ public static Object convert(Object value, DataType dataType) { return converter.convert(value); } + /** + * Returns the type compatible with both left and right types + *

+ * If one of the types is null - returns another type + * If both types are numeric - returns type with the highest precision int < long < float < double + */ public static DataType commonType(DataType left, DataType right) { - return DataTypeConverter.commonType(left, right); + if (left == right) { + return left; + } + if (left == NULL) { + return right; + } + if (right == NULL) { + return left; + } + if (isDateTimeOrTemporal(left) || isDateTimeOrTemporal(right)) { + if ((isDateTime(left) && isNullOrTemporalAmount(right)) || (isNullOrTemporalAmount(left) && isDateTime(right))) { + return DATETIME; + } + if (isNullOrTimeDuration(left) && isNullOrTimeDuration(right)) { + return TIME_DURATION; + } + if (isNullOrDatePeriod(left) && isNullOrDatePeriod(right)) { + return DATE_PERIOD; + } + } + if (isString(left) && isString(right)) { + if (left == TEXT || right == TEXT) { + return TEXT; + } + return right; + } + if (left.isNumeric() && right.isNumeric()) { + int lsize = left.estimatedSize().orElseThrow(); + int rsize = right.estimatedSize().orElseThrow(); + // if one is int + if (left.isWholeNumber()) { + // promote the highest int + if (right.isWholeNumber()) { + if (left == UNSIGNED_LONG || right == UNSIGNED_LONG) { + return UNSIGNED_LONG; + } + return lsize > rsize ? left : right; + } + // promote the rational + return right; + } + // try the other side + if (right.isWholeNumber()) { + return left; + } + // promote the highest rational + return lsize > rsize ? 
left : right; + } + // none found + return null; } // generally supporting abbreviations from https://en.wikipedia.org/wiki/Unit_of_time diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeRegistry.java index 96e206b82cf0..f8e8cd37dc8b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeRegistry.java @@ -10,15 +10,6 @@ import org.elasticsearch.index.mapper.TimeSeriesParams; import org.elasticsearch.xpack.esql.core.type.DataType; -import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME; -import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_PERIOD; -import static org.elasticsearch.xpack.esql.core.type.DataType.TIME_DURATION; -import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTime; -import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTimeOrTemporal; -import static org.elasticsearch.xpack.esql.core.type.DataType.isNullOrDatePeriod; -import static org.elasticsearch.xpack.esql.core.type.DataType.isNullOrTemporalAmount; -import static org.elasticsearch.xpack.esql.core.type.DataType.isNullOrTimeDuration; - public class EsqlDataTypeRegistry { public static final EsqlDataTypeRegistry INSTANCE = new EsqlDataTypeRegistry(); @@ -35,19 +26,4 @@ public DataType fromEs(String typeName, TimeSeriesParams.MetricType metricType) */ return metricType == TimeSeriesParams.MetricType.COUNTER ? 
type.widenSmallNumeric().counter() : type; } - - public DataType commonType(DataType left, DataType right) { - if (isDateTimeOrTemporal(left) || isDateTimeOrTemporal(right)) { - if ((isDateTime(left) && isNullOrTemporalAmount(right)) || (isNullOrTemporalAmount(left) && isDateTime(right))) { - return DATETIME; - } - if (isNullOrTimeDuration(left) && isNullOrTimeDuration(right)) { - return TIME_DURATION; - } - if (isNullOrDatePeriod(left) && isNullOrDatePeriod(right)) { - return DATE_PERIOD; - } - } - return EsqlDataTypeConverter.commonType(left, right); - } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/DataTypeConversionTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/DataTypeConversionTests.java index 9f8c8f91b703..871bf632adcc 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/DataTypeConversionTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/DataTypeConversionTests.java @@ -35,7 +35,6 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; import static org.elasticsearch.xpack.esql.core.type.DataType.UNSUPPORTED; import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION; -import static org.elasticsearch.xpack.esql.core.type.DataTypeConverter.commonType; import static org.elasticsearch.xpack.esql.core.type.DataTypeConverter.converterFor; import static org.elasticsearch.xpack.esql.core.util.DateUtils.asDateTime; @@ -522,25 +521,6 @@ public void testConversionToIdentity() { assertEquals(10, conversion.convert(10)); } - public void testCommonType() { - assertEquals(BOOLEAN, commonType(BOOLEAN, NULL)); - assertEquals(BOOLEAN, commonType(NULL, BOOLEAN)); - assertEquals(BOOLEAN, commonType(BOOLEAN, BOOLEAN)); - assertEquals(NULL, commonType(NULL, NULL)); - assertEquals(INTEGER, commonType(INTEGER, KEYWORD)); - assertEquals(LONG, commonType(TEXT, LONG)); - assertEquals(SHORT, commonType(SHORT, BYTE)); - 
assertEquals(FLOAT, commonType(BYTE, FLOAT)); - assertEquals(FLOAT, commonType(FLOAT, INTEGER)); - assertEquals(UNSIGNED_LONG, commonType(UNSIGNED_LONG, LONG)); - assertEquals(DOUBLE, commonType(DOUBLE, FLOAT)); - assertEquals(FLOAT, commonType(FLOAT, UNSIGNED_LONG)); - - // strings - assertEquals(TEXT, commonType(TEXT, KEYWORD)); - assertEquals(TEXT, commonType(KEYWORD, TEXT)); - } - public void testEsDataTypes() { for (DataType type : DataType.types()) { assertEquals(type, DataType.fromTypeName(type.typeName())); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverterTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverterTests.java index 0997c88aac2b..8ad083683f69 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverterTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverterTests.java @@ -8,6 +8,44 @@ package org.elasticsearch.xpack.esql.type; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.esql.core.type.DataType; + +import java.util.Arrays; +import java.util.List; + +import static org.elasticsearch.xpack.esql.core.type.DataType.BOOLEAN; +import static org.elasticsearch.xpack.esql.core.type.DataType.BYTE; +import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_POINT; +import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_SHAPE; +import static org.elasticsearch.xpack.esql.core.type.DataType.COUNTER_DOUBLE; +import static org.elasticsearch.xpack.esql.core.type.DataType.COUNTER_INTEGER; +import static org.elasticsearch.xpack.esql.core.type.DataType.COUNTER_LONG; +import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME; +import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_NANOS; +import static org.elasticsearch.xpack.esql.core.type.DataType.DOC_DATA_TYPE; +import static 
org.elasticsearch.xpack.esql.core.type.DataType.DOUBLE; +import static org.elasticsearch.xpack.esql.core.type.DataType.FLOAT; +import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_POINT; +import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_SHAPE; +import static org.elasticsearch.xpack.esql.core.type.DataType.HALF_FLOAT; +import static org.elasticsearch.xpack.esql.core.type.DataType.INTEGER; +import static org.elasticsearch.xpack.esql.core.type.DataType.IP; +import static org.elasticsearch.xpack.esql.core.type.DataType.LONG; +import static org.elasticsearch.xpack.esql.core.type.DataType.NULL; +import static org.elasticsearch.xpack.esql.core.type.DataType.OBJECT; +import static org.elasticsearch.xpack.esql.core.type.DataType.PARTIAL_AGG; +import static org.elasticsearch.xpack.esql.core.type.DataType.SCALED_FLOAT; +import static org.elasticsearch.xpack.esql.core.type.DataType.SHORT; +import static org.elasticsearch.xpack.esql.core.type.DataType.SOURCE; +import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT; +import static org.elasticsearch.xpack.esql.core.type.DataType.TSID_DATA_TYPE; +import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; +import static org.elasticsearch.xpack.esql.core.type.DataType.UNSUPPORTED; +import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION; +import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTime; +import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTimeOrTemporal; +import static org.elasticsearch.xpack.esql.core.type.DataType.isString; +import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.commonType; public class EsqlDataTypeConverterTests extends ESTestCase { @@ -16,4 +54,124 @@ public void testNanoTimeToString() { long actual = EsqlDataTypeConverter.dateNanosToLong(EsqlDataTypeConverter.nanoTimeToString(expected)); assertEquals(expected, actual); } + + public void testCommonTypeNull() { + for (DataType dataType 
: DataType.values()) { + assertEqualsCommonType(dataType, NULL, dataType); + } + } + + public void testCommonTypeStrings() { + List STRINGS = Arrays.stream(DataType.values()).filter(DataType::isString).toList(); + for (DataType dataType1 : STRINGS) { + for (DataType dataType2 : DataType.values()) { + if (dataType2 == NULL) { + assertEqualsCommonType(dataType1, NULL, dataType1); + } else if ((isString(dataType1) && isString(dataType2))) { + if (dataType1 == dataType2) { + assertEqualsCommonType(dataType1, dataType2, dataType1); + } else { + assertEqualsCommonType(dataType1, dataType2, TEXT); + } + } else { + assertNullCommonType(dataType1, dataType2); + } + } + } + } + + public void testCommonTypeDateTimeIntervals() { + List DATE_TIME_INTERVALS = Arrays.stream(DataType.values()).filter(DataType::isDateTimeOrTemporal).toList(); + for (DataType dataType1 : DATE_TIME_INTERVALS) { + for (DataType dataType2 : DataType.values()) { + if (dataType2 == NULL) { + assertEqualsCommonType(dataType1, NULL, dataType1); + } else if (isDateTimeOrTemporal(dataType2)) { + if (isDateTime(dataType1) || isDateTime(dataType2)) { + assertEqualsCommonType(dataType1, dataType2, DATETIME); + } else if (dataType1 == dataType2) { + assertEqualsCommonType(dataType1, dataType2, dataType1); + } else { + assertNullCommonType(dataType1, dataType2); + } + } else { + assertNullCommonType(dataType1, dataType2); + } + } + } + } + + public void testCommonTypeNumeric() { + // whole numbers + commonNumericType(BYTE, List.of(NULL, BYTE)); + commonNumericType(SHORT, List.of(NULL, BYTE, SHORT)); + commonNumericType(INTEGER, List.of(NULL, BYTE, SHORT, INTEGER)); + commonNumericType(LONG, List.of(NULL, BYTE, SHORT, INTEGER, LONG)); + commonNumericType(UNSIGNED_LONG, List.of(NULL, BYTE, SHORT, INTEGER, LONG, UNSIGNED_LONG)); + // floats + commonNumericType(HALF_FLOAT, List.of(NULL, BYTE, SHORT, INTEGER, LONG, UNSIGNED_LONG, HALF_FLOAT, FLOAT)); + commonNumericType(FLOAT, List.of(NULL, BYTE, SHORT, INTEGER, LONG, 
UNSIGNED_LONG, FLOAT, HALF_FLOAT)); + commonNumericType(DOUBLE, List.of(NULL, BYTE, SHORT, INTEGER, LONG, UNSIGNED_LONG, HALF_FLOAT, FLOAT, DOUBLE, SCALED_FLOAT)); + commonNumericType(SCALED_FLOAT, List.of(NULL, BYTE, SHORT, INTEGER, LONG, UNSIGNED_LONG, HALF_FLOAT, FLOAT, SCALED_FLOAT, DOUBLE)); + } + + /** + * The first argument and the second argument(s) have the first argument as a common type. + */ + private static void commonNumericType(DataType numericType, List lowerTypes) { + List NUMERICS = Arrays.stream(DataType.values()).filter(DataType::isNumeric).toList(); + List DOUBLES = Arrays.stream(DataType.values()).filter(DataType::isRationalNumber).toList(); + for (DataType dataType : DataType.values()) { + if (DOUBLES.containsAll(List.of(numericType, dataType)) && (dataType.estimatedSize().equals(numericType.estimatedSize()))) { + assertEquals(numericType, commonType(dataType, numericType)); + } else if (lowerTypes.contains(dataType)) { + assertEqualsCommonType(numericType, dataType, numericType); + } else if (NUMERICS.contains(dataType)) { + assertEqualsCommonType(numericType, dataType, dataType); + } else { + assertNullCommonType(numericType, dataType); + } + } + } + + public void testCommonTypeMiscellaneous() { + List MISCELLANEOUS = List.of( + COUNTER_INTEGER, + COUNTER_LONG, + COUNTER_DOUBLE, + UNSUPPORTED, + OBJECT, + SOURCE, + DATE_NANOS, + DOC_DATA_TYPE, + TSID_DATA_TYPE, + PARTIAL_AGG, + IP, + VERSION, + GEO_POINT, + GEO_SHAPE, + CARTESIAN_POINT, + CARTESIAN_SHAPE, + BOOLEAN + ); + for (DataType dataType1 : MISCELLANEOUS) { + for (DataType dataType2 : DataType.values()) { + if (dataType2 == NULL || dataType1 == dataType2) { + assertEqualsCommonType(dataType1, dataType2, dataType1); + } else { + assertNullCommonType(dataType1, dataType2); + } + } + } + } + + private static void assertEqualsCommonType(DataType dataType1, DataType dataType2, DataType commonType) { + assertEquals(commonType, commonType(dataType1, dataType2)); + assertEquals(commonType, 
commonType(dataType2, dataType1)); + } + + private static void assertNullCommonType(DataType dataType1, DataType dataType2) { + assertNull(commonType(dataType1, dataType2)); + assertNull(commonType(dataType2, dataType1)); + } } From e3e562ffbfb981014bdd71bf663bb6f972f5e352 Mon Sep 17 00:00:00 2001 From: David Kyle Date: Thu, 29 Aug 2024 17:18:54 +0100 Subject: [PATCH 30/30] [ML] Support sparse embedding models in the elasticsearch inference service (#112270) For a sparse embedding model created with the ml trained models APIs --- docs/changelog/112270.yaml | 5 + .../inference/service-elasticsearch.asciidoc | 3 +- .../xpack/inference/CustomElandModelIT.java | 134 +++++++++ .../xpack/inference/RerankingIT.java | 8 +- .../BaseElasticsearchInternalService.java | 6 +- .../ElasticsearchInternalService.java | 151 +++------- .../services/elser/ElserInternalService.java | 28 -- .../ElasticsearchInternalServiceTests.java | 278 +++++++++++------- 8 files changed, 363 insertions(+), 250 deletions(-) create mode 100644 docs/changelog/112270.yaml create mode 100644 x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/CustomElandModelIT.java diff --git a/docs/changelog/112270.yaml b/docs/changelog/112270.yaml new file mode 100644 index 000000000000..1e6b9c7fc929 --- /dev/null +++ b/docs/changelog/112270.yaml @@ -0,0 +1,5 @@ +pr: 112270 +summary: Support sparse embedding models in the elasticsearch inference service +area: Machine Learning +type: enhancement +issues: [] diff --git a/docs/reference/inference/service-elasticsearch.asciidoc b/docs/reference/inference/service-elasticsearch.asciidoc index 99fd41ee2db6..572cad591fba 100644 --- a/docs/reference/inference/service-elasticsearch.asciidoc +++ b/docs/reference/inference/service-elasticsearch.asciidoc @@ -31,6 +31,7 @@ include::inference-shared.asciidoc[tag=task-type] Available task types: * `rerank`, +* `sparse_embedding`, * `text_embedding`. 
-- @@ -182,4 +183,4 @@ PUT _inference/text_embedding/my-e5-model } } ------------------------------------------------------------ -// TEST[skip:TBD] \ No newline at end of file +// TEST[skip:TBD] diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/CustomElandModelIT.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/CustomElandModelIT.java new file mode 100644 index 000000000000..65b7a138e7e1 --- /dev/null +++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/CustomElandModelIT.java @@ -0,0 +1,134 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference; + +import org.elasticsearch.client.Request; +import org.elasticsearch.core.Strings; +import org.elasticsearch.inference.TaskType; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Base64; +import java.util.List; +import java.util.stream.Collectors; + +public class CustomElandModelIT extends InferenceBaseRestTest { + + // The model definition is taken from org.elasticsearch.xpack.ml.integration.TextExpansionQueryIT + + static final String BASE_64_ENCODED_MODEL = "UEsDBAAACAgAAAAAAAAAAAAAAAAAA" + + "AAAAAAUAA4Ac2ltcGxlbW9kZWwvZGF0YS5wa2xGQgoAWlpaWlpaWlpaWoACY19fdG9yY2hfXwpUaW55VG" + + "V4dEV4cGFuc2lvbgpxACmBfShYCAAAAHRyYWluaW5ncQGJWBYAAABfaXNfZnVsbF9iYWNrd2FyZF9ob29" + + "rcQJOdWJxAy5QSwcIITmbsFgAAABYAAAAUEsDBBQACAgIAAAAAAAAAAAAAAAAAAAAAAAdAB0Ac2ltcGxl" + + "bW9kZWwvY29kZS9fX3RvcmNoX18ucHlGQhkAWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWoWRT4+cMAzF7" + + "/spfASJomF3e0Ga3nrrn8vcELIyxAzRhAQlpjvbT19DWDrdquqBA/bvPT87nVUxwsm41xPd+PNtUi4a77" + + 
"KvXs+W8voBAHFSQY3EFCIiHKFp1+p57vs/ShyUccZdoIaz93aBTMR+thbPqru+qKBx8P4q/e8TyxRlmwVc" + + "tJp66H1YmCyS7WsZwD50A2L5V7pCBADGTTOj0bGGE7noQyqzv5JDfp0o9fZRCWqP37yjhE4+mqX5X3AdF" + + "ZHGM/2TzOHDpy1IvQWR+OWo3KwsRiKdpcqg4pBFDtm+QJ7nqwIPckrlnGfFJG0uNhOl38Sjut3pCqg26Qu" + + "Zy8BR9In7ScHHrKkKMW0TIucFrGQXCMpdaDO05O6DpOiy8e4kr0Ed/2YKOIhplW8gPr4ntygrd9ixpx3j9" + + "UZZVRagl2c6+imWUzBjuf5m+Ch7afphuvvW+r/0dsfn+2N9MZGb9+/SFtCYdhd83CMYp+mGy0LiKNs8y/e" + + "UuEA8B/d2z4dfUEsHCFSE3IaCAQAAIAMAAFBLAwQUAAgICAAAAAAAAAAAAAAAAAAAAAAAJwApAHNpbXBsZ" + + "W1vZGVsL2NvZGUvX190b3JjaF9fLnB5LmRlYnVnX3BrbEZCJQBaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlp" + + "aWlpaWlpaWlpaWlpahZHLbtNAFIZtp03rSVIuLRKXjdk5ojitKJsiFq24lem0KKSqpRIZt55gE9/GM+lNL" + + "Fgx4i1Ys2aHhIBXgAVICNggHgNm6rqJN2BZGv36/v/MOWeea/Z5RVHurLfRUsfZXOnccx522itrd53O0vL" + + "qbaKYtsAKUe1pcege7hm9JNtzM8+kOOzNApIX0A3xBXE6YE7g0UWjg2OaZAJXbKvALOnj2GEHKc496ykLkt" + + "gNt3Jz17hprCUxFqExe7YIpQkNpO1/kfHhPUdtUAdH2/gfmeYiIFW7IkM6IBP2wrDNbMe3Mjf2ksiK3Hjg" + + "hg7F2DN9l/omZZl5Mmez2QRk0q4WUUB0+1oh9nDwxGdUXJdXPMRZQs352eGaRPV9s2lcMeZFGWBfKJJiw0Y" + + "gbCMLBaRmXyy4flx6a667Fch55q05QOq2Jg2ANOyZwplhNsjiohVApo7aa21QnNGW5+4GXv8gxK1beBeHSR" + + "rhmLXWVh+0aBhErZ7bx1ejxMOhlR6QU4ycNqGyk8/yNGCWkwY7/RCD7UEQek4QszCgDJAzZtfErA0VqHBy9" + + "ugQP9pUfUmgCjVYgWNwHFbhBJyEOgSwBuuwARWZmoI6J9PwLfzEocpRpPrT8DP8wqHG0b4UX+E3DiscvRgl" + + "XIoi81KKPwioHI5x9EooNKWiy0KOc/T6WF4SssrRuzJ9L2VNRXUhJzj6UKYfS4W/q/5wuh/l4M9R9qsU+y2" + + "dpoo2hJzkaEET8r6KRONicnRdK9EbUi6raFVIwNGjsrlbpk6ZPi7TbS3fv3LyNjPiEKzG0aG0tvNb6xw90/" + + "whe6ONjnJcUxobHDUqQ8bIOW79BVBLBwhfSmPKdAIAAE4EAABQSwMEAAAICAAAAAAAAAAAAAAAAAAAAAAAA" + + "BkABQBzaW1wbGVtb2RlbC9jb25zdGFudHMucGtsRkIBAFqAAikuUEsHCG0vCVcEAAAABAAAAFBLAwQAAAgI" + + "AAAAAAAAAAAAAAAAAAAAAAAAEwA7AHNpbXBsZW1vZGVsL3ZlcnNpb25GQjcAWlpaWlpaWlpaWlpaWlpaWlp" + + "aWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWjMKUEsHCNGeZ1UCAAAAAgAAAFBLAQIAAA" + + "AACAgAAAAAAAAhOZuwWAAAAFgAAAAUAAAAAAAAAAAAAAAAAAAAAABzaW1wbGVtb2RlbC9kYXRhLnBrbFBLA" + + 
"QIAABQACAgIAAAAAABUhNyGggEAACADAAAdAAAAAAAAAAAAAAAAAKgAAABzaW1wbGVtb2RlbC9jb2RlL19f" + + "dG9yY2hfXy5weVBLAQIAABQACAgIAAAAAABfSmPKdAIAAE4EAAAnAAAAAAAAAAAAAAAAAJICAABzaW1wbGVt" + + "b2RlbC9jb2RlL19fdG9yY2hfXy5weS5kZWJ1Z19wa2xQSwECAAAAAAgIAAAAAAAAbS8JVwQAAAAEAAAAGQAA" + + "AAAAAAAAAAAAAACEBQAAc2ltcGxlbW9kZWwvY29uc3RhbnRzLnBrbFBLAQIAAAAACAgAAAAAAADRnmdVAgAA" + + "AAIAAAATAAAAAAAAAAAAAAAAANQFAABzaW1wbGVtb2RlbC92ZXJzaW9uUEsGBiwAAAAAAAAAHgMtAAAAAAAA" + + "AAAABQAAAAAAAAAFAAAAAAAAAGoBAAAAAAAAUgYAAAAAAABQSwYHAAAAALwHAAAAAAAAAQAAAFBLBQYAAAAABQAFAGoBAABSBgAAAAA="; + + static final long RAW_MODEL_SIZE; // size of the model before base64 encoding + static { + RAW_MODEL_SIZE = Base64.getDecoder().decode(BASE_64_ENCODED_MODEL).length; + } + + // Test a sparse embedding model deployed with the ml trained models APIs + public void testSparse() throws IOException { + String modelId = "custom-text-expansion-model"; + + createTextExpansionModel(modelId); + putModelDefinition(modelId, BASE_64_ENCODED_MODEL, RAW_MODEL_SIZE); + putVocabulary( + List.of("these", "are", "my", "words", "the", "washing", "machine", "is", "leaking", "octopus", "comforter", "smells"), + modelId + ); + + var inferenceConfig = """ + { + "service": "elasticsearch", + "service_settings": { + "model_id": "custom-text-expansion-model", + "num_allocations": 1, + "num_threads": 1 + } + } + """; + + var inferenceId = "sparse-inf"; + putModel(inferenceId, inferenceConfig, TaskType.SPARSE_EMBEDDING); + var results = inferOnMockService(inferenceId, List.of("washing", "machine")); + deleteModel(inferenceId); + assertNotNull(results.get("sparse_embedding")); + } + + protected void createTextExpansionModel(String modelId) throws IOException { + // with_special_tokens: false for this test with limited vocab + Request request = new Request("PUT", "/_ml/trained_models/" + modelId); + request.setJsonEntity(""" + { + "description": "a text expansion model", + "model_type": "pytorch", + "inference_config": { + "text_expansion": 
{ + "tokenization": { + "bert": { + "with_special_tokens": false + } + } + } + } + }"""); + client().performRequest(request); + } + + protected void putVocabulary(List vocabulary, String modelId) throws IOException { + List vocabularyWithPad = new ArrayList<>(); + vocabularyWithPad.add("[PAD]"); + vocabularyWithPad.add("[UNK]"); + vocabularyWithPad.addAll(vocabulary); + String quotedWords = vocabularyWithPad.stream().map(s -> "\"" + s + "\"").collect(Collectors.joining(",")); + + Request request = new Request("PUT", "_ml/trained_models/" + modelId + "/vocabulary"); + request.setJsonEntity(Strings.format(""" + { "vocabulary": [%s] } + """, quotedWords)); + client().performRequest(request); + } + + protected void putModelDefinition(String modelId, String base64EncodedModel, long unencodedModelSize) throws IOException { + Request request = new Request("PUT", "_ml/trained_models/" + modelId + "/definition/0"); + String body = Strings.format(""" + {"total_definition_length":%s,"definition": "%s","total_parts": 1}""", unencodedModelSize, base64EncodedModel); + request.setJsonEntity(body); + client().performRequest(request); + } +} diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/RerankingIT.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/RerankingIT.java index 77251ada4c48..893d3fb3e9b8 100644 --- a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/RerankingIT.java +++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/RerankingIT.java @@ -35,7 +35,7 @@ private String putCohereRerankEndpoint() throws IOException { "api_key": "" } } - """);// TODO remove key + """); return endpointID; } @@ -61,7 +61,7 @@ private String putCohereRerankEndpointWithDocuments() throws IOException { "return_documents": true } } - """);// TODO 
remove key + """); return endpointID; } @@ -81,13 +81,13 @@ private String putCohereRerankEndpointWithTop2() throws IOException { "service": "cohere", "service_settings": { "model_id": "rerank-english-v2.0", - "api_key": "8TNPBvpBO7oN97009HQHzQbBhNrxmREbcJrZCwkK" + "api_key": "" }, "task_settings": { "top_n": 2 } } - """);// TODO remove key + """); return endpointID; } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java index 574ca77d4587..457416370e55 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java @@ -154,10 +154,10 @@ public void isModelDownloaded(Model model, ActionListener listener) { executeAsyncWithOrigin(client, INFERENCE_ORIGIN, GetTrainedModelsAction.INSTANCE, getRequest, getModelsResponseListener); } else { listener.onFailure( - new IllegalArgumentException( - "Unable to determine supported model for [" + new IllegalStateException( + "Can not check the download status of the model used by [" + model.getConfigurations().getInferenceEntityId() - + "] please verify the request and submit a bug report if necessary." + + "] as the model_id cannot be found." 
) ); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java index c3a011156231..cca8ae63e974 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java @@ -7,8 +7,6 @@ package org.elasticsearch.xpack.inference.services.elasticsearch; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.TransportVersion; import org.elasticsearch.TransportVersions; @@ -27,19 +25,18 @@ import org.elasticsearch.inference.TaskType; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.xpack.core.inference.results.ErrorChunkedInferenceResults; +import org.elasticsearch.xpack.core.inference.results.InferenceChunkedSparseEmbeddingResults; import org.elasticsearch.xpack.core.inference.results.InferenceChunkedTextEmbeddingFloatResults; import org.elasticsearch.xpack.core.inference.results.InferenceTextEmbeddingFloatResults; import org.elasticsearch.xpack.core.inference.results.RankedDocsResults; +import org.elasticsearch.xpack.core.inference.results.SparseEmbeddingResults; import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsAction; import org.elasticsearch.xpack.core.ml.action.InferModelAction; -import org.elasticsearch.xpack.core.ml.action.PutTrainedModelAction; -import org.elasticsearch.xpack.core.ml.action.StartTrainedModelDeploymentAction; -import org.elasticsearch.xpack.core.ml.action.StopTrainedModelDeploymentAction; -import org.elasticsearch.xpack.core.ml.inference.TrainedModelConfig; -import 
org.elasticsearch.xpack.core.ml.inference.TrainedModelInput; import org.elasticsearch.xpack.core.ml.inference.results.ErrorInferenceResults; import org.elasticsearch.xpack.core.ml.inference.results.MlChunkedTextEmbeddingFloatResults; +import org.elasticsearch.xpack.core.ml.inference.results.MlChunkedTextExpansionResults; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextEmbeddingConfigUpdate; +import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextExpansionConfigUpdate; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextSimilarityConfigUpdate; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TokenizationConfigUpdate; import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; @@ -53,8 +50,6 @@ import java.util.Set; import java.util.function.Function; -import static org.elasticsearch.xpack.core.ClientHelper.INFERENCE_ORIGIN; -import static org.elasticsearch.xpack.core.ClientHelper.executeAsyncWithOrigin; import static org.elasticsearch.xpack.core.inference.results.ResultUtils.createInvalidChunkedResultException; import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMap; import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrThrowIfNull; @@ -71,15 +66,13 @@ public class ElasticsearchInternalService extends BaseElasticsearchInternalServi MULTILINGUAL_E5_SMALL_MODEL_ID_LINUX_X86 ); - private static final Logger logger = LogManager.getLogger(ElasticsearchInternalService.class); - public ElasticsearchInternalService(InferenceServiceExtension.InferenceServiceFactoryContext context) { super(context); } @Override protected EnumSet supportedTaskTypes() { - return EnumSet.of(TaskType.RERANK, TaskType.TEXT_EMBEDDING); + return EnumSet.of(TaskType.RERANK, TaskType.TEXT_EMBEDDING, TaskType.SPARSE_EMBEDDING); } @Override @@ -161,6 +154,12 @@ private static CustomElandModel createCustomElandModel( NAME, 
CustomElandInternalTextEmbeddingServiceSettings.fromMap(serviceSettings, context) ); + case SPARSE_EMBEDDING -> new CustomElandModel( + inferenceEntityId, + taskType, + NAME, + elandServiceSettings(serviceSettings, context) + ); case RERANK -> new CustomElandRerankModel( inferenceEntityId, taskType, @@ -334,6 +333,8 @@ public void infer( inferTextEmbedding(model, input, inputType, timeout, listener); } else if (TaskType.RERANK.equals(taskType)) { inferRerank(model, query, input, inputType, timeout, taskSettings, listener); + } else if (TaskType.SPARSE_EMBEDDING.equals(taskType)) { + inferSparseEmbedding(model, input, inputType, timeout, listener); } else { throw new ElasticsearchStatusException(TaskType.unsupportedTaskTypeErrorMsg(taskType, NAME), RestStatus.BAD_REQUEST); } @@ -364,6 +365,31 @@ public void inferTextEmbedding( ); } + public void inferSparseEmbedding( + Model model, + List inputs, + InputType inputType, + TimeValue timeout, + ActionListener listener + ) { + var request = buildInferenceRequest( + model.getConfigurations().getInferenceEntityId(), + TextExpansionConfigUpdate.EMPTY_UPDATE, + inputs, + inputType, + timeout, + false + ); + + client.execute( + InferModelAction.INSTANCE, + request, + listener.delegateFailureAndWrap( + (l, inferenceResult) -> l.onResponse(SparseEmbeddingResults.of(inferenceResult.getInferenceResults())) + ) + ); + } + public void inferRerank( Model model, String query, @@ -422,7 +448,7 @@ public void chunkedInfer( TimeValue timeout, ActionListener> listener ) { - if (TaskType.TEXT_EMBEDDING.isAnyOrSame(model.getTaskType()) == false) { + if ((TaskType.TEXT_EMBEDDING.equals(model.getTaskType()) || TaskType.SPARSE_EMBEDDING.equals(model.getTaskType())) == false) { listener.onFailure( new ElasticsearchStatusException(TaskType.unsupportedTaskTypeErrorMsg(model.getTaskType(), NAME), RestStatus.BAD_REQUEST) ); @@ -464,6 +490,8 @@ private static List translateToChunkedResults(Li private static ChunkedInferenceServiceResults 
translateToChunkedResult(InferenceResults inferenceResult) { if (inferenceResult instanceof MlChunkedTextEmbeddingFloatResults mlChunkedResult) { return InferenceChunkedTextEmbeddingFloatResults.ofMlResults(mlChunkedResult); + } else if (inferenceResult instanceof MlChunkedTextExpansionResults mlChunkedResult) { + return InferenceChunkedSparseEmbeddingResults.ofMlResult(mlChunkedResult); } else if (inferenceResult instanceof ErrorInferenceResults error) { return new ErrorChunkedInferenceResults(error.getException()); } else { @@ -471,103 +499,6 @@ private static ChunkedInferenceServiceResults translateToChunkedResult(Inference } } - @Override - public void start(Model model, ActionListener listener) { - if (model instanceof ElasticsearchInternalModel == false) { - listener.onFailure(notElasticsearchModelException(model)); - return; - } - - if (model.getTaskType() != TaskType.TEXT_EMBEDDING && model.getTaskType() != TaskType.RERANK) { - listener.onFailure( - new IllegalStateException(TaskType.unsupportedTaskTypeErrorMsg(model.getConfigurations().getTaskType(), NAME)) - ); - return; - } - - var startRequest = ((ElasticsearchInternalModel) model).getStartTrainedModelDeploymentActionRequest(); - var responseListener = ((ElasticsearchInternalModel) model).getCreateTrainedModelAssignmentActionListener(model, listener); - - client.execute(StartTrainedModelDeploymentAction.INSTANCE, startRequest, responseListener); - } - - @Override - public void stop(String inferenceEntityId, ActionListener listener) { - var request = new StopTrainedModelDeploymentAction.Request(inferenceEntityId); - request.setForce(true); - client.execute( - StopTrainedModelDeploymentAction.INSTANCE, - request, - listener.delegateFailureAndWrap((delegatedResponseListener, response) -> delegatedResponseListener.onResponse(Boolean.TRUE)) - ); - } - - @Override - public void putModel(Model model, ActionListener listener) { - if (model instanceof ElasticsearchInternalModel == false) { - 
listener.onFailure(notElasticsearchModelException(model)); - return; - } else if (model instanceof MultilingualE5SmallModel e5Model) { - String modelId = e5Model.getServiceSettings().modelId(); - var input = new TrainedModelInput(List.of("text_field")); // by convention text_field is used - var config = TrainedModelConfig.builder().setInput(input).setModelId(modelId).validate(true).build(); - PutTrainedModelAction.Request putRequest = new PutTrainedModelAction.Request(config, false, true); - executeAsyncWithOrigin( - client, - INFERENCE_ORIGIN, - PutTrainedModelAction.INSTANCE, - putRequest, - ActionListener.wrap(response -> listener.onResponse(Boolean.TRUE), e -> { - if (e instanceof ElasticsearchStatusException esException - && esException.getMessage().contains(PutTrainedModelAction.MODEL_ALREADY_EXISTS_ERROR_MESSAGE_FRAGMENT)) { - listener.onResponse(Boolean.TRUE); - } else { - listener.onFailure(e); - } - }) - ); - } else if (model instanceof CustomElandModel) { - logger.info("Custom eland model detected, model must have been already loaded into the cluster with eland."); - listener.onResponse(Boolean.TRUE); - } else { - listener.onFailure( - new IllegalArgumentException( - "Can not download model automatically for [" - + model.getConfigurations().getInferenceEntityId() - + "] you may need to download it through the trained models API or with eland." 
- ) - ); - return; - } - } - - @Override - public void isModelDownloaded(Model model, ActionListener listener) { - ActionListener getModelsResponseListener = listener.delegateFailure((delegate, response) -> { - if (response.getResources().count() < 1) { - delegate.onResponse(Boolean.FALSE); - } else { - delegate.onResponse(Boolean.TRUE); - } - }); - - if (model.getServiceSettings() instanceof ElasticsearchInternalServiceSettings internalServiceSettings) { - String modelId = internalServiceSettings.modelId(); - GetTrainedModelsAction.Request getRequest = new GetTrainedModelsAction.Request(modelId); - executeAsyncWithOrigin(client, INFERENCE_ORIGIN, GetTrainedModelsAction.INSTANCE, getRequest, getModelsResponseListener); - } else if (model instanceof ElasticsearchInternalModel == false) { - listener.onFailure(notElasticsearchModelException(model)); - } else { - listener.onFailure( - new IllegalArgumentException( - "Unable to determine supported model for [" - + model.getConfigurations().getInferenceEntityId() - + "] please verify the request and submit a bug report if necessary." 
- ) - ); - } - } - @Override public TransportVersion getMinimalSupportedVersion() { return TransportVersions.V_8_14_0; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalService.java index 775ddca16046..948117954a63 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalService.java @@ -28,7 +28,6 @@ import org.elasticsearch.xpack.core.inference.results.ErrorChunkedInferenceResults; import org.elasticsearch.xpack.core.inference.results.InferenceChunkedSparseEmbeddingResults; import org.elasticsearch.xpack.core.inference.results.SparseEmbeddingResults; -import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsAction; import org.elasticsearch.xpack.core.ml.action.InferModelAction; import org.elasticsearch.xpack.core.ml.inference.results.ErrorInferenceResults; import org.elasticsearch.xpack.core.ml.inference.results.MlChunkedTextExpansionResults; @@ -43,8 +42,6 @@ import java.util.Map; import java.util.Set; -import static org.elasticsearch.xpack.core.ClientHelper.INFERENCE_ORIGIN; -import static org.elasticsearch.xpack.core.ClientHelper.executeAsyncWithOrigin; import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrThrowIfNull; import static org.elasticsearch.xpack.inference.services.ServiceUtils.throwIfNotEmptyMap; import static org.elasticsearch.xpack.inference.services.elser.ElserModels.ELSER_V2_MODEL; @@ -242,31 +239,6 @@ private void checkCompatibleTaskType(TaskType taskType) { } } - @Override - public void isModelDownloaded(Model model, ActionListener listener) { - ActionListener getModelsResponseListener = listener.delegateFailure((delegate, response) -> { - if 
(response.getResources().count() < 1) { - delegate.onResponse(Boolean.FALSE); - } else { - delegate.onResponse(Boolean.TRUE); - } - }); - - if (model instanceof ElserInternalModel elserModel) { - String modelId = elserModel.getServiceSettings().modelId(); - GetTrainedModelsAction.Request getRequest = new GetTrainedModelsAction.Request(modelId); - executeAsyncWithOrigin(client, INFERENCE_ORIGIN, GetTrainedModelsAction.INSTANCE, getRequest, getModelsResponseListener); - } else { - listener.onFailure( - new IllegalArgumentException( - "Can not download model automatically for [" - + model.getConfigurations().getInferenceEntityId() - + "] you may need to download it through the trained models API or with eland." - ) - ); - } - } - private static ElserMlNodeTaskSettings taskSettingsFromMap(TaskType taskType, Map config) { if (taskType != TaskType.SPARSE_EMBEDDING) { throw new ElasticsearchStatusException(TaskType.unsupportedTaskTypeErrorMsg(taskType, NAME), RestStatus.BAD_REQUEST); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java index e6fd725a5019..257616033f08 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java @@ -17,6 +17,7 @@ import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.elasticsearch.inference.ChunkedInferenceServiceResults; import org.elasticsearch.inference.ChunkingOptions; +import org.elasticsearch.inference.EmptyTaskSettings; import org.elasticsearch.inference.InferenceResults; import org.elasticsearch.inference.InferenceServiceExtension; import 
org.elasticsearch.inference.InputType; @@ -31,6 +32,7 @@ import org.elasticsearch.xpack.core.action.util.QueryPage; import org.elasticsearch.xpack.core.inference.action.InferenceAction; import org.elasticsearch.xpack.core.inference.results.ErrorChunkedInferenceResults; +import org.elasticsearch.xpack.core.inference.results.InferenceChunkedSparseEmbeddingResults; import org.elasticsearch.xpack.core.inference.results.InferenceChunkedTextEmbeddingFloatResults; import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsAction; import org.elasticsearch.xpack.core.ml.action.InferModelAction; @@ -39,8 +41,10 @@ import org.elasticsearch.xpack.core.ml.inference.TrainedModelConfig; import org.elasticsearch.xpack.core.ml.inference.TrainedModelPrefixStrings; import org.elasticsearch.xpack.core.ml.inference.results.ErrorInferenceResults; +import org.elasticsearch.xpack.core.ml.inference.results.InferenceChunkedTextExpansionResultsTests; import org.elasticsearch.xpack.core.ml.inference.results.MlChunkedTextEmbeddingFloatResults; import org.elasticsearch.xpack.core.ml.inference.results.MlChunkedTextEmbeddingFloatResultsTests; +import org.elasticsearch.xpack.core.ml.inference.results.MlChunkedTextExpansionResults; import org.elasticsearch.xpack.core.ml.inference.results.MlTextEmbeddingResults; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextEmbeddingConfigUpdate; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TokenizationConfigUpdate; @@ -52,12 +56,10 @@ import org.mockito.Mockito; import java.util.ArrayList; -import java.util.Arrays; import java.util.EnumSet; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Random; import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; @@ -76,7 +78,6 @@ public class ElasticsearchInternalServiceTests extends ESTestCase { - TaskType taskType = TaskType.TEXT_EMBEDDING; String randomInferenceEntityId = 
randomAlphaOfLength(10); private static ThreadPool threadPool; @@ -92,7 +93,25 @@ public void shutdownThreadPool() { } public void testParseRequestConfig() { + var service = createService(mock(Client.class)); + var settings = new HashMap(); + settings.put( + ModelConfigurations.SERVICE_SETTINGS, + new HashMap<>( + Map.of(ElasticsearchInternalServiceSettings.NUM_ALLOCATIONS, 1, ElasticsearchInternalServiceSettings.NUM_THREADS, 4) + ) + ); + ActionListener modelListener = ActionListener.wrap( + model -> fail("Model parsing should have failed"), + e -> assertThat(e, instanceOf(IllegalArgumentException.class)) + ); + + var taskType = randomFrom(TaskType.TEXT_EMBEDDING, TaskType.RERANK, TaskType.SPARSE_EMBEDDING); + service.parseRequestConfig(randomInferenceEntityId, taskType, settings, Set.of(), modelListener); + } + + public void testParseRequestConfig_Misconfigured() { // Null model variant { var service = createService(mock(Client.class)); @@ -109,43 +128,10 @@ public void testParseRequestConfig() { e -> assertThat(e, instanceOf(IllegalArgumentException.class)) ); + var taskType = randomFrom(TaskType.TEXT_EMBEDDING, TaskType.RERANK, TaskType.SPARSE_EMBEDDING); service.parseRequestConfig(randomInferenceEntityId, taskType, settings, Set.of(), modelListener); } - // Valid model variant - { - var service = createService(mock(Client.class)); - var settings = new HashMap(); - settings.put( - ModelConfigurations.SERVICE_SETTINGS, - new HashMap<>( - Map.of( - ElasticsearchInternalServiceSettings.NUM_ALLOCATIONS, - 1, - ElasticsearchInternalServiceSettings.NUM_THREADS, - 4, - ElasticsearchInternalServiceSettings.MODEL_ID, - ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID - ) - ) - ); - - var e5ServiceSettings = new MultilingualE5SmallInternalServiceSettings( - 1, - 4, - ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID, - null - ); - - service.parseRequestConfig( - randomInferenceEntityId, - taskType, - settings, - Set.of(), - 
getModelVerificationActionListener(e5ServiceSettings) - ); - } - // Invalid config map { var service = createService(mock(Client.class)); @@ -163,10 +149,12 @@ public void testParseRequestConfig() { e -> assertThat(e, instanceOf(ElasticsearchStatusException.class)) ); + var taskType = randomFrom(TaskType.TEXT_EMBEDDING, TaskType.RERANK, TaskType.SPARSE_EMBEDDING); service.parseRequestConfig(randomInferenceEntityId, taskType, settings, Set.of(), modelListener); } + } - // Invalid service settings + public void testParseRequestConfig_E5() { { var service = createService(mock(Client.class)); var settings = new HashMap(); @@ -179,52 +167,28 @@ public void testParseRequestConfig() { ElasticsearchInternalServiceSettings.NUM_THREADS, 4, ElasticsearchInternalServiceSettings.MODEL_ID, - ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID, // we can't directly test the eland case until we mock - // the threadpool within the client - "not_a_valid_service_setting", - randomAlphaOfLength(10) + ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID ) ) ); - ActionListener modelListener = ActionListener.wrap( - model -> fail("Model parsing should have failed"), - e -> assertThat(e, instanceOf(ElasticsearchStatusException.class)) - ); - - service.parseRequestConfig(randomInferenceEntityId, taskType, settings, Set.of(), modelListener); - } - - // Extra service settings - { - var service = createService(mock(Client.class)); - var settings = new HashMap(); - settings.put( - ModelConfigurations.SERVICE_SETTINGS, - new HashMap<>( - Map.of( - ElasticsearchInternalServiceSettings.NUM_ALLOCATIONS, - 1, - ElasticsearchInternalServiceSettings.NUM_THREADS, - 4, - ElasticsearchInternalServiceSettings.MODEL_ID, - ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID, // we can't directly test the eland case until we mock - // the threadpool within the client - "extra_setting_that_should_not_be_here", - randomAlphaOfLength(10) - ) - ) + var e5ServiceSettings = new 
MultilingualE5SmallInternalServiceSettings( + 1, + 4, + ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID, + null ); - ActionListener modelListener = ActionListener.wrap( - model -> fail("Model parsing should have failed"), - e -> assertThat(e, instanceOf(ElasticsearchStatusException.class)) + service.parseRequestConfig( + randomInferenceEntityId, + TaskType.TEXT_EMBEDDING, + settings, + Set.of(), + getModelVerificationActionListener(e5ServiceSettings) ); - - service.parseRequestConfig(randomInferenceEntityId, taskType, settings, Set.of(), modelListener); } - // Extra settings + // Invalid service settings { var service = createService(mock(Client.class)); var settings = new HashMap(); @@ -237,19 +201,19 @@ public void testParseRequestConfig() { ElasticsearchInternalServiceSettings.NUM_THREADS, 4, ElasticsearchInternalServiceSettings.MODEL_ID, - ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID // we can't directly test the eland case until we mock - // the threadpool within the client + ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID, + "not_a_valid_service_setting", + randomAlphaOfLength(10) ) ) ); - settings.put("extra_setting_that_should_not_be_here", randomAlphaOfLength(10)); ActionListener modelListener = ActionListener.wrap( model -> fail("Model parsing should have failed"), e -> assertThat(e, instanceOf(ElasticsearchStatusException.class)) ); - service.parseRequestConfig(randomInferenceEntityId, taskType, settings, Set.of(), modelListener); + service.parseRequestConfig(randomInferenceEntityId, TaskType.TEXT_EMBEDDING, settings, Set.of(), modelListener); } } @@ -342,10 +306,53 @@ public void testParseRequestConfig_Rerank_DefaultTaskSettings() { } } + @SuppressWarnings("unchecked") + public void testParseRequestConfig_SparseEmbedding() { + var client = mock(Client.class); + doAnswer(invocation -> { + var listener = (ActionListener) invocation.getArguments()[2]; + listener.onResponse( + new GetTrainedModelsAction.Response(new 
QueryPage<>(List.of(mock(TrainedModelConfig.class)), 1, mock(ParseField.class))) + ); + return null; + }).when(client).execute(Mockito.same(GetTrainedModelsAction.INSTANCE), any(), any()); + + when(client.threadPool()).thenReturn(threadPool); + + var service = createService(client); + var settings = new HashMap(); + settings.put( + ModelConfigurations.SERVICE_SETTINGS, + new HashMap<>( + Map.of( + ElasticsearchInternalServiceSettings.NUM_ALLOCATIONS, + 1, + ElasticsearchInternalServiceSettings.NUM_THREADS, + 4, + ElasticsearchInternalServiceSettings.MODEL_ID, + "foo" + ) + ) + ); + + ActionListener modelListener = ActionListener.wrap(model -> { + assertThat(model, instanceOf(CustomElandModel.class)); + assertThat(model.getTaskSettings(), instanceOf(EmptyTaskSettings.class)); + assertThat(model.getServiceSettings(), instanceOf(CustomElandInternalServiceSettings.class)); + }, e -> { fail("Model parsing failed " + e.getMessage()); }); + + service.parseRequestConfig(randomInferenceEntityId, TaskType.SPARSE_EMBEDDING, settings, Set.of(), modelListener); + } + private ActionListener getModelVerificationActionListener(MultilingualE5SmallInternalServiceSettings e5ServiceSettings) { return ActionListener.wrap(model -> { assertEquals( - new MultilingualE5SmallModel(randomInferenceEntityId, taskType, ElasticsearchInternalService.NAME, e5ServiceSettings), + new MultilingualE5SmallModel( + randomInferenceEntityId, + TaskType.TEXT_EMBEDDING, + ElasticsearchInternalService.NAME, + e5ServiceSettings + ), model ); }, e -> { fail("Model parsing failed " + e.getMessage()); }); @@ -371,7 +378,10 @@ public void testParsePersistedConfig() { ) ); - expectThrows(IllegalArgumentException.class, () -> service.parsePersistedConfig(randomInferenceEntityId, taskType, settings)); + expectThrows( + IllegalArgumentException.class, + () -> service.parsePersistedConfig(randomInferenceEntityId, TaskType.TEXT_EMBEDDING, settings) + ); } @@ -397,12 +407,17 @@ public void testParsePersistedConfig() { 
CustomElandEmbeddingModel parsedModel = (CustomElandEmbeddingModel) service.parsePersistedConfig( randomInferenceEntityId, - taskType, + TaskType.TEXT_EMBEDDING, settings ); var elandServiceSettings = new CustomElandInternalTextEmbeddingServiceSettings(1, 4, "invalid", null); assertEquals( - new CustomElandEmbeddingModel(randomInferenceEntityId, taskType, ElasticsearchInternalService.NAME, elandServiceSettings), + new CustomElandEmbeddingModel( + randomInferenceEntityId, + TaskType.TEXT_EMBEDDING, + ElasticsearchInternalService.NAME, + elandServiceSettings + ), parsedModel ); } @@ -436,11 +451,16 @@ public void testParsePersistedConfig() { MultilingualE5SmallModel parsedModel = (MultilingualE5SmallModel) service.parsePersistedConfig( randomInferenceEntityId, - taskType, + TaskType.TEXT_EMBEDDING, settings ); assertEquals( - new MultilingualE5SmallModel(randomInferenceEntityId, taskType, ElasticsearchInternalService.NAME, e5ServiceSettings), + new MultilingualE5SmallModel( + randomInferenceEntityId, + TaskType.TEXT_EMBEDDING, + ElasticsearchInternalService.NAME, + e5ServiceSettings + ), parsedModel ); } @@ -456,6 +476,8 @@ public void testParsePersistedConfig() { ) ); settings.put("not_a_valid_config_setting", randomAlphaOfLength(10)); + + var taskType = randomFrom(TaskType.TEXT_EMBEDDING, TaskType.RERANK, TaskType.SPARSE_EMBEDDING); expectThrows(IllegalArgumentException.class, () -> service.parsePersistedConfig(randomInferenceEntityId, taskType, settings)); } @@ -476,12 +498,13 @@ public void testParsePersistedConfig() { ) ) ); + var taskType = randomFrom(TaskType.TEXT_EMBEDDING, TaskType.RERANK, TaskType.SPARSE_EMBEDDING); expectThrows(IllegalArgumentException.class, () -> service.parsePersistedConfig(randomInferenceEntityId, taskType, settings)); } } @SuppressWarnings("unchecked") - public void testChunkInfer() { + public void testChunkInfer_e5() { var mlTrainedModelResults = new ArrayList(); 
mlTrainedModelResults.add(MlChunkedTextEmbeddingFloatResultsTests.createRandomResults()); mlTrainedModelResults.add(MlChunkedTextEmbeddingFloatResultsTests.createRandomResults()); @@ -568,6 +591,63 @@ public void testChunkInfer() { assertTrue("Listener not called", gotResults.get()); } + @SuppressWarnings("unchecked") + public void testChunkInfer_Sparse() { + var mlTrainedModelResults = new ArrayList(); + mlTrainedModelResults.add(InferenceChunkedTextExpansionResultsTests.createRandomResults()); + mlTrainedModelResults.add(InferenceChunkedTextExpansionResultsTests.createRandomResults()); + mlTrainedModelResults.add(new ErrorInferenceResults(new RuntimeException("boom"))); + var response = new InferModelAction.Response(mlTrainedModelResults, "foo", true); + + ThreadPool threadpool = new TestThreadPool("test"); + Client client = mock(Client.class); + when(client.threadPool()).thenReturn(threadpool); + doAnswer(invocationOnMock -> { + var listener = (ActionListener) invocationOnMock.getArguments()[2]; + listener.onResponse(response); + return null; + }).when(client).execute(same(InferModelAction.INSTANCE), any(InferModelAction.Request.class), any(ActionListener.class)); + + var model = new CustomElandModel( + "foo", + TaskType.SPARSE_EMBEDDING, + "elasticsearch", + new ElasticsearchInternalServiceSettings(1, 1, "model-id", null) + ); + var service = createService(client); + + var gotResults = new AtomicBoolean(); + var resultsListener = ActionListener.>wrap(chunkedResponse -> { + assertThat(chunkedResponse, hasSize(3)); + assertThat(chunkedResponse.get(0), instanceOf(InferenceChunkedSparseEmbeddingResults.class)); + var result1 = (InferenceChunkedSparseEmbeddingResults) chunkedResponse.get(0); + assertEquals(((MlChunkedTextExpansionResults) mlTrainedModelResults.get(0)).getChunks(), result1.getChunkedResults()); + assertThat(chunkedResponse.get(1), instanceOf(InferenceChunkedSparseEmbeddingResults.class)); + var result2 = (InferenceChunkedSparseEmbeddingResults) 
chunkedResponse.get(1); + assertEquals(((MlChunkedTextExpansionResults) mlTrainedModelResults.get(1)).getChunks(), result2.getChunkedResults()); + var result3 = (ErrorChunkedInferenceResults) chunkedResponse.get(2); + assertThat(result3.getException(), instanceOf(RuntimeException.class)); + assertThat(result3.getException().getMessage(), containsString("boom")); + gotResults.set(true); + }, ESTestCase::fail); + + service.chunkedInfer( + model, + null, + List.of("foo", "bar"), + Map.of(), + InputType.SEARCH, + new ChunkingOptions(null, null), + InferenceAction.Request.DEFAULT_TIMEOUT, + ActionListener.runAfter(resultsListener, () -> terminate(threadpool)) + ); + + if (gotResults.get() == false) { + terminate(threadpool); + } + assertTrue("Listener not called", gotResults.get()); + } + @SuppressWarnings("unchecked") public void testChunkInferSetsTokenization() { var expectedSpan = new AtomicInteger(); @@ -711,7 +791,7 @@ public void testParseRequestConfigEland_PreservesTaskType() { ) ); - var taskType = randomFrom(EnumSet.of(TaskType.RERANK, TaskType.TEXT_EMBEDDING)); + var taskType = randomFrom(EnumSet.of(TaskType.RERANK, TaskType.TEXT_EMBEDDING, TaskType.SPARSE_EMBEDDING)); CustomElandModel expectedModel = getCustomElandModel(taskType); PlainActionFuture listener = new PlainActionFuture<>(); @@ -739,6 +819,13 @@ private CustomElandModel getCustomElandModel(TaskType taskType) { ElasticsearchInternalService.NAME, serviceSettings ); + } else if (taskType == TaskType.SPARSE_EMBEDDING) { + expectedModel = new CustomElandModel( + randomInferenceEntityId, + taskType, + ElasticsearchInternalService.NAME, + new CustomElandInternalServiceSettings(1, 4, "custom-model", null) + ); } return expectedModel; } @@ -867,21 +954,4 @@ private ElasticsearchInternalService createService(Client client) { var context = new InferenceServiceExtension.InferenceServiceFactoryContext(client); return new ElasticsearchInternalService(context); } - - public static Model randomModelConfig(String 
inferenceEntityId) { - List givenList = Arrays.asList("MultilingualE5SmallModel"); - Random rand = org.elasticsearch.common.Randomness.get(); - String model = givenList.get(rand.nextInt(givenList.size())); - - return switch (model) { - case "MultilingualE5SmallModel" -> new MultilingualE5SmallModel( - inferenceEntityId, - TaskType.TEXT_EMBEDDING, - ElasticsearchInternalService.NAME, - MultilingualE5SmallInternalServiceSettingsTests.createRandom() - ); - default -> throw new IllegalArgumentException("model " + model + " is not supported for testing"); - }; - } - }