diff --git a/R/rdeephaven/DESCRIPTION b/R/rdeephaven/DESCRIPTION index 99188c3bdaa..42cfac130e0 100644 --- a/R/rdeephaven/DESCRIPTION +++ b/R/rdeephaven/DESCRIPTION @@ -1,7 +1,7 @@ Package: rdeephaven Type: Package Title: R Client for Deephaven Core -Version: 0.37.0 +Version: 0.38.0 Date: 2023-05-12 Author: Deephaven Data Labs Maintainer: Alex Peters diff --git a/buildSrc/build.gradle b/buildSrc/build.gradle index 76254a824b1..fe0f981259b 100644 --- a/buildSrc/build.gradle +++ b/buildSrc/build.gradle @@ -26,7 +26,7 @@ dependencies { because('needed by plugin com.bmuschko.docker-remote-api') } - implementation('com.avast.gradle:gradle-docker-compose-plugin:0.17.10') { + implementation('com.avast.gradle:gradle-docker-compose-plugin:0.17.11') { because('needed by plugin com.avast.gradle.docker-compose') } diff --git a/cpp-client/deephaven/CMakeLists.txt b/cpp-client/deephaven/CMakeLists.txt index 32e7b5fabe5..0cdddc47491 100644 --- a/cpp-client/deephaven/CMakeLists.txt +++ b/cpp-client/deephaven/CMakeLists.txt @@ -8,7 +8,7 @@ endif() project(deephaven) -set(deephaven_VERSION 0.37.0) +set(deephaven_VERSION 0.38.0) set(CMAKE_CXX_STANDARD 17) # for CMAKE_INSTALL_{dir} diff --git a/cpp-client/deephaven/tests/src/table_test.cc b/cpp-client/deephaven/tests/src/table_test.cc index f2396697eef..9f7dbdc71c8 100644 --- a/cpp-client/deephaven/tests/src/table_test.cc +++ b/cpp-client/deephaven/tests/src/table_test.cc @@ -41,8 +41,8 @@ TEST_CASE("Fetch the entire table (small)", "[client_table]") { "Bools = ii == 5 ? null : ((ii % 2) == 0)", "Strings = ii == 5 ? null : `hello ` + i", "DateTimes = ii == 5 ? null : '2001-03-01T12:34:56Z' + ii", - "LocalDates = ii == 5 ? null : parseLocalDate(`2001-3-` + (ii + 1))", - "LocalTimes = ii == 5 ? null : parseLocalTime(`12:34:` + (46 + ii))" + "LocalDates = ii == 5 ? null : '2001-03-01' + ((int)ii * 'P1D')", + "LocalTimes = ii == 5 ? 
null : '12:34:46'.plus((int)ii * 'PT1S')" }); std::cout << th.Stream(true) << '\n'; diff --git a/cpp-client/deephaven/tests/src/ticking_test.cc b/cpp-client/deephaven/tests/src/ticking_test.cc index 5d8ae0d6ae1..e722b06a8f1 100644 --- a/cpp-client/deephaven/tests/src/ticking_test.cc +++ b/cpp-client/deephaven/tests/src/ticking_test.cc @@ -14,6 +14,8 @@ using deephaven::client::Client; using deephaven::client::TableHandle; using deephaven::client::utility::TableMaker; using deephaven::dhcore::DateTime; +using deephaven::dhcore::LocalDate; +using deephaven::dhcore::LocalTime; using deephaven::dhcore::chunk::ChunkMaker; using deephaven::dhcore::chunk::BooleanChunk; using deephaven::dhcore::chunk::DateTimeChunk; @@ -188,6 +190,8 @@ class WaitForPopulatedTableCallback final : public CommonBase { auto bools = MakeReservedVector>(target_); auto strings = MakeReservedVector>(target_); auto date_times = MakeReservedVector>(target_); + auto local_dates = MakeReservedVector>(target_); + auto local_times = MakeReservedVector>(target_); auto date_time_start = DateTime::Parse("2001-03-01T12:34:56Z"); @@ -202,6 +206,8 @@ class WaitForPopulatedTableCallback final : public CommonBase { bools.emplace_back((i % 2) == 0); strings.emplace_back(fmt::format("hello {}", i)); date_times.emplace_back(DateTime::FromNanos(date_time_start.Nanos() + i)); + local_dates.emplace_back(LocalDate::Of(2001, 3, i + 1)); + local_times.emplace_back(LocalTime::Of(12, 34, 46 + i)); } if (target_ == 0) { @@ -220,6 +226,8 @@ class WaitForPopulatedTableCallback final : public CommonBase { bools[t2] = {}; strings[t2] = {}; date_times[t2] = {}; + local_dates[t2] = {}; + local_times[t2] = {}; CompareColumn(*current, "Chars", chars); CompareColumn(*current, "Bytes", int8s); @@ -231,6 +239,8 @@ class WaitForPopulatedTableCallback final : public CommonBase { CompareColumn(*current, "Bools", bools); CompareColumn(*current, "Strings", strings); CompareColumn(*current, "DateTimes", date_times); + CompareColumn(*current, "LocalDates", local_dates); + CompareColumn(*current, "LocalTimes", local_times); NotifyDone(); } @@ -255,7 +265,9 @@ TEST_CASE("Ticking Table: all the data is eventually present", "[ticking]") { "Doubles = II == 5 ? null : (double)II", "Bools = II == 5 ? null : ((II % 2) == 0)", "Strings = II == 5 ? null : (`hello ` + II)", - "DateTimes = II == 5 ? null : ('2001-03-01T12:34:56Z' + II)" + "DateTimes = II == 5 ? null : ('2001-03-01T12:34:56Z' + II)", + "LocalDates = ii == 5 ? null : '2001-03-01' + ((int)II * 'P1D')", + "LocalTimes = ii == 5 ? 
null : '12:34:46'.plus((int)II * 'PT1S')" }) .LastBy("II") .Sort(SortPair::Ascending("II")); diff --git a/csharp/ExcelAddInInstaller/ExcelAddInInstaller.aip b/csharp/ExcelAddInInstaller/ExcelAddInInstaller.aip index c4940030ac1..e67e5d01d15 100644 --- a/csharp/ExcelAddInInstaller/ExcelAddInInstaller.aip +++ b/csharp/ExcelAddInInstaller/ExcelAddInInstaller.aip @@ -8,6 +8,9 @@ + + + @@ -140,6 +143,8 @@ + + @@ -263,6 +268,9 @@ + + + diff --git a/csharp/ExcelAddInInstaller/resources/Banner.jpg b/csharp/ExcelAddInInstaller/resources/Banner.jpg new file mode 100644 index 00000000000..6b532a20133 Binary files /dev/null and b/csharp/ExcelAddInInstaller/resources/Banner.jpg differ diff --git a/csharp/ExcelAddInInstaller/resources/Dialog.jpg b/csharp/ExcelAddInInstaller/resources/Dialog.jpg new file mode 100644 index 00000000000..87363309312 Binary files /dev/null and b/csharp/ExcelAddInInstaller/resources/Dialog.jpg differ diff --git a/csharp/ExcelAddInInstaller/resources/Icon.ico b/csharp/ExcelAddInInstaller/resources/Icon.ico new file mode 100644 index 00000000000..bafb69cb572 Binary files /dev/null and b/csharp/ExcelAddInInstaller/resources/Icon.ico differ diff --git a/docker/registry/fedora/gradle.properties b/docker/registry/fedora/gradle.properties index e683ab64598..1d5d878ffbe 100644 --- a/docker/registry/fedora/gradle.properties +++ b/docker/registry/fedora/gradle.properties @@ -1,3 +1,3 @@ io.deephaven.project.ProjectType=DOCKER_REGISTRY deephaven.registry.imageName=fedora:39 -deephaven.registry.imageId=fedora@sha256:f23412a1ad7c430fc5ed9c029b15715aed3d50e6322902a066869310cddaf915 +deephaven.registry.imageId=fedora@sha256:d63d63fe593749a5e8dbc8152427d40bbe0ece53d884e00e5f3b44859efa5077 diff --git a/docker/registry/go/gradle.properties b/docker/registry/go/gradle.properties index 755143bb315..b10ac79995a 100644 --- a/docker/registry/go/gradle.properties +++ b/docker/registry/go/gradle.properties @@ -1,3 +1,3 @@ io.deephaven.project.ProjectType=DOCKER_REGISTRY deephaven.registry.imageName=golang:1 -deephaven.registry.imageId=golang@sha256:ad5c126b5cf501a8caef751a243bb717ec204ab1aa56dc41dc11be089fafcb4f +deephaven.registry.imageId=golang@sha256:73f06be4578c9987ce560087e2e2ea6485fb605e3910542cadd8fa09fc5f3e31 diff --git a/docker/registry/manylinux2014_x86_64/gradle.properties b/docker/registry/manylinux2014_x86_64/gradle.properties index 797a72e73d0..591603c5813 100644 --- a/docker/registry/manylinux2014_x86_64/gradle.properties +++ b/docker/registry/manylinux2014_x86_64/gradle.properties @@ -1,4 +1,4 @@ io.deephaven.project.ProjectType=DOCKER_REGISTRY deephaven.registry.imageName=quay.io/pypa/manylinux2014_x86_64:latest -deephaven.registry.imageId=quay.io/pypa/manylinux2014_x86_64@sha256:1b6743f7785bd1630d5fc985c898b1ac46d64ad7a39173bbfc8ad2ba315a70cf +deephaven.registry.imageId=quay.io/pypa/manylinux2014_x86_64@sha256:24858373a047b97fd3a8fe2ee28709479a828fa0ed88719158b728947eb53270 deephaven.registry.platform=linux/amd64 diff --git a/docker/registry/minio/gradle.properties b/docker/registry/minio/gradle.properties index bb8f5676e94..81eee96edf9 100644 --- a/docker/registry/minio/gradle.properties +++ b/docker/registry/minio/gradle.properties @@ -1,3 +1,3 @@ io.deephaven.project.ProjectType=DOCKER_REGISTRY deephaven.registry.imageName=minio/minio:latest -deephaven.registry.imageId=minio/minio@sha256:9535594ad4122b7a78c6632788a989b96d9199b483d3bd71a5ceae73a922cdfa +deephaven.registry.imageId=minio/minio@sha256:ac591851803a79aee64bc37f66d77c56b0a4b6e12d9e5356380f4105510f2332 diff --git 
a/docker/registry/protoc-base/gradle.properties b/docker/registry/protoc-base/gradle.properties index 0ee3a073576..50feb88c208 100644 --- a/docker/registry/protoc-base/gradle.properties +++ b/docker/registry/protoc-base/gradle.properties @@ -1,5 +1,5 @@ io.deephaven.project.ProjectType=DOCKER_REGISTRY deephaven.registry.imageName=ghcr.io/deephaven/protoc-base:latest -deephaven.registry.imageId=ghcr.io/deephaven/protoc-base@sha256:bbefcc311f3bd93da25d102173dd83a1b5244743186692d5251db71cb7186567 +deephaven.registry.imageId=ghcr.io/deephaven/protoc-base@sha256:3cb04f768cfdb66b47154568a23772d42dcc55076c7cbb94ae950309d350a82a # TODO(deephaven-base-images#54): arm64 native image for cpp-client-base deephaven.registry.platform=linux/amd64 diff --git a/docker/registry/python/gradle.properties b/docker/registry/python/gradle.properties index 9a1e9f210a9..eb6c4cf53ef 100644 --- a/docker/registry/python/gradle.properties +++ b/docker/registry/python/gradle.properties @@ -1,3 +1,3 @@ io.deephaven.project.ProjectType=DOCKER_REGISTRY deephaven.registry.imageName=python:3.10 -deephaven.registry.imageId=python@sha256:fd0fa50d997eb56ce560c6e5ca6a1f5cf8fdff87572a16ac07fb1f5ca01eb608 +deephaven.registry.imageId=python@sha256:941b0bfddbf17d809fd1f457acbf55dfca014e3e0e3d592b1c9070491681bc02 diff --git a/docker/registry/server-base/gradle.properties b/docker/registry/server-base/gradle.properties index 00867537014..ee1fdef9f3b 100644 --- a/docker/registry/server-base/gradle.properties +++ b/docker/registry/server-base/gradle.properties @@ -1,3 +1,3 @@ io.deephaven.project.ProjectType=DOCKER_REGISTRY deephaven.registry.imageName=ghcr.io/deephaven/server-base:edge -deephaven.registry.imageId=ghcr.io/deephaven/server-base@sha256:07cc7f322d57cfe86f174b88d6315ce25bac1b798c14ddb9b833b918810f69ef +deephaven.registry.imageId=ghcr.io/deephaven/server-base@sha256:6885027cbb05b7739a78be26393b1e22085ee1667640c0d9d05a95f8f561d819 diff --git a/docker/registry/slim-base/gradle.properties b/docker/registry/slim-base/gradle.properties index 36a9e765801..7625b2beb98 100644 --- a/docker/registry/slim-base/gradle.properties +++ b/docker/registry/slim-base/gradle.properties @@ -1,3 +1,3 @@ io.deephaven.project.ProjectType=DOCKER_REGISTRY deephaven.registry.imageName=ghcr.io/deephaven/server-slim-base:edge -deephaven.registry.imageId=ghcr.io/deephaven/server-slim-base@sha256:a01198a4af6bab387a337174fb2babe599e2bd0225a42ff9be31f3052d70e022 +deephaven.registry.imageId=ghcr.io/deephaven/server-slim-base@sha256:941e783bde091416f4b0359cc2a9df893f8340ada9bf4681f84875a91dfcebd1 diff --git a/docker/server-jetty/src/main/server-jetty/requirements.txt b/docker/server-jetty/src/main/server-jetty/requirements.txt index 19214972003..36c7e02a054 100644 --- a/docker/server-jetty/src/main/server-jetty/requirements.txt +++ b/docker/server-jetty/src/main/server-jetty/requirements.txt @@ -1,5 +1,5 @@ -adbc-driver-manager==1.2.0 -adbc-driver-postgresql==1.2.0 +adbc-driver-manager==1.3.0 +adbc-driver-postgresql==1.3.0 connectorx==0.4.0; platform.machine == 'x86_64' deephaven-plugin==0.6.0 docstring_parser==0.16 diff --git a/docker/server/src/main/server-netty/requirements.txt b/docker/server/src/main/server-netty/requirements.txt index 19214972003..36c7e02a054 100644 --- a/docker/server/src/main/server-netty/requirements.txt +++ b/docker/server/src/main/server-netty/requirements.txt @@ -1,5 +1,5 @@ -adbc-driver-manager==1.2.0 -adbc-driver-postgresql==1.2.0 +adbc-driver-manager==1.3.0 +adbc-driver-postgresql==1.3.0 connectorx==0.4.0; platform.machine == 
'x86_64' deephaven-plugin==0.6.0 docstring_parser==0.16 diff --git a/engine/context/src/test/java/io/deephaven/engine/context/TestQueryCompiler.java b/engine/context/src/test/java/io/deephaven/engine/context/TestQueryCompiler.java index d9bca87a113..cddb86f2291 100644 --- a/engine/context/src/test/java/io/deephaven/engine/context/TestQueryCompiler.java +++ b/engine/context/src/test/java/io/deephaven/engine/context/TestQueryCompiler.java @@ -264,18 +264,17 @@ public void testCollidingCompile() throws Exception { @Test public void testMultiCompileWithFailure() throws ExecutionException, InterruptedException { - final String goodProgram = String.join( - "\n", + final String goodProgram = String.join("\n", "public class GoodTest {", " public static void main (String [] args) {", " }", "}"); - final String badProgram = String.join( - "\n", - "public class BadTest {", - " public static void main (String [] args) {", - " }", - "}}"); + final String badProgram = String.join("\n", + "public class Formula {", + " public Formula() {", + " S.badCall(0);", + " }", + "}"); QueryCompilerRequest[] requests = new QueryCompilerRequest[] { QueryCompilerRequest.builder() @@ -299,15 +298,53 @@ public void testMultiCompileWithFailure() throws ExecutionException, Interrupted CompletionStageFuture.make(), }; - try { - ExecutionContext.getContext().getQueryCompiler().compile(requests, resolvers); - // noinspection DataFlowIssue - throw Assert.statementNeverExecuted(); - } catch (Exception ignored) { - } + ExecutionContext.getContext().getQueryCompiler().compile(requests, resolvers); Assert.eqTrue(resolvers[0].getFuture().isDone(), "resolvers[0].getFuture().isDone()"); Assert.eqTrue(resolvers[1].getFuture().isDone(), "resolvers[0].getFuture().isDone()"); Assert.neqNull(resolvers[1].getFuture().get(), "resolvers[1].getFuture().get()"); } + + @Test + public void testMultiCompileWithFailureSecond() throws ExecutionException, InterruptedException { + final String badProgram = String.join("\n", + "public class Formula {", + " public Formula() {", + " S.badCall(0);", + " }", + "}"); + final String goodProgram = String.join("\n", + "public class Formula {", + " public static void main (String [] args) {", + " }", + "}"); + + QueryCompilerRequest[] requests = new QueryCompilerRequest[] { + QueryCompilerRequest.builder() + .description("Test Good Compile") + .className("Formula") + .classBody(goodProgram) + .packageNameRoot("com.deephaven.test") + .build(), + QueryCompilerRequest.builder() + .description("Test Bad Compile") + .className("Formula") + .classBody(badProgram) + .packageNameRoot("com.deephaven.test") + .build(), + }; + + // noinspection unchecked + CompletionStageFuture.Resolver>[] resolvers = + (CompletionStageFuture.Resolver>[]) new CompletionStageFuture.Resolver[] { + CompletionStageFuture.make(), + CompletionStageFuture.make(), + }; + + ExecutionContext.getContext().getQueryCompiler().compile(requests, resolvers); + + Assert.eqTrue(resolvers[1].getFuture().isDone(), "resolvers[0].getFuture().isDone()"); + Assert.eqTrue(resolvers[0].getFuture().isDone(), "resolvers[0].getFuture().isDone()"); + Assert.neqNull(resolvers[0].getFuture().get(), "resolvers[1].getFuture().get()"); + } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/AsOfJoinHelper.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/AsOfJoinHelper.java index 92ee2074d11..47df5e98c26 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/AsOfJoinHelper.java +++ 
b/engine/table/src/main/java/io/deephaven/engine/table/impl/AsOfJoinHelper.java @@ -1340,8 +1340,10 @@ public void onUpdate(TableUpdate upstream) { @Override protected void destroy() { super.destroy(); - leftStampKeys.close(); - leftStampValues.close(); + getUpdateGraph().runWhenIdle(() -> { + leftStampKeys.close(); + leftStampValues.close(); + }); } }); @@ -1522,8 +1524,10 @@ public void onUpdate(TableUpdate upstream) { @Override protected void destroy() { super.destroy(); - compactedRightStampKeys.close(); - compactedRightStampValues.close(); + getUpdateGraph().runWhenIdle(() -> { + compactedRightStampKeys.close(); + compactedRightStampValues.close(); + }); } }); diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/WouldMatchOperation.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/WouldMatchOperation.java index 43e20c8d9b1..91dbc7e323b 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/WouldMatchOperation.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/WouldMatchOperation.java @@ -113,6 +113,21 @@ public SafeCloseable beginOperation(@NotNull final QueryTable parent) { void initializeFilters(@NotNull QueryTable parent) { final QueryCompilerRequestProcessor.BatchProcessor compilationProcessor = QueryCompilerRequestProcessor.batch(); Arrays.stream(whereFilters).forEach(filter -> filter.init(parent.getDefinition(), compilationProcessor)); + + final List disallowedRowVariables = + Arrays.stream(whereFilters).filter(WhereFilter::hasVirtualRowVariables).collect(Collectors.toList()); + if (!disallowedRowVariables.isEmpty()) { + throw new UncheckedTableException( + "wouldMatch filters cannot use virtual row variables (i, ii, and k): " + disallowedRowVariables); + } + + final List disallowedColumnVectors = + Arrays.stream(whereFilters).filter(wf -> !wf.getColumnArrays().isEmpty()).collect(Collectors.toList()); + if (!disallowedColumnVectors.isEmpty()) { + throw new UncheckedTableException( + "wouldMatch filters cannot use column Vectors (_ syntax): " + disallowedColumnVectors); + } + compilationProcessor.compile(); } @@ -517,13 +532,14 @@ private RowSet update(RowSet added, RowSet removed, RowSet modified, try (final SafeCloseableList toClose = new SafeCloseableList()) { // Filter and add addeds - final WritableRowSet filteredAdded = toClose.add(filter.filter(added, source, table, false)); + final WritableRowSet filteredAdded = toClose.add(filter.filter(added, table.getRowSet(), table, false)); RowSet keysToRemove = EMPTY_INDEX; // If we were affected, recompute mods and re-add the ones that pass. 
if (affected) { downstreamModified.setAll(name); - final RowSet filteredModified = toClose.add(filter.filter(modified, source, table, false)); + final RowSet filteredModified = + toClose.add(filter.filter(modified, table.getRowSet(), table, false)); // Now apply the additions and remove any non-matching modifieds filteredAdded.insert(filteredModified); diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/asofjoin/BucketedChunkedAjMergedListener.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/asofjoin/BucketedChunkedAjMergedListener.java index 8496a578178..ebc51824774 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/asofjoin/BucketedChunkedAjMergedListener.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/asofjoin/BucketedChunkedAjMergedListener.java @@ -791,7 +791,9 @@ private RowSet indexFromBuilder(int slotIndex) { @Override protected void destroy() { super.destroy(); - leftSsaFactory.close(); - rightSsaFactory.close(); + getUpdateGraph().runWhenIdle(() -> { + leftSsaFactory.close(); + rightSsaFactory.close(); + }); } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/AbstractConditionFilter.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/AbstractConditionFilter.java index 2007eee6526..9b9602843f5 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/AbstractConditionFilter.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/AbstractConditionFilter.java @@ -311,6 +311,11 @@ public boolean hasConstantArrayAccess() { return getFormulaShiftColPair() != null; } + @Override + public boolean hasVirtualRowVariables() { + return usesI || usesII || usesK; + } + /** * @return a Pair object, consisting of formula string and shift to column MatchPairs, if the filter formula or * expression has Array Access that conforms to "i +/- <constant>" or "ii +/- <constant>". If diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/FilterSelectColumn.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/FilterSelectColumn.java new file mode 100644 index 00000000000..2b096a69002 --- /dev/null +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/FilterSelectColumn.java @@ -0,0 +1,310 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.engine.table.impl.select; + +import io.deephaven.api.filter.Filter; +import io.deephaven.chunk.ChunkType; +import io.deephaven.chunk.WritableChunk; +import io.deephaven.chunk.WritableObjectChunk; +import io.deephaven.chunk.attributes.Values; +import io.deephaven.engine.exceptions.UncheckedTableException; +import io.deephaven.engine.rowset.*; +import io.deephaven.engine.table.*; +import io.deephaven.engine.table.impl.BaseTable; +import io.deephaven.engine.table.impl.MatchPair; +import io.deephaven.engine.table.impl.QueryCompilerRequestProcessor; +import io.deephaven.engine.table.impl.QueryTable; +import io.deephaven.engine.table.impl.sources.InMemoryColumnSource; +import io.deephaven.engine.table.impl.sources.SparseArrayColumnSource; +import io.deephaven.engine.table.impl.sources.ViewColumnSource; +import org.jetbrains.annotations.NotNull; + +import java.util.Collection; +import java.util.List; +import java.util.Map; + +/** + * The FilterSelectColumn wraps a {@link Filter} and produces a column of true or false Boolean values described by the + * filter. + *
+ * This SelectColumn is appropriate as an argument to a {@link Table#view(Collection)} or + * {@link Table#updateView(String...)}, lazily evaluating the equivalent of a {@link Table#wouldMatch(String...)} + * operation. Although select and update can also use Filters, wouldMatch provides a more efficient path for realized + * results as it stores and updates only a {@link RowSet}. The FilterSelectColumn can only evaluate the Filter one chunk + * at a time, and must write to an in-memory {@link Boolean} {@link ColumnSource}. + */ +class FilterSelectColumn implements SelectColumn { + + // We don't actually care to do anything, but cannot return null + private static final Formula.FillContext FILL_CONTEXT_INSTANCE = new Formula.FillContext() {}; + + @NotNull + private final String destName; + @NotNull + private final WhereFilter filter; + + private Table tableToFilter; + /** + * We store a copy of our table definition, to ensure that it is identical between initDef and initInputs. + */ + private TableDefinition computedDefinition; + + /** + * Create a FilterSelectColumn with the given name and {@link WhereFilter}. + * + * @param destName the name of the result column + * @param filter the filter that is evaluated to true or false for each row of the table + * @return a new FilterSelectColumn representing the provided filter. + */ + static FilterSelectColumn of(@NotNull final String destName, @NotNull final Filter filter) { + return new FilterSelectColumn(destName, WhereFilter.of(filter)); + } + + private FilterSelectColumn(@NotNull final String destName, @NotNull final WhereFilter filter) { + this.destName = destName; + this.filter = filter; + } + + @Override + public String toString() { + return "filter(" + filter + ')'; + } + + @Override + public List initInputs(final TrackingRowSet rowSet, + final Map> columnsOfInterest) { + tableToFilter = new QueryTable(rowSet, columnsOfInterest); + if (!computedDefinition.equals(tableToFilter.getDefinition())) { + throw new IllegalStateException( + "Definition changed between initDef and initInputs in FilterSelectColumn: initDef=" + + computedDefinition + ", initInputs" + tableToFilter.getDefinition()); + } + return filter.getColumns(); + } + + @Override + public List initDef(@NotNull final Map> columnDefinitionMap) { + filter.init(computedDefinition = TableDefinition.of(columnDefinitionMap.values())); + return checkForInvalidFilters(); + } + + @Override + public List initDef(@NotNull final Map> columnDefinitionMap, + @NotNull final QueryCompilerRequestProcessor compilationRequestProcessor) { + filter.init(computedDefinition = TableDefinition.of(columnDefinitionMap.values()), compilationRequestProcessor); + return checkForInvalidFilters(); + } + + /** + * Validates the filter to ensure it does not contain invalid filters such as column vectors or virtual row + * variables. Throws an {@link UncheckedTableException} if any invalid filters are found. + * + * @return the list of columns required by the filter. 
+ */ + private List checkForInvalidFilters() { + if (!filter.getColumnArrays().isEmpty()) { + throw new UncheckedTableException( + "Cannot use a filter with column Vectors (_ syntax) in select, view, update, or updateView: " + + filter); + } + if (filter.hasVirtualRowVariables()) { + throw new UncheckedTableException( + "Cannot use a filter with virtual row variables (i, ii, or k) in select, view, update, or updateView: " + + filter); + } + if (filter.isRefreshing()) { + /* + * TODO: DH-18052: updateView and view should support refreshing Filter Expressions + * + * This would enable us to use a whereIn or whereNotIn for things like conditional formatting; which could + * be attractive. However, a join or wouldMatch gets you there without the additional complexity. + * + * Supporting this requires SelectColumn dependencies, which have not previously existed. Additionally, if + * we were to support these for select and update (as opposed to view and updateView), then the filter could + * require recomputing the entire result table whenever anything changes. + */ + throw new UncheckedTableException( + "Cannot use a refreshing filter in select, view, update, or updateView: " + filter); + } + + return filter.getColumns(); + } + + @Override + public Class getReturnedType() { + return Boolean.class; + } + + @Override + public Class getReturnedComponentType() { + return null; + } + + @Override + public List getColumns() { + return filter.getColumns(); + } + + @Override + public List getColumnArrays() { + /* This should always be empty, because initDef throws when arrays or virtual row variables are used. */ + return List.of(); + } + + @Override + public boolean hasVirtualRowVariables() { + /* This should always be false, because initDef throws when arrays or ii and friends are used. 
*/ + return false; + } + + @NotNull + @Override + public ColumnSource getDataView() { + return new ViewColumnSource<>(Boolean.class, new FilterFormula(), isStateless()); + } + + @NotNull + @Override + public ColumnSource getLazyView() { + return getDataView(); + } + + @Override + public String getName() { + return destName; + } + + @Override + public MatchPair getMatchPair() { + throw new UnsupportedOperationException(); + } + + @Override + public final WritableColumnSource newDestInstance(final long size) { + return SparseArrayColumnSource.getSparseMemoryColumnSource(size, Boolean.class); + } + + @Override + public final WritableColumnSource newFlatDestInstance(final long size) { + return InMemoryColumnSource.getImmutableMemoryColumnSource(size, Boolean.class, null); + } + + @Override + public boolean isRetain() { + return false; + } + + @Override + public boolean isStateless() { + return filter.permitParallelization(); + } + + @Override + public FilterSelectColumn copy() { + return new FilterSelectColumn(destName, filter.copy()); + } + + @Override + public void validateSafeForRefresh(final BaseTable sourceTable) { + filter.validateSafeForRefresh(sourceTable); + } + + private class FilterFormula extends Formula { + public FilterFormula() { + super(null); + } + + @Override + public Boolean getBoolean(final long rowKey) { + try (final WritableRowSet selection = RowSetFactory.fromKeys(rowKey); + final WritableRowSet filteredRowSet = + filter.filter(selection, tableToFilter.getRowSet(), tableToFilter, false)) { + return filteredRowSet.isNonempty(); + } + } + + @Override + public Boolean getPrevBoolean(final long rowKey) { + try (final WritableRowSet selection = RowSetFactory.fromKeys(rowKey); + final WritableRowSet filteredRowSet = filter.filter(selection, + tableToFilter.getRowSet().prev(), tableToFilter, true)) { + return filteredRowSet.isNonempty(); + } + } + + @Override + public Object get(final long rowKey) { + return getBoolean(rowKey); + } + + @Override + public Object getPrev(final long rowKey) { + return getPrevBoolean(rowKey); + } + + @Override + public ChunkType getChunkType() { + return ChunkType.Object; + } + + @Override + public FillContext makeFillContext(final int chunkCapacity) { + return FILL_CONTEXT_INSTANCE; + } + + @Override + public void fillChunk( + @NotNull final FillContext fillContext, + @NotNull final WritableChunk destination, + @NotNull final RowSequence rowSequence) { + doFill(rowSequence, destination, false); + } + + @Override + public void fillPrevChunk( + @NotNull final FillContext fillContext, + @NotNull final WritableChunk destination, + @NotNull final RowSequence rowSequence) { + doFill(rowSequence, destination, true); + } + + private void doFill(@NotNull final RowSequence rowSequence, final WritableChunk destination, + final boolean usePrev) { + final WritableObjectChunk booleanDestination = destination.asWritableObjectChunk(); + booleanDestination.setSize(rowSequence.intSize()); + final RowSet fullSet = usePrev ? 
tableToFilter.getRowSet().prev() : tableToFilter.getRowSet(); + + try (final RowSet inputRowSet = rowSequence.asRowSet(); + final RowSet filtered = filter.filter(inputRowSet, fullSet, tableToFilter, usePrev)) { + if (filtered.size() == inputRowSet.size()) { + // if everything matches, short circuit the iteration + booleanDestination.fillWithValue(0, booleanDestination.size(), true); + return; + } + + int offset = 0; + + try (final RowSequence.Iterator inputRows = inputRowSet.getRowSequenceIterator(); + final RowSet.Iterator trueRows = filtered.iterator()) { + while (trueRows.hasNext()) { + final long nextTrue = trueRows.nextLong(); + // Find all the false rows between the last consumed input row and the next true row + final int falsesSkipped = (int) inputRows.advanceAndGetPositionDistance(nextTrue + 1) - 1; + if (falsesSkipped > 0) { + booleanDestination.fillWithValue(offset, falsesSkipped, false); + offset += falsesSkipped; + } + booleanDestination.set(offset++, true); + } + } + + final int remainingFalses = booleanDestination.size() - offset; + // Fill everything else up with false, because we've exhausted the trues + if (remainingFalses > 0) { + booleanDestination.fillWithValue(offset, remainingFalses, false); + } + } + } + } +} diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/SelectColumn.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/SelectColumn.java index 249ec9a14f2..63239a0a461 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/SelectColumn.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/SelectColumn.java @@ -278,7 +278,7 @@ public SelectColumn visit(Literal rhs) { @Override public SelectColumn visit(Filter rhs) { - return makeSelectColumn(Strings.of(rhs)); + return FilterSelectColumn.of(lhs.name(), rhs); } @Override diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/WhereFilter.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/WhereFilter.java index 9dae903ae4e..454cc547020 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/WhereFilter.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/WhereFilter.java @@ -301,6 +301,13 @@ default boolean canMemoize() { return false; } + /** + * Returns true if this filter uses row virtual offset columns of {@code i}, {@code ii} or {@code k}. + */ + default boolean hasVirtualRowVariables() { + return false; + } + /** * Create a copy of this WhereFilter. 
* @@ -331,7 +338,7 @@ default Filter invert() { @Override default T walk(Expression.Visitor visitor) { - throw new UnsupportedOperationException("WhereFilters do not implement walk"); + return visitor.visit(this); } @Override diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/chunkcolumnsource/ByteChunkColumnSource.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/chunkcolumnsource/ByteChunkColumnSource.java index b52645f878b..efb9942419a 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/chunkcolumnsource/ByteChunkColumnSource.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/chunkcolumnsource/ByteChunkColumnSource.java @@ -189,7 +189,7 @@ public void addChunk(@NotNull final WritableChunk chunk) { } @Override - public void clear() { + public synchronized void clear() { totalSize = 0; data.forEach(SafeCloseable::close); data.clear(); diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/chunkcolumnsource/CharChunkColumnSource.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/chunkcolumnsource/CharChunkColumnSource.java index 57aeba11fbe..9cf074473dd 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/chunkcolumnsource/CharChunkColumnSource.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/chunkcolumnsource/CharChunkColumnSource.java @@ -185,7 +185,7 @@ public void addChunk(@NotNull final WritableChunk chunk) { } @Override - public void clear() { + public synchronized void clear() { totalSize = 0; data.forEach(SafeCloseable::close); data.clear(); diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/chunkcolumnsource/DoubleChunkColumnSource.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/chunkcolumnsource/DoubleChunkColumnSource.java index d620235038c..6348a6857ce 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/chunkcolumnsource/DoubleChunkColumnSource.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/chunkcolumnsource/DoubleChunkColumnSource.java @@ -189,7 +189,7 @@ public void addChunk(@NotNull final WritableChunk chunk) { } @Override - public void clear() { + public synchronized void clear() { totalSize = 0; data.forEach(SafeCloseable::close); data.clear(); diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/chunkcolumnsource/FloatChunkColumnSource.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/chunkcolumnsource/FloatChunkColumnSource.java index 908bcc06fb4..eaf6fa88c5d 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/chunkcolumnsource/FloatChunkColumnSource.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/chunkcolumnsource/FloatChunkColumnSource.java @@ -189,7 +189,7 @@ public void addChunk(@NotNull final WritableChunk chunk) { } @Override - public void clear() { + public synchronized void clear() { totalSize = 0; data.forEach(SafeCloseable::close); data.clear(); diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/chunkcolumnsource/IntChunkColumnSource.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/chunkcolumnsource/IntChunkColumnSource.java index 417c568f5fa..e84825a1264 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/chunkcolumnsource/IntChunkColumnSource.java +++ 
b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/chunkcolumnsource/IntChunkColumnSource.java @@ -189,7 +189,7 @@ public void addChunk(@NotNull final WritableChunk chunk) { } @Override - public void clear() { + public synchronized void clear() { totalSize = 0; data.forEach(SafeCloseable::close); data.clear(); diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/chunkcolumnsource/LongChunkColumnSource.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/chunkcolumnsource/LongChunkColumnSource.java index 9db4a1ec35d..2a703e0bd16 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/chunkcolumnsource/LongChunkColumnSource.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/chunkcolumnsource/LongChunkColumnSource.java @@ -189,7 +189,7 @@ public void addChunk(@NotNull final WritableChunk chunk) { } @Override - public void clear() { + public synchronized void clear() { totalSize = 0; data.forEach(SafeCloseable::close); data.clear(); diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/chunkcolumnsource/ObjectChunkColumnSource.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/chunkcolumnsource/ObjectChunkColumnSource.java index 5e6bae02f21..69dbcdb3409 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/chunkcolumnsource/ObjectChunkColumnSource.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/chunkcolumnsource/ObjectChunkColumnSource.java @@ -189,7 +189,7 @@ public void addChunk(@NotNull final WritableChunk chunk) { } @Override - public void clear() { + public synchronized void clear() { totalSize = 0; data.forEach(SafeCloseable::close); data.clear(); diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/chunkcolumnsource/ShortChunkColumnSource.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/chunkcolumnsource/ShortChunkColumnSource.java index c89e46d4452..d74ee9050fe 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/chunkcolumnsource/ShortChunkColumnSource.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/sources/chunkcolumnsource/ShortChunkColumnSource.java @@ -189,7 +189,7 @@ public void addChunk(@NotNull final WritableChunk chunk) { } @Override - public void clear() { + public synchronized void clear() { totalSize = 0; data.forEach(SafeCloseable::close); data.clear(); diff --git a/engine/table/src/main/java/io/deephaven/stream/StreamToBlinkTableAdapter.java b/engine/table/src/main/java/io/deephaven/stream/StreamToBlinkTableAdapter.java index 0543b62b738..08aba9b7f63 100644 --- a/engine/table/src/main/java/io/deephaven/stream/StreamToBlinkTableAdapter.java +++ b/engine/table/src/main/java/io/deephaven/stream/StreamToBlinkTableAdapter.java @@ -19,9 +19,6 @@ import io.deephaven.engine.table.impl.sources.LongAsInstantColumnSource; import io.deephaven.engine.table.impl.sources.NullValueColumnSource; import io.deephaven.engine.table.impl.sources.SwitchColumnSource; -import io.deephaven.engine.updategraph.NotificationQueue; -import io.deephaven.engine.updategraph.UpdateGraph; -import io.deephaven.engine.updategraph.UpdateSourceRegistrar; import io.deephaven.engine.table.impl.QueryTable; import io.deephaven.chunk.ChunkType; import io.deephaven.chunk.WritableChunk; @@ -29,6 +26,9 @@ import io.deephaven.engine.rowset.RowSetFactory; import io.deephaven.engine.rowset.RowSetShiftData; import 
io.deephaven.engine.rowset.TrackingWritableRowSet; +import io.deephaven.engine.updategraph.NotificationQueue; +import io.deephaven.engine.updategraph.UpdateGraph; +import io.deephaven.engine.updategraph.UpdateSourceRegistrar; import io.deephaven.internal.log.LoggerFactory; import io.deephaven.io.logger.Logger; import io.deephaven.util.MultiException; @@ -40,11 +40,13 @@ import java.lang.ref.WeakReference; import java.time.Instant; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.Objects; import java.util.concurrent.atomic.AtomicBoolean; import java.util.stream.Stream; @@ -271,6 +273,15 @@ private static void maybeClearChunkColumnSource(ColumnSource cs) { } } + private synchronized void clearChunkColumnSources() { + SafeCloseable.closeAll( + Stream.of(bufferChunkSources, currentChunkSources, prevChunkSources) + .filter(Objects::nonNull) + .flatMap(Arrays::stream) + .map(ccs -> ccs::clear)); + bufferChunkSources = currentChunkSources = prevChunkSources = null; + } + /** * Return the {@link Table#BLINK_TABLE_ATTRIBUTE blink} {@link Table table} that this adapter is producing, and * ensure that this StreamToBlinkTableAdapter no longer enforces strong reachability of the result. May return @@ -306,6 +317,7 @@ public void close() { .endl(); updateSourceRegistrar.removeSource(this); streamPublisher.shutdown(); + getUpdateGraph().runWhenIdle(this::clearChunkColumnSources); } } diff --git a/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableSelectUpdateTest.java b/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableSelectUpdateTest.java index c8eb9073dbe..10e1b015cf5 100644 --- a/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableSelectUpdateTest.java +++ b/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableSelectUpdateTest.java @@ -3,22 +3,23 @@ // package io.deephaven.engine.table.impl; -import io.deephaven.api.JoinMatch; -import io.deephaven.api.TableOperations; +import io.deephaven.api.*; +import io.deephaven.api.filter.Filter; +import io.deephaven.api.filter.FilterIn; +import io.deephaven.api.literal.Literal; import io.deephaven.base.testing.BaseArrayTestCase; +import io.deephaven.chunk.ObjectChunk; +import io.deephaven.chunk.attributes.Values; import io.deephaven.configuration.Configuration; import io.deephaven.engine.context.ExecutionContext; import io.deephaven.engine.context.QueryScope; +import io.deephaven.engine.exceptions.UncheckedTableException; import io.deephaven.engine.liveness.LivenessScope; import io.deephaven.engine.liveness.LivenessScopeStack; +import io.deephaven.engine.primitive.iterator.CloseableIterator; import io.deephaven.engine.rowset.*; -import io.deephaven.engine.table.ColumnSource; -import io.deephaven.engine.table.ShiftObliviousListener; -import io.deephaven.engine.table.Table; -import io.deephaven.engine.table.impl.select.DhFormulaColumn; -import io.deephaven.engine.table.impl.select.FormulaCompilationException; -import io.deephaven.engine.table.impl.select.SelectColumn; -import io.deephaven.engine.table.impl.select.SelectColumnFactory; +import io.deephaven.engine.table.*; +import io.deephaven.engine.table.impl.select.*; import io.deephaven.engine.table.impl.sources.InMemoryColumnSource; import io.deephaven.engine.table.impl.sources.LongSparseArraySource; import io.deephaven.engine.table.impl.sources.RedirectedColumnSource; @@ -42,9 +43,12 @@ 
import org.junit.Rule; import org.junit.Test; +import java.text.DecimalFormat; import java.util.*; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Supplier; +import java.util.stream.Collectors; +import java.util.stream.IntStream; import static io.deephaven.engine.testutil.TstUtils.*; import static io.deephaven.engine.util.TableTools.*; @@ -1302,6 +1306,249 @@ public void testPropagationOfAttributes() { } } + @Test + public void testFilterExpression() { + final Filter filter = FilterIn.of(ColumnName.of("A"), Literal.of(1), Literal.of(3)); + final Table t = TableTools.newTable(intCol("A", 1, 1, 2, 3, 5, 8)); + final Table wm = t.wouldMatch(new WouldMatchPair("AWM", filter)); + + // use an update + final Table up = t.update(List.of(Selectable.of(ColumnName.of("AWM"), filter))); + assertTableEquals(wm, up); + + // use an updateView + final Table upv = t.updateView(List.of(Selectable.of(ColumnName.of("AWM"), filter))); + assertTableEquals(wm, upv); + + // Test the getBoolean method + assertEquals(true, upv.getColumnSource("AWM").get(t.getRowSet().get(0))); + assertEquals(false, upv.getColumnSource("AWM").get(t.getRowSet().get(2))); + + // and now a more generic WhereFilter + + final Filter filter2 = WhereFilterFactory.getExpression("A == 1 || A==3"); + final Table wm2 = t.wouldMatch(new WouldMatchPair("AWM", filter2)); + + // use an update + final Table up2 = t.update(List.of(Selectable.of(ColumnName.of("AWM"), filter2))); + assertTableEquals(wm2, up2); + + // a Filter where nothing is true, to check that state + final Table upvf = t.updateView(List.of(Selectable.of(ColumnName.of("AWM"), Filter.ofFalse()))); + assertTableEquals(t.updateView("AWM=false"), upvf); + + // a Filter where everything is true + final Table upvt = t.updateView(List.of(Selectable.of(ColumnName.of("AWM"), Filter.ofTrue()))); + assertTableEquals(t.updateView("AWM=true"), upvt); + + // a Filter where the last value in the chunk is true + final Filter filter3 = WhereFilterFactory.getExpression("A in 8"); + final Table wm3 = t.wouldMatch(new WouldMatchPair("AWM", filter3)); + final Table upv3 = t.updateView(List.of(Selectable.of(ColumnName.of("AWM"), filter3))); + assertTableEquals(wm3, upv3); + } + + @Test + public void testFilterExpressionGetPrev() { + final Filter filter = FilterIn.of(ColumnName.of("A"), Literal.of(2), Literal.of(4)); + final QueryTable t = TstUtils.testRefreshingTable(i(2, 4, 6, 8).toTracking(), intCol("A", 1, 2, 3, 4)); + // noinspection resource + final TrackingWritableRowSet rs = t.getRowSet().writableCast(); + + // use an updateView + final Table upv = t.updateView(List.of(Selectable.of(ColumnName.of("AWM"), filter))); + + // Test the getBoolean method + ColumnSource resultColumn = upv.getColumnSource("AWM"); + assertEquals(false, resultColumn.get(rs.get(0))); + assertEquals(true, resultColumn.get(rs.get(1))); + assertEquals(false, resultColumn.get(rs.get(2))); + assertEquals(true, resultColumn.get(rs.get(3))); + + // and do it with chunks + try (final CloseableIterator awm = upv.columnIterator("AWM")) { + assertEquals(Arrays.asList(false, true, false, true), awm.stream().collect(Collectors.toList())); + } + + final ControlledUpdateGraph updateGraph = ExecutionContext.getContext().getUpdateGraph().cast(); + + updateGraph.runWithinUnitTestCycle(() -> { + assertEquals(false, resultColumn.get(t.getRowSet().get(0))); + assertEquals(true, resultColumn.get(t.getRowSet().get(1))); + assertEquals(false, resultColumn.get(t.getRowSet().get(2))); + assertEquals(true, 
resultColumn.get(t.getRowSet().get(3))); + + final RowSet prevRowset = rs.prev(); + assertEquals(false, resultColumn.getPrev(prevRowset.get(0))); + assertEquals(true, resultColumn.getPrev(prevRowset.get(1))); + assertEquals(false, resultColumn.getPrev(prevRowset.get(2))); + assertEquals(true, resultColumn.getPrev(prevRowset.get(3))); + + addToTable(t, i(1, 2, 9), intCol("A", 2, 2, 4)); + removeRows(t, i(8)); + rs.insert(i(1, 9)); + rs.remove(8); + t.notifyListeners(i(1, 9), i(8), i()); + + // with a chunk + try (final ChunkSource.GetContext fc = resultColumn.makeGetContext(4)) { + final ObjectChunk prevValues = + resultColumn.getPrevChunk(fc, prevRowset).asObjectChunk(); + assertEquals(false, prevValues.get(0)); + assertEquals(true, prevValues.get(1)); + assertEquals(false, prevValues.get(2)); + assertEquals(true, prevValues.get(3)); + } + + assertEquals(false, resultColumn.getPrev(prevRowset.get(0))); + assertEquals(true, resultColumn.getPrev(prevRowset.get(1))); + assertEquals(false, resultColumn.getPrev(prevRowset.get(2))); + assertEquals(true, resultColumn.getPrev(prevRowset.get(3))); + + assertEquals(true, resultColumn.get(rs.get(0))); + assertEquals(true, resultColumn.get(rs.get(1))); + assertEquals(true, resultColumn.get(rs.get(2))); + assertEquals(false, resultColumn.get(rs.get(3))); + assertEquals(true, resultColumn.get(rs.get(4))); + }); + } + + @Test + public void testFilterExpressionFillChunkPerformance() { + testFilterExpressionFillChunkPerformance(1.0); + testFilterExpressionFillChunkPerformance(.9999); + testFilterExpressionFillChunkPerformance(.999); + testFilterExpressionFillChunkPerformance(.8725); + testFilterExpressionFillChunkPerformance(.75); + testFilterExpressionFillChunkPerformance(.5); + testFilterExpressionFillChunkPerformance(.25); + testFilterExpressionFillChunkPerformance(.125); + testFilterExpressionFillChunkPerformance(0.001); + testFilterExpressionFillChunkPerformance(0.0001); + } + + public void testFilterExpressionFillChunkPerformance(final double density) { + final int numIterations = 1; + final int size = 100_000; + final Filter filter = FilterIn.of(ColumnName.of("A"), Literal.of(1)); + + final Random random = new Random(20241120); + final List values = IntStream.range(0, size).mapToObj(ignored -> random.nextDouble() < density) + .collect(Collectors.toList()); + QueryScope.addParam("values", values); + final Table t = TableTools.emptyTable(size).update("A=(Boolean)(values[i]) ? 
1: 0"); + QueryScope.addParam("values", null); + + final Table upv = t.updateView(List.of(Selectable.of(ColumnName.of("AWM"), filter))); + final long startTime = System.nanoTime(); + for (int iters = 0; iters < numIterations; ++iters) { + final long trueValues = upv.columnIterator("AWM").stream().filter(x -> (Boolean) x).count(); + assertEquals(values.stream().filter(x -> x).count(), trueValues); + } + final long endTime = System.nanoTime(); + final double duration = endTime - startTime; + System.out.println("Density: " + new DecimalFormat("0.0000").format(density) + ", Nanos: " + (long) duration + + ", per cell=" + new DecimalFormat("0.00").format(duration / (size * numIterations))); + } + + @Test + public void testFilterExpressionArray() { + final Filter filter = WhereFilterFactory.getExpression("A=A_[i-1]"); + final Filter filterArrayOnly = WhereFilterFactory.getExpression("A=A_.size() = 1"); + final Filter filterKonly = WhereFilterFactory.getExpression("A=k+1"); + final QueryTable setTable = TstUtils.testRefreshingTable(intCol("B")); + final Filter whereIn = new DynamicWhereFilter(setTable, true, MatchPairFactory.getExpression("A=B")); + final QueryTable table = TstUtils.testRefreshingTable(intCol("A", 1, 1, 2, 3, 5, 8, 9, 9)); + + final UncheckedTableException wme = Assert.assertThrows(UncheckedTableException.class, + () -> table.wouldMatch(new WouldMatchPair("AWM", filter))); + Assert.assertEquals("wouldMatch filters cannot use virtual row variables (i, ii, and k): [A=A_[i-1]]", + wme.getMessage()); + + final UncheckedTableException wme2 = Assert.assertThrows(UncheckedTableException.class, + () -> table.wouldMatch(new WouldMatchPair("AWM", filterArrayOnly))); + Assert.assertEquals("wouldMatch filters cannot use column Vectors (_ syntax): [A=A_.size() = 1]", + wme2.getMessage()); + + final UncheckedTableException upe = Assert.assertThrows(UncheckedTableException.class, + () -> table.update(List.of(Selectable.of(ColumnName.of("AWM"), filter)))); + Assert.assertEquals( + "Cannot use a filter with column Vectors (_ syntax) in select, view, update, or updateView: A=A_[i-1]", + upe.getMessage()); + + final UncheckedTableException uve = Assert.assertThrows(UncheckedTableException.class, + () -> table.updateView(List.of(Selectable.of(ColumnName.of("AWM"), filter)))); + Assert.assertEquals( + "Cannot use a filter with column Vectors (_ syntax) in select, view, update, or updateView: A=A_[i-1]", + uve.getMessage()); + + final UncheckedTableException se = Assert.assertThrows(UncheckedTableException.class, + () -> table.select(List.of(Selectable.of(ColumnName.of("AWM"), filterKonly)))); + Assert.assertEquals( + "Cannot use a filter with virtual row variables (i, ii, or k) in select, view, update, or updateView: A=k+1", + se.getMessage()); + + final UncheckedTableException ve = Assert.assertThrows(UncheckedTableException.class, + () -> table.view(List.of(Selectable.of(ColumnName.of("AWM"), filterKonly)))); + Assert.assertEquals( + "Cannot use a filter with virtual row variables (i, ii, or k) in select, view, update, or updateView: A=k+1", + ve.getMessage()); + + final UncheckedTableException dw = Assert.assertThrows(UncheckedTableException.class, + () -> table.view(List.of(Selectable.of(ColumnName.of("AWM"), whereIn)))); + Assert.assertEquals( + "Cannot use a refreshing filter in select, view, update, or updateView: DynamicWhereFilter([A=B])", + dw.getMessage()); + + } + + @Test + public void testFilterExpressionTicking() { + for (int seed = 0; seed < 5; ++seed) { + 
testFilterExpressionTicking(seed, new MutableInt(100)); + } + } + + private void testFilterExpressionTicking(final int seed, final MutableInt numSteps) { + final Random random = new Random(seed); + final ColumnInfo[] columnInfo; + final int size = 25; + final QueryTable queryTable = getTable(size, random, + columnInfo = initColumnInfos(new String[] {"Sym", "intCol", "doubleCol"}, + new SetGenerator<>("a", "b", "c", "d", "e"), + new IntGenerator(10, 100), + new SetGenerator<>(10.1, 20.1, 30.1))); + + final EvalNuggetInterface[] en = new EvalNuggetInterface[] { + new TableComparator(queryTable.wouldMatch("SM=Sym in `b`, `d`"), + queryTable.update(List.of(Selectable.of(ColumnName.of("SM"), + WhereFilterFactory.getExpression("Sym in `b`, `d`"))))), + new TableComparator(queryTable.wouldMatch("SM=Sym in `b`, `d`"), + queryTable.updateView(List.of(Selectable.of(ColumnName.of("SM"), + WhereFilterFactory.getExpression("Sym in `b`, `d`"))))), + new TableComparator(queryTable.wouldMatch("IM=intCol < 50"), + queryTable.update(List.of( + Selectable.of(ColumnName.of("IM"), WhereFilterFactory.getExpression("intCol < 50"))))), + new TableComparator(queryTable.wouldMatch("IM=intCol < 50"), + queryTable.updateView(List.of( + Selectable.of(ColumnName.of("IM"), WhereFilterFactory.getExpression("intCol < 50"))))), + new TableComparator(queryTable.wouldMatch("IM=Sym= (intCol%2 == 0? `a` : `b`)"), + queryTable.update(List.of(Selectable.of(ColumnName.of("IM"), + WhereFilterFactory.getExpression("Sym= (intCol%2 == 0? `a` : `b`)"))))), + new TableComparator(queryTable.wouldMatch("IM=Sym= (intCol%2 == 0? `a` : `b`)"), + queryTable.updateView(List.of(Selectable.of(ColumnName.of("IM"), + WhereFilterFactory.getExpression("Sym= (intCol%2 == 0? `a` : `b`)"))))), + }; + + final int maxSteps = numSteps.get(); + for (numSteps.set(0); numSteps.get() < maxSteps; numSteps.increment()) { + if (RefreshingTableTestCase.printTableUpdates) { + System.out.println("Step = " + numSteps.get()); + } + RefreshingTableTestCase.simulateShiftAwareStep(size, random, queryTable, columnInfo, en); + } + } + @Test public void testAlwaysUpdate() { final MutableInt count = new MutableInt(0); diff --git a/engine/table/src/test/java/io/deephaven/stream/TestStreamToBlinkTableAdapter.java b/engine/table/src/test/java/io/deephaven/stream/TestStreamToBlinkTableAdapter.java index 5dee5f43d28..26e4928b0a4 100644 --- a/engine/table/src/test/java/io/deephaven/stream/TestStreamToBlinkTableAdapter.java +++ b/engine/table/src/test/java/io/deephaven/stream/TestStreamToBlinkTableAdapter.java @@ -4,7 +4,11 @@ package io.deephaven.stream; import io.deephaven.chunk.attributes.Values; +import io.deephaven.chunk.util.pools.ChunkPoolConstants; +import io.deephaven.chunk.util.pools.ChunkPoolReleaseTracking; import io.deephaven.engine.context.ExecutionContext; +import io.deephaven.engine.liveness.LivenessScope; +import io.deephaven.engine.liveness.LivenessScopeStack; import io.deephaven.engine.rowset.RowSetFactory; import io.deephaven.engine.rowset.RowSetShiftData; import io.deephaven.engine.table.Table; @@ -18,6 +22,7 @@ import io.deephaven.engine.table.impl.SimpleListener; import io.deephaven.chunk.*; import io.deephaven.util.BooleanUtils; +import io.deephaven.util.SafeCloseable; import io.deephaven.util.type.ArrayTypeUtils; import junit.framework.TestCase; import org.apache.commons.lang3.mutable.MutableBoolean; @@ -446,6 +451,34 @@ public void onFailureInternal(Throwable originalException, Entry sourceEntry) { TestCase.assertTrue(listenerFailed.booleanValue()); } + 
@Test + public void testCleanup() { + final TableDefinition tableDefinition = TableDefinition.from( + List.of("O", "B", "S", "I", "L", "F", "D", "C"), + List.of(String.class, byte.class, short.class, int.class, long.class, float.class, double.class, + char.class)); + final Table tableToAdd = emptyTable(ChunkPoolConstants.SMALLEST_POOLED_CHUNK_CAPACITY).updateView( + "O=Long.toString(ii)", "B=(byte)ii", "S=(short)ii", "I=(int)ii", "L=ii", "F=(float)ii", + "D=(double)ii", "C=(char)ii"); + final ControlledUpdateGraph updateGraph = ExecutionContext.getContext().getUpdateGraph().cast(); + try (final SafeCloseable ignored = LivenessScopeStack.open(new LivenessScope(true), true)) { + final TablePublisher tablePublisher = TablePublisher.of("Test", tableDefinition, null, null); + // Add buffered chunks + tablePublisher.add(tableToAdd); + // Move buffered chunks to current + updateGraph.runWithinUnitTestCycle(() -> { + }); + // Add more buffered chunks + tablePublisher.add(tableToAdd); + // Move current to previous, buffered to current + updateGraph.runWithinUnitTestCycle(() -> { + }); + // Add even more buffered chunks + tablePublisher.add(tableToAdd); + } + ChunkPoolReleaseTracking.check(); + } + private static class DummyStreamPublisher implements StreamPublisher { private boolean fail; diff --git a/engine/updategraph/src/main/java/io/deephaven/engine/updategraph/UpdateGraph.java b/engine/updategraph/src/main/java/io/deephaven/engine/updategraph/UpdateGraph.java index c29efed8408..b546758e5e1 100644 --- a/engine/updategraph/src/main/java/io/deephaven/engine/updategraph/UpdateGraph.java +++ b/engine/updategraph/src/main/java/io/deephaven/engine/updategraph/UpdateGraph.java @@ -6,6 +6,7 @@ import io.deephaven.base.log.LogOutput; import io.deephaven.base.verify.Assert; import io.deephaven.io.log.LogEntry; +import io.deephaven.util.annotations.FinalDefault; import io.deephaven.util.function.ThrowingSupplier; import io.deephaven.util.locks.AwareFunctionalLock; import org.jetbrains.annotations.NotNull; @@ -244,4 +245,24 @@ public LogOutput append(LogOutput output) { } // endregion refresh control + + /** + * Run {@code task} immediately if this UpdateGraph is currently idle, else schedule {@code task} to run at a later + * time when it has become idle. + * + * @param task The task to run when idle + */ + @FinalDefault + default void runWhenIdle(@NotNull final Runnable task) { + if (clock().currentState() == LogicalClock.State.Idle) { + task.run(); + } else { + addNotification(new TerminalNotification() { + @Override + public void run() { + task.run(); + } + }); + } + } } diff --git a/extensions/iceberg/s3/src/main/java/io/deephaven/iceberg/util/IcebergToolsS3.java b/extensions/iceberg/s3/src/main/java/io/deephaven/iceberg/util/IcebergToolsS3.java index 22e25a2a23c..ef4f98b5d11 100644 --- a/extensions/iceberg/s3/src/main/java/io/deephaven/iceberg/util/IcebergToolsS3.java +++ b/extensions/iceberg/s3/src/main/java/io/deephaven/iceberg/util/IcebergToolsS3.java @@ -13,7 +13,9 @@ import org.apache.iceberg.aws.AwsProperties; import org.apache.iceberg.aws.HttpClientProperties; import org.apache.iceberg.aws.glue.GlueCatalog; +import org.apache.iceberg.aws.s3.S3FileIO; import org.apache.iceberg.aws.s3.S3FileIOProperties; +import org.apache.iceberg.catalog.Catalog; import org.apache.iceberg.rest.RESTCatalog; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; @@ -22,9 +24,8 @@ import java.util.Map; /** - * Tools for accessing tables in the Iceberg table format. 
+ * Tools for accessing tables in the Iceberg table format from S3. */ -@SuppressWarnings("unused") public final class IcebergToolsS3 { /** @@ -52,14 +53,6 @@ public static IcebergCatalogAdapter createS3Rest( // Set up the properties map for the Iceberg catalog final Map properties = new HashMap<>(); - - final RESTCatalog catalog = new RESTCatalog(); - - properties.put(CatalogProperties.CATALOG_IMPL, catalog.getClass().getName()); - properties.put(CatalogProperties.URI, catalogURI); - properties.put(CatalogProperties.WAREHOUSE_LOCATION, warehouseLocation); - - // Configure the properties map from the Iceberg instructions. if (!Strings.isNullOrEmpty(accessKeyId) && !Strings.isNullOrEmpty(secretAccessKey)) { properties.put(S3FileIOProperties.ACCESS_KEY_ID, accessKeyId); properties.put(S3FileIOProperties.SECRET_ACCESS_KEY, secretAccessKey); @@ -71,10 +64,9 @@ public static IcebergCatalogAdapter createS3Rest( properties.put(S3FileIOProperties.ENDPOINT, endpointOverride); } - final String catalogName = name != null ? name : "IcebergCatalog-" + catalogURI; + final RESTCatalog catalog = new RESTCatalog(); catalog.setConf(new Configuration()); - catalog.initialize(catalogName, properties); - return IcebergCatalogAdapter.of(catalog); + return createAdapterCommon(name, catalogURI, warehouseLocation, catalog, properties); } /** @@ -97,16 +89,28 @@ public static IcebergCatalogAdapter createGlue( final Map properties = new HashMap<>(); final GlueCatalog catalog = new GlueCatalog(); + catalog.setConf(new Configuration()); + return createAdapterCommon(name, catalogURI, warehouseLocation, catalog, properties); + } + private static IcebergCatalogAdapter createAdapterCommon( + @Nullable final String name, + @NotNull final String catalogURI, + @NotNull final String warehouseLocation, + @NotNull final Catalog catalog, + @NotNull final Map properties) { properties.put(CatalogProperties.CATALOG_IMPL, catalog.getClass().getName()); properties.put(CatalogProperties.URI, catalogURI); properties.put(CatalogProperties.WAREHOUSE_LOCATION, warehouseLocation); + // Following is needed to write new manifest files when writing new data. + // Not setting this will result in using ResolvingFileIO. + properties.put(CatalogProperties.FILE_IO_IMPL, S3FileIO.class.getName()); + final String catalogName = name != null ? 
name : "IcebergCatalog-" + catalogURI; - catalog.setConf(new Configuration()); catalog.initialize(catalogName, properties); - return new IcebergCatalogAdapter(catalog, properties); + return IcebergCatalogAdapter.of(catalog, properties); } /** diff --git a/extensions/iceberg/s3/src/test/java/io/deephaven/iceberg/util/IcebergToolsTest.java b/extensions/iceberg/s3/src/test/java/io/deephaven/iceberg/util/IcebergToolsTest.java index fbd36ce1f77..0920d803fae 100644 --- a/extensions/iceberg/s3/src/test/java/io/deephaven/iceberg/util/IcebergToolsTest.java +++ b/extensions/iceberg/s3/src/test/java/io/deephaven/iceberg/util/IcebergToolsTest.java @@ -15,9 +15,11 @@ import io.deephaven.extensions.s3.S3Instructions; import io.deephaven.iceberg.TestCatalog.IcebergTestCatalog; import org.apache.iceberg.Schema; +import io.deephaven.iceberg.base.IcebergUtils; import org.apache.iceberg.Snapshot; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.types.Type; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -31,6 +33,7 @@ import java.io.File; import java.math.BigDecimal; +import java.time.Instant; import java.time.LocalDate; import java.time.LocalDateTime; import java.time.LocalTime; @@ -99,7 +102,7 @@ public abstract class IcebergToolsTest { ColumnDefinition.ofString("ColumnType"), ColumnDefinition.ofBoolean("IsPartitioning")); - IcebergReadInstructions instructions; + private IcebergReadInstructions instructions; public abstract S3AsyncClient s3AsyncClient(); @@ -115,10 +118,10 @@ public abstract class IcebergToolsTest { private String warehousePath; private IcebergTestCatalog resourceCatalog; - public final EngineCleanup framework = new EngineCleanup(); + private final EngineCleanup framework = new EngineCleanup(); @BeforeEach - public void setUp() throws Exception { + void setUp() throws Exception { framework.setUp(); bucket = "warehouse"; asyncClient = s3AsyncClient(); @@ -137,7 +140,7 @@ public void setUp() throws Exception { } @AfterEach - public void tearDown() throws Exception { + void tearDown() throws Exception { resourceCatalog.close(); for (String key : keys) { asyncClient.deleteObject(DeleteObjectRequest.builder().bucket(bucket).key(key).build()).get(); @@ -195,7 +198,7 @@ private void uploadSalesRenamed() throws ExecutionException, InterruptedExceptio } @Test - public void testListNamespaces() { + void testListNamespaces() { final IcebergCatalogAdapter adapter = IcebergTools.createAdapter(resourceCatalog); final Collection namespaces = adapter.listNamespaces(); @@ -212,7 +215,7 @@ public void testListNamespaces() { } @Test - public void testListTables() { + void testListTables() { final IcebergCatalogAdapter adapter = IcebergTools.createAdapter(resourceCatalog); final Namespace ns = Namespace.of("sales"); @@ -236,7 +239,7 @@ public void testListTables() { } @Test - public void testGetTableAdapter() { + void testGetTableAdapter() { final IcebergCatalogAdapter adapter = IcebergTools.createAdapter(resourceCatalog); // Test the overloads of the load() method. 
@@ -252,7 +255,7 @@ public void testGetTableAdapter() { } @Test - public void testListSnapshots() { + void testListSnapshots() { final IcebergCatalogAdapter adapter = IcebergTools.createAdapter(resourceCatalog); final IcebergTableAdapter tableAdapter = adapter.loadTable("sales.sales_multi"); @@ -275,7 +278,7 @@ public void testListSnapshots() { } @Test - public void testOpenTableA() throws ExecutionException, InterruptedException, TimeoutException { + void testOpenTableA() throws ExecutionException, InterruptedException, TimeoutException { uploadSalesPartitioned(); final IcebergCatalogAdapter adapter = IcebergTools.createAdapter(resourceCatalog); @@ -301,7 +304,7 @@ public void testOpenTableB() throws ExecutionException, InterruptedException, Ti } @Test - public void testOpenTableC() throws ExecutionException, InterruptedException, TimeoutException { + void testOpenTableC() throws ExecutionException, InterruptedException, TimeoutException { uploadSalesSingle(); final IcebergCatalogAdapter adapter = IcebergTools.createAdapter(resourceCatalog); @@ -314,7 +317,7 @@ public void testOpenTableC() throws ExecutionException, InterruptedException, Ti } @Test - public void testOpenTableS3Only() throws ExecutionException, InterruptedException, TimeoutException { + void testOpenTableS3Only() throws ExecutionException, InterruptedException, TimeoutException { uploadSalesPartitioned(); final IcebergCatalogAdapter adapter = IcebergTools.createAdapter(resourceCatalog); @@ -327,7 +330,7 @@ public void testOpenTableS3Only() throws ExecutionException, InterruptedExceptio } @Test - public void testOpenTableDefinition() throws ExecutionException, InterruptedException, TimeoutException { + void testOpenTableDefinition() throws ExecutionException, InterruptedException, TimeoutException { uploadSalesPartitioned(); final IcebergCatalogAdapter adapter = IcebergTools.createAdapter(resourceCatalog); @@ -340,7 +343,7 @@ public void testOpenTableDefinition() throws ExecutionException, InterruptedExce } @Test - public void testOpenTablePartitionTypeException() { + void testOpenTablePartitionTypeException() { final TableDefinition tableDef = TableDefinition.of( ColumnDefinition.ofLong("year").withPartitioning(), ColumnDefinition.ofInt("month").withPartitioning(), @@ -372,7 +375,7 @@ public void testOpenTablePartitionTypeException() { } @Test - public void testOpenTableDefinitionRename() throws ExecutionException, InterruptedException, TimeoutException { + void testOpenTableDefinitionRename() throws ExecutionException, InterruptedException, TimeoutException { uploadSalesPartitioned(); final TableDefinition renamed = TableDefinition.of( @@ -406,7 +409,7 @@ public void testOpenTableDefinitionRename() throws ExecutionException, Interrupt } @Test - public void testSkippedPartitioningColumn() throws ExecutionException, InterruptedException, TimeoutException { + void testSkippedPartitioningColumn() throws ExecutionException, InterruptedException, TimeoutException { uploadSalesPartitioned(); final TableDefinition tableDef = TableDefinition.of( @@ -434,7 +437,7 @@ public void testSkippedPartitioningColumn() throws ExecutionException, Interrupt @Test - public void testReorderedPartitioningColumn() throws ExecutionException, InterruptedException, TimeoutException { + void testReorderedPartitioningColumn() throws ExecutionException, InterruptedException, TimeoutException { uploadSalesPartitioned(); final TableDefinition tableDef = TableDefinition.of( @@ -461,7 +464,7 @@ public void testReorderedPartitioningColumn() throws 
ExecutionException, Interru } @Test - public void testZeroPartitioningColumns() throws ExecutionException, InterruptedException, TimeoutException { + void testZeroPartitioningColumns() throws ExecutionException, InterruptedException, TimeoutException { uploadSalesPartitioned(); final IcebergReadInstructions localInstructions = IcebergReadInstructions.builder() @@ -479,7 +482,7 @@ public void testZeroPartitioningColumns() throws ExecutionException, Interrupted } @Test - public void testIncorrectPartitioningColumns() throws ExecutionException, InterruptedException, TimeoutException { + void testIncorrectPartitioningColumns() throws ExecutionException, InterruptedException, TimeoutException { final TableDefinition tableDef = TableDefinition.of( ColumnDefinition.ofInt("month").withPartitioning(), ColumnDefinition.ofInt("year").withPartitioning(), @@ -512,7 +515,7 @@ public void testIncorrectPartitioningColumns() throws ExecutionException, Interr } @Test - public void testMissingPartitioningColumns() { + void testMissingPartitioningColumns() { final TableDefinition tableDef = TableDefinition.of( ColumnDefinition.ofInt("__year").withPartitioning(), // Incorrect name ColumnDefinition.ofInt("__month").withPartitioning(), // Incorrect name @@ -544,7 +547,7 @@ public void testMissingPartitioningColumns() { } @Test - public void testOpenTableColumnRename() throws ExecutionException, InterruptedException, TimeoutException { + void testOpenTableColumnRename() throws ExecutionException, InterruptedException, TimeoutException { uploadSalesPartitioned(); final IcebergReadInstructions localInstructions = IcebergReadInstructions.builder() @@ -562,7 +565,7 @@ public void testOpenTableColumnRename() throws ExecutionException, InterruptedEx } @Test - public void testOpenTableColumnLegalization() throws ExecutionException, InterruptedException, TimeoutException { + void testOpenTableColumnLegalization() throws ExecutionException, InterruptedException, TimeoutException { uploadSalesRenamed(); final IcebergReadInstructions localInstructions = IcebergReadInstructions.builder() @@ -579,7 +582,7 @@ public void testOpenTableColumnLegalization() throws ExecutionException, Interru } @Test - public void testOpenTableColumnLegalizationRename() + void testOpenTableColumnLegalizationRename() throws ExecutionException, InterruptedException, TimeoutException { uploadSalesRenamed(); @@ -607,7 +610,7 @@ public void testOpenTableColumnLegalizationRename() } @Test - public void testOpenTableColumnLegalizationPartitionException() { + void testOpenTableColumnLegalizationPartitionException() { final TableDefinition tableDef = TableDefinition.of( ColumnDefinition.ofInt("Year").withPartitioning(), ColumnDefinition.ofInt("Month").withPartitioning()); @@ -636,7 +639,7 @@ public void testOpenTableColumnLegalizationPartitionException() { } @Test - public void testOpenTableColumnRenamePartitioningColumns() + void testOpenTableColumnRenamePartitioningColumns() throws ExecutionException, InterruptedException, TimeoutException { uploadSalesPartitioned(); @@ -666,7 +669,7 @@ public void testOpenTableColumnRenamePartitioningColumns() } @Test - public void testOpenTableSnapshot() throws ExecutionException, InterruptedException, TimeoutException { + void testOpenTableSnapshot() throws ExecutionException, InterruptedException, TimeoutException { uploadSalesMulti(); final IcebergCatalogAdapter adapter = IcebergTools.createAdapter(resourceCatalog); @@ -706,7 +709,7 @@ public void testOpenTableSnapshot() throws ExecutionException, InterruptedExcept 
} @Test - public void testOpenTableSnapshotByID() throws ExecutionException, InterruptedException, TimeoutException { + void testOpenTableSnapshotByID() throws ExecutionException, InterruptedException, TimeoutException { uploadSalesMulti(); final IcebergCatalogAdapter adapter = IcebergTools.createAdapter(resourceCatalog); @@ -748,7 +751,7 @@ public void testOpenTableSnapshotByID() throws ExecutionException, InterruptedEx } @Test - public void testOpenAllTypesTable() throws ExecutionException, InterruptedException, TimeoutException { + void testOpenAllTypesTable() throws ExecutionException, InterruptedException, TimeoutException { uploadAllTypes(); final IcebergCatalogAdapter adapter = IcebergTools.createAdapter(resourceCatalog); @@ -761,7 +764,7 @@ public void testOpenAllTypesTable() throws ExecutionException, InterruptedExcept } @Test - public void testTableDefinition() { + void testTableDefinition() { final IcebergCatalogAdapter adapter = IcebergTools.createAdapter(resourceCatalog); final IcebergTableAdapter tableAdapter = adapter.loadTable("sales.sales_multi"); final List snapshots = tableAdapter.listSnapshots(); @@ -788,7 +791,7 @@ public void testTableDefinition() { } @Test - public void testTableSchema() { + void testTableSchema() { final IcebergCatalogAdapter adapter = IcebergTools.createAdapter(resourceCatalog); final IcebergTableAdapter tableAdapter = adapter.loadTable("sales.sales_multi"); @@ -810,7 +813,7 @@ public void testTableSchema() { } @Test - public void testTableDefinitionTable() { + void testTableDefinitionTable() { final IcebergCatalogAdapter adapter = IcebergTools.createAdapter(resourceCatalog); final IcebergTableAdapter tableAdapter = adapter.loadTable("sales.sales_multi"); final List snapshots = tableAdapter.listSnapshots(); @@ -842,7 +845,7 @@ public void testTableDefinitionTable() { } @Test - public void testTableDefinitionWithInstructions() { + void testTableDefinitionWithInstructions() { final IcebergCatalogAdapter adapter = IcebergTools.createAdapter(resourceCatalog); final IcebergTableAdapter tableAdapter = adapter.loadTable("sales.sales_multi"); @@ -884,7 +887,7 @@ public void testTableDefinitionWithInstructions() { } @Test - public void testManualRefreshingTable() throws ExecutionException, InterruptedException, TimeoutException { + void testManualRefreshingTable() throws ExecutionException, InterruptedException, TimeoutException { uploadSalesMulti(); final IcebergCatalogAdapter adapter = IcebergTools.createAdapter(resourceCatalog); @@ -925,4 +928,23 @@ public void testManualRefreshingTable() throws ExecutionException, InterruptedEx updateGraph.runWithinUnitTestCycle(table::refresh); Assert.eq(table.size(), "table.size()", 0, "expected rows in the table"); } + + @Test + void testConvertToIcebergTypeAndBack() { + final Class[] javaTypes = { + Boolean.class, double.class, float.class, int.class, long.class, String.class, Instant.class, + LocalDateTime.class, LocalDate.class, LocalTime.class, byte[].class + }; + + for (final Class javaType : javaTypes) { + // Java type -> Iceberg type + final Type icebergType = IcebergUtils.convertToIcebergType(javaType); + + // Iceberg type -> Deephaven type + final io.deephaven.qst.type.Type deephavenType = IcebergUtils.convertToDHType(icebergType); + + // Deephaven type == Java type + Assert.eq(javaType, javaType.getName(), deephavenType.clazz(), deephavenType.clazz().getName()); + } + } } diff --git a/extensions/iceberg/s3/src/test/java/io/deephaven/iceberg/util/IcebergWriteInstructionsTest.java 
b/extensions/iceberg/s3/src/test/java/io/deephaven/iceberg/util/IcebergWriteInstructionsTest.java new file mode 100644 index 00000000000..631fea540f1 --- /dev/null +++ b/extensions/iceberg/s3/src/test/java/io/deephaven/iceberg/util/IcebergWriteInstructionsTest.java @@ -0,0 +1,51 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.iceberg.util; + +import io.deephaven.engine.table.Table; +import io.deephaven.engine.util.TableTools; +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.failBecauseExceptionWasNotThrown; +import static org.assertj.core.api.Assertions.assertThat; + +class IcebergWriteInstructionsTest { + + @Test + void testSetDhTables() { + final Table table1 = TableTools.emptyTable(3); + final Table table2 = TableTools.emptyTable(4); + final IcebergWriteInstructions instructions = IcebergWriteInstructions.builder() + .addTables(table1) + .addTables(table2) + .build(); + assertThat(instructions.tables()).hasSize(2); + assertThat(instructions.tables()).contains(table1); + assertThat(instructions.tables()).contains(table2); + } + + @Test + void testSetPartitionPaths() { + final Table table1 = TableTools.emptyTable(3); + final String pp1 = "P1C=1/PC2=2"; + final Table table2 = TableTools.emptyTable(4); + final String pp2 = "P1C=2/PC2=3"; + try { + final IcebergWriteInstructions instructions = IcebergWriteInstructions.builder() + .addPartitionPaths(pp1, pp2) + .build(); + failBecauseExceptionWasNotThrown(IllegalArgumentException.class); + } catch (final IllegalArgumentException e) { + assertThat(e).hasMessageContaining("Partition path must be provided for each table"); + } + + final IcebergWriteInstructions instructions = IcebergWriteInstructions.builder() + .addTables(table1, table2) + .addPartitionPaths(pp1, pp2) + .build(); + assertThat(instructions.partitionPaths()).hasSize(2); + assertThat(instructions.partitionPaths()).contains(pp1); + assertThat(instructions.partitionPaths()).contains(pp2); + } +} diff --git a/extensions/iceberg/s3/src/test/java/io/deephaven/iceberg/util/LocalstackWarehouseSqliteCatalogTest.java b/extensions/iceberg/s3/src/test/java/io/deephaven/iceberg/util/LocalstackWarehouseSqliteCatalogTest.java index fb9c8e020f7..29b27a8485f 100644 --- a/extensions/iceberg/s3/src/test/java/io/deephaven/iceberg/util/LocalstackWarehouseSqliteCatalogTest.java +++ b/extensions/iceberg/s3/src/test/java/io/deephaven/iceberg/util/LocalstackWarehouseSqliteCatalogTest.java @@ -9,17 +9,20 @@ import org.junit.jupiter.api.Tag; import software.amazon.awssdk.services.s3.S3AsyncClient; +import java.time.Duration; + @Tag("testcontainers") -public final class LocalstackWarehouseSqliteCatalogTest extends S3WarehouseSqliteCatalogBase { +final class LocalstackWarehouseSqliteCatalogTest extends S3WarehouseSqliteCatalogBase { @BeforeAll - public static void initContainer() { + static void initContainer() { // ensure container is started so container startup time isn't associated with a specific test LocalStack.init(); } @Override public S3Instructions s3Instructions() { - return LocalStack.s3Instructions(S3Instructions.builder()).build(); + return LocalStack.s3Instructions(S3Instructions.builder() + .readTimeout(Duration.ofSeconds(10))).build(); } @Override diff --git a/extensions/iceberg/s3/src/test/java/io/deephaven/iceberg/util/MinioWarehouseSqliteCatalogTest.java b/extensions/iceberg/s3/src/test/java/io/deephaven/iceberg/util/MinioWarehouseSqliteCatalogTest.java index 1a569ab88ab..ed4f8560c57 100644 --- 
a/extensions/iceberg/s3/src/test/java/io/deephaven/iceberg/util/MinioWarehouseSqliteCatalogTest.java +++ b/extensions/iceberg/s3/src/test/java/io/deephaven/iceberg/util/MinioWarehouseSqliteCatalogTest.java @@ -11,10 +11,12 @@ import org.junit.jupiter.api.Tag; import software.amazon.awssdk.services.s3.S3AsyncClient; +import java.time.Duration; + @Tag("testcontainers") -public final class MinioWarehouseSqliteCatalogTest extends S3WarehouseSqliteCatalogBase { +final class MinioWarehouseSqliteCatalogTest extends S3WarehouseSqliteCatalogBase { @BeforeAll - public static void initContainer() { + static void initContainer() { // TODO(deephaven-core#5116): MinIO testcontainers does not work on OS X Assumptions.assumeFalse(OSUtil.runningMacOS(), "OSUtil.runningMacOS()"); // ensure container is started so container startup time isn't associated with a specific test @@ -23,7 +25,8 @@ public static void initContainer() { @Override public S3Instructions s3Instructions() { - return MinIO.s3Instructions(S3Instructions.builder()).build(); + return MinIO.s3Instructions(S3Instructions.builder() + .readTimeout(Duration.ofSeconds(10))).build(); } @Override diff --git a/extensions/iceberg/s3/src/test/java/io/deephaven/iceberg/util/S3WarehouseSqliteCatalogBase.java b/extensions/iceberg/s3/src/test/java/io/deephaven/iceberg/util/S3WarehouseSqliteCatalogBase.java index acd529037f5..8beca64e938 100644 --- a/extensions/iceberg/s3/src/test/java/io/deephaven/iceberg/util/S3WarehouseSqliteCatalogBase.java +++ b/extensions/iceberg/s3/src/test/java/io/deephaven/iceberg/util/S3WarehouseSqliteCatalogBase.java @@ -20,12 +20,17 @@ import static io.deephaven.extensions.s3.testlib.S3Helper.TIMEOUT_SECONDS; -public abstract class S3WarehouseSqliteCatalogBase extends SqliteCatalogBase { +abstract class S3WarehouseSqliteCatalogBase extends SqliteCatalogBase { public abstract S3Instructions s3Instructions(); public abstract S3AsyncClient s3AsyncClient(); + @Override + public final Object dataInstructions() { + return s3Instructions(); + } + @Override protected IcebergCatalogAdapter catalogAdapter(TestInfo testInfo, Path rootDir, Map properties) throws ExecutionException, InterruptedException, TimeoutException { diff --git a/extensions/iceberg/s3/src/test/java/io/deephaven/iceberg/util/TableParquetWriterOptionsTest.java b/extensions/iceberg/s3/src/test/java/io/deephaven/iceberg/util/TableParquetWriterOptionsTest.java new file mode 100644 index 00000000000..d6d555321c0 --- /dev/null +++ b/extensions/iceberg/s3/src/test/java/io/deephaven/iceberg/util/TableParquetWriterOptionsTest.java @@ -0,0 +1,160 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.iceberg.util; + +import io.deephaven.engine.table.ColumnDefinition; +import io.deephaven.engine.table.TableDefinition; +import io.deephaven.parquet.table.ParquetInstructions; +import org.junit.jupiter.api.Test; + +import java.util.Map; + +import static org.assertj.core.api.Assertions.failBecauseExceptionWasNotThrown; +import static org.assertj.core.api.AssertionsForClassTypes.assertThat; + +class TableParquetWriterOptionsTest { + + /** + * Create a new TableParquetWriterOptions builder with an empty table definition. 
+ */ + private static TableParquetWriterOptions.Builder instructions() { + return TableParquetWriterOptions.builder().tableDefinition(TableDefinition.of( + ColumnDefinition.ofInt("someCol"))); + } + + @Test + void defaults() { + final TableParquetWriterOptions instructions = instructions().build(); + assertThat(instructions.dataInstructions()).isEmpty(); + assertThat(instructions.compressionCodecName()).isEqualTo("SNAPPY"); + assertThat(instructions.maximumDictionaryKeys()).isEqualTo(1048576); + assertThat(instructions.maximumDictionarySize()).isEqualTo(1048576); + assertThat(instructions.targetPageSize()).isEqualTo(65536); + } + + @Test + void testSetTableDefinition() { + final TableDefinition definition = TableDefinition.of( + ColumnDefinition.ofInt("PC1").withPartitioning(), + ColumnDefinition.ofInt("PC2").withPartitioning(), + ColumnDefinition.ofLong("I")); + assertThat(TableParquetWriterOptions.builder() + .tableDefinition(definition) + .build() + .tableDefinition()) + .isEqualTo(definition); + } + + @Test + void testEmptyTableDefinition() { + try { + TableParquetWriterOptions.builder() + .tableDefinition(TableDefinition.of()) + .build(); + failBecauseExceptionWasNotThrown(IllegalArgumentException.class); + } catch (IllegalArgumentException e) { + assertThat(e).hasMessageContaining("table definition"); + } + } + + @Test + void testSetCompressionCodecName() { + assertThat(instructions() + .compressionCodecName("GZIP") + .build() + .compressionCodecName()) + .isEqualTo("GZIP"); + } + + @Test + void testSetMaximumDictionaryKeys() { + assertThat(instructions() + .maximumDictionaryKeys(100) + .build() + .maximumDictionaryKeys()) + .isEqualTo(100); + } + + @Test + void testSetMaximumDictionarySize() { + assertThat(instructions() + .maximumDictionarySize(100) + .build() + .maximumDictionarySize()) + .isEqualTo(100); + } + + @Test + void testSetTargetPageSize() { + assertThat(instructions() + .targetPageSize(1 << 20) + .build() + .targetPageSize()) + .isEqualTo(1 << 20); + } + + @Test + void testMinMaximumDictionaryKeys() { + + try { + instructions() + .maximumDictionaryKeys(-1) + .build(); + failBecauseExceptionWasNotThrown(IllegalArgumentException.class); + } catch (IllegalArgumentException e) { + assertThat(e).hasMessageContaining("maximumDictionaryKeys"); + } + } + + @Test + void testMinMaximumDictionarySize() { + try { + instructions() + .maximumDictionarySize(-1) + .build(); + failBecauseExceptionWasNotThrown(IllegalArgumentException.class); + } catch (IllegalArgumentException e) { + assertThat(e).hasMessageContaining("maximumDictionarySize"); + } + } + + @Test + void testMinTargetPageSize() { + try { + instructions() + .targetPageSize(1024) + .build(); + failBecauseExceptionWasNotThrown(IllegalArgumentException.class); + } catch (IllegalArgumentException e) { + assertThat(e).hasMessageContaining("targetPageSize"); + } + } + + @Test + void toParquetInstructionTest() { + final TableParquetWriterOptions writeInstructions = instructions() + .compressionCodecName("GZIP") + .maximumDictionaryKeys(100) + .maximumDictionarySize(200) + .targetPageSize(1 << 20) + .build(); + final TableDefinition definition = TableDefinition.of( + ColumnDefinition.ofInt("PC1").withPartitioning(), + ColumnDefinition.ofInt("PC2").withPartitioning(), + ColumnDefinition.ofLong("I")); + final Map fieldIdToName = Map.of(2, "field2", 3, "field3"); + final ParquetInstructions parquetInstructions = writeInstructions.toParquetInstructions( + null, definition, fieldIdToName); + + 
assertThat(parquetInstructions.getCompressionCodecName()).isEqualTo("GZIP"); + assertThat(parquetInstructions.getMaximumDictionaryKeys()).isEqualTo(100); + assertThat(parquetInstructions.getMaximumDictionarySize()).isEqualTo(200); + assertThat(parquetInstructions.getTargetPageSize()).isEqualTo(1 << 20); + assertThat(parquetInstructions.getFieldId("field1")).isEmpty(); + assertThat(parquetInstructions.getFieldId("field2")).hasValue(2); + assertThat(parquetInstructions.getFieldId("field3")).hasValue(3); + assertThat(parquetInstructions.onWriteCompleted()).isEmpty(); + assertThat(parquetInstructions.getTableDefinition()).hasValue(definition); + } +} diff --git a/extensions/iceberg/src/main/java/io/deephaven/iceberg/base/IcebergUtils.java b/extensions/iceberg/src/main/java/io/deephaven/iceberg/base/IcebergUtils.java new file mode 100644 index 00000000000..a0607d671db --- /dev/null +++ b/extensions/iceberg/src/main/java/io/deephaven/iceberg/base/IcebergUtils.java @@ -0,0 +1,314 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.iceberg.base; + +import io.deephaven.engine.table.ColumnDefinition; +import io.deephaven.engine.table.TableDefinition; +import io.deephaven.engine.table.impl.locations.TableDataException; +import io.deephaven.iceberg.util.IcebergReadInstructions; +import org.apache.iceberg.DataFile; +import org.apache.iceberg.ManifestContent; +import org.apache.iceberg.ManifestFile; +import org.apache.iceberg.ManifestFiles; +import org.apache.iceberg.PartitionField; +import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.Schema; +import org.apache.iceberg.Snapshot; +import org.apache.iceberg.Table; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.SupportsNamespaces; +import org.apache.iceberg.exceptions.AlreadyExistsException; +import org.apache.iceberg.exceptions.NamespaceNotEmptyException; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.math.BigDecimal; +import java.time.Instant; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + +public final class IcebergUtils { + + private static final Map, Type> DH_TO_ICEBERG_TYPE_MAP = new HashMap<>(); + + static { + DH_TO_ICEBERG_TYPE_MAP.put(Boolean.class, Types.BooleanType.get()); + DH_TO_ICEBERG_TYPE_MAP.put(double.class, Types.DoubleType.get()); + DH_TO_ICEBERG_TYPE_MAP.put(float.class, Types.FloatType.get()); + DH_TO_ICEBERG_TYPE_MAP.put(int.class, Types.IntegerType.get()); + DH_TO_ICEBERG_TYPE_MAP.put(long.class, Types.LongType.get()); + DH_TO_ICEBERG_TYPE_MAP.put(String.class, Types.StringType.get()); + DH_TO_ICEBERG_TYPE_MAP.put(Instant.class, Types.TimestampType.withZone()); + DH_TO_ICEBERG_TYPE_MAP.put(LocalDateTime.class, Types.TimestampType.withoutZone()); + DH_TO_ICEBERG_TYPE_MAP.put(LocalDate.class, Types.DateType.get()); + DH_TO_ICEBERG_TYPE_MAP.put(LocalTime.class, Types.TimeType.get()); + DH_TO_ICEBERG_TYPE_MAP.put(byte[].class, Types.BinaryType.get()); + // TODO (deephaven-core#6327) Add support for more types like 
ZonedDateTime, Big Decimals, and Lists + } + + /** + * Get a stream of all {@link DataFile} objects from the given {@link Table} and {@link Snapshot}. + * + * @param table The {@link Table} to retrieve data files for. + * @param snapshot The {@link Snapshot} to retrieve data files from. + * + * @return A stream of {@link DataFile} objects. + */ + public static Stream allDataFiles(@NotNull final Table table, @NotNull final Snapshot snapshot) { + return allManifestFiles(table, snapshot) + .map(manifestFile -> ManifestFiles.read(manifestFile, table.io())) + .flatMap(IcebergUtils::toStream); + } + + /** + * Get a stream of all {@link ManifestFile} objects from the given {@link Table} and {@link Snapshot}. + * + * @param table The {@link Table} to retrieve manifest files for. + * @param snapshot The {@link Snapshot} to retrieve manifest files from. + * + * @return A stream of {@link ManifestFile} objects. + */ + public static Stream allManifestFiles(@NotNull final Table table, @NotNull final Snapshot snapshot) { + return allManifests(table, snapshot).stream() + .peek(manifestFile -> { + if (manifestFile.content() != ManifestContent.DATA) { + throw new TableDataException( + String.format( + "%s:%d - only DATA manifest files are currently supported, encountered %s", + table, snapshot.snapshotId(), manifestFile.content())); + } + }); + } + + /** + * Retrieves a {@link List} of manifest files from the given {@link Table} and {@link Snapshot}. + * + * @param table The {@link Table} to retrieve manifest files for. + * @param snapshot The {@link Snapshot} to retrieve manifest files from. + * + * @return A {@link List} of {@link ManifestFile} objects. + * @throws TableDataException if there is an error retrieving the manifest files. + */ + static List allManifests(@NotNull final Table table, @NotNull final Snapshot snapshot) { + try { + return snapshot.allManifests(table.io()); + } catch (final RuntimeException e) { + throw new TableDataException( + String.format("%s:%d - error retrieving manifest files", table, snapshot.snapshotId()), e); + } + } + + /** + * Convert a {@link org.apache.iceberg.io.CloseableIterable} to a {@link Stream} that will close the iterable when + * the stream is closed. + */ + public static Stream toStream(final org.apache.iceberg.io.CloseableIterable iterable) { + return StreamSupport.stream(iterable.spliterator(), false).onClose(() -> { + try { + iterable.close(); + } catch (final IOException e) { + throw new UncheckedIOException(e); + } + }); + } + + /** + * Convert an Iceberg data type to a Deephaven type. + * + * @param icebergType The Iceberg data type to be converted. + * @return The converted Deephaven type. 
+ */ + public static io.deephaven.qst.type.Type convertToDHType(@NotNull final Type icebergType) { + final Type.TypeID typeId = icebergType.typeId(); + switch (typeId) { + case BOOLEAN: + return io.deephaven.qst.type.Type.booleanType().boxedType(); + case DOUBLE: + return io.deephaven.qst.type.Type.doubleType(); + case FLOAT: + return io.deephaven.qst.type.Type.floatType(); + case INTEGER: + return io.deephaven.qst.type.Type.intType(); + case LONG: + return io.deephaven.qst.type.Type.longType(); + case STRING: + return io.deephaven.qst.type.Type.stringType(); + case TIMESTAMP: + final Types.TimestampType timestampType = (Types.TimestampType) icebergType; + if (timestampType == Types.TimestampType.withZone()) { + return io.deephaven.qst.type.Type.find(Instant.class); + } + return io.deephaven.qst.type.Type.find(LocalDateTime.class); + case DATE: + return io.deephaven.qst.type.Type.find(LocalDate.class); + case TIME: + return io.deephaven.qst.type.Type.find(LocalTime.class); + case DECIMAL: + return io.deephaven.qst.type.Type.find(BigDecimal.class); + case FIXED: // Fall through + case BINARY: + return io.deephaven.qst.type.Type.find(byte[].class); + case UUID: // Fall through + case STRUCT: // Fall through + case LIST: // Fall through + case MAP: // Fall through + default: + throw new TableDataException("Unsupported iceberg column type " + typeId.name()); + } + } + + /** + * Convert a Deephaven type to an Iceberg type. + * + * @param columnType The Deephaven type to be converted. + * @return The converted Iceberg type. + */ + public static Type convertToIcebergType(final Class columnType) { + final Type icebergType = DH_TO_ICEBERG_TYPE_MAP.get(columnType); + if (icebergType != null) { + return icebergType; + } else { + throw new TableDataException("Unsupported deephaven column type " + columnType.getName()); + } + } + + /** + * Used to hold a {@link Schema}, {@link PartitionSpec} and {@link IcebergReadInstructions} together. + */ + public static final class SpecAndSchema { + public final Schema schema; + public final PartitionSpec partitionSpec; + public final IcebergReadInstructions readInstructions; + + public SpecAndSchema( + @NotNull final Schema schema, + @NotNull final PartitionSpec partitionSpec, + @Nullable final IcebergReadInstructions readInstructions) { + this.schema = schema; + this.partitionSpec = partitionSpec; + this.readInstructions = readInstructions; + } + } + + /** + * Create {@link PartitionSpec} and {@link Schema} from a {@link TableDefinition}. + * + * @return A {@link SpecAndSchema} object containing the partition spec and schema, and {@code null} for read + * instructions. 
+ */ + public static SpecAndSchema createSpecAndSchema(@NotNull final TableDefinition tableDefinition) { + final Collection partitioningColumnNames = new ArrayList<>(); + final List fields = new ArrayList<>(); + int fieldID = 1; // Iceberg field IDs start from 1 + + // Create the schema first and use it to build the partition spec + for (final ColumnDefinition columnDefinition : tableDefinition.getColumns()) { + final String dhColumnName = columnDefinition.getName(); + final Type icebergType = convertToIcebergType(columnDefinition.getDataType()); + fields.add(Types.NestedField.optional(fieldID, dhColumnName, icebergType)); + if (columnDefinition.isPartitioning()) { + partitioningColumnNames.add(dhColumnName); + } + fieldID++; + } + final Schema schema = new Schema(fields); + + final PartitionSpec partitionSpec = createPartitionSpec(schema, partitioningColumnNames); + return new SpecAndSchema(schema, partitionSpec, null); + } + + public static PartitionSpec createPartitionSpec( + @NotNull final Schema schema, + @NotNull final Iterable partitionColumnNames) { + final PartitionSpec.Builder partitionSpecBuilder = PartitionSpec.builderFor(schema); + for (final String partitioningColumnName : partitionColumnNames) { + partitionSpecBuilder.identity(partitioningColumnName); + } + return partitionSpecBuilder.build(); + } + + public static boolean createNamespaceIfNotExists( + @NotNull final Catalog catalog, + @NotNull final Namespace namespace) { + if (catalog instanceof SupportsNamespaces) { + final SupportsNamespaces nsCatalog = (SupportsNamespaces) catalog; + try { + nsCatalog.createNamespace(namespace); + return true; + } catch (final AlreadyExistsException | UnsupportedOperationException e) { + return false; + } + } + return false; + } + + public static boolean dropNamespaceIfExists( + @NotNull final Catalog catalog, + @NotNull final Namespace namespace) { + if (catalog instanceof SupportsNamespaces) { + final SupportsNamespaces nsCatalog = (SupportsNamespaces) catalog; + try { + return nsCatalog.dropNamespace(namespace); + } catch (final NamespaceNotEmptyException e) { + return false; + } + } + return false; + } + + /** + * Check that all required fields are present in the table definition + */ + public static void verifyRequiredFields(final Schema tableSchema, final TableDefinition tableDefinition) { + final List columnNames = tableDefinition.getColumnNames(); + for (final Types.NestedField field : tableSchema.columns()) { + if (field.isRequired() && !columnNames.contains(field.name())) { + // TODO (deephaven-core#6343): Add check for writeDefault() not set for required fields + throw new IllegalArgumentException("Field " + field + " is required in the table schema, but is not " + + "present in the table definition, table schema " + tableSchema + ", tableDefinition " + + tableDefinition); + } + } + } + + /** + * Check that all the partitioning columns from the partition spec are present in the Table Definition. 
+ */ + public static void verifyPartitioningColumns( + final PartitionSpec tablePartitionSpec, + final TableDefinition tableDefinition) { + final List partitioningColumnNamesFromDefinition = tableDefinition.getColumnStream() + .filter(ColumnDefinition::isPartitioning) + .map(ColumnDefinition::getName) + .collect(Collectors.toList()); + final List partitionFieldsFromSchema = tablePartitionSpec.fields(); + if (partitionFieldsFromSchema.size() != partitioningColumnNamesFromDefinition.size()) { + throw new IllegalArgumentException("Partition spec contains " + partitionFieldsFromSchema.size() + + " fields, but the table definition contains " + partitioningColumnNamesFromDefinition.size() + + " fields, partition spec " + tablePartitionSpec + ", table definition " + tableDefinition); + } + for (int colIdx = 0; colIdx < partitionFieldsFromSchema.size(); colIdx += 1) { + final PartitionField partitionField = partitionFieldsFromSchema.get(colIdx); + if (!partitioningColumnNamesFromDefinition.get(colIdx).equals(partitionField.name())) { + throw new IllegalArgumentException("Partitioning column " + partitionField.name() + " is not present " + + "in the table definition at idx " + colIdx + ", table definition " + tableDefinition + + ", partition spec " + tablePartitionSpec); + } + } + } +} diff --git a/extensions/iceberg/src/main/java/io/deephaven/iceberg/layout/IcebergBaseLayout.java b/extensions/iceberg/src/main/java/io/deephaven/iceberg/layout/IcebergBaseLayout.java index 9cab712df7f..d711119641b 100644 --- a/extensions/iceberg/src/main/java/io/deephaven/iceberg/layout/IcebergBaseLayout.java +++ b/extensions/iceberg/src/main/java/io/deephaven/iceberg/layout/IcebergBaseLayout.java @@ -7,6 +7,7 @@ import io.deephaven.engine.table.TableDefinition; import io.deephaven.engine.table.impl.locations.TableDataException; import io.deephaven.engine.table.impl.locations.impl.TableLocationKeyFinder; +import io.deephaven.iceberg.base.IcebergUtils; import io.deephaven.iceberg.location.IcebergTableLocationKey; import io.deephaven.iceberg.location.IcebergTableParquetLocationKey; import io.deephaven.iceberg.relative.RelativeFileIO; @@ -26,8 +27,12 @@ import java.net.URI; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.UUID; import java.util.function.Consumer; +import java.util.stream.Stream; + +import static io.deephaven.iceberg.base.IcebergUtils.allManifestFiles; public abstract class IcebergBaseLayout implements TableLocationKeyFinder { /** @@ -176,32 +181,23 @@ public synchronized void findKeys(@NotNull final Consumer manifestFiles = snapshot.allManifests(table.io()); - for (final ManifestFile manifestFile : manifestFiles) { - // Currently only can process manifest files with DATA content type. - if (manifestFile.content() != ManifestContent.DATA) { - throw new TableDataException( - String.format("%s:%d - only DATA manifest files are currently supported, encountered %s", - table, snapshot.snapshotId(), manifestFile.content())); - } - try (final ManifestReader reader = ManifestFiles.read(manifestFile, table.io())) { - for (final DataFile dataFile : reader) { - final URI fileUri = dataFileUri(table, dataFile); - if (!uriScheme.equals(fileUri.getScheme())) { - throw new TableDataException(String.format( - "%s:%d - multiple URI schemes are not currently supported. 
uriScheme=%s, fileUri=%s", - table, snapshot.snapshotId(), uriScheme, fileUri)); - } - final IcebergTableLocationKey locationKey = keyFromDataFile(manifestFile, dataFile, fileUri); - if (locationKey != null) { - locationKeyObserver.accept(locationKey); - } - } - } - } - } catch (final Exception e) { + try (final Stream manifestFiles = allManifestFiles(table, snapshot)) { + manifestFiles.forEach(manifestFile -> { + final ManifestReader reader = ManifestFiles.read(manifestFile, table.io()); + IcebergUtils.toStream(reader) + .map(dataFile -> { + final URI fileUri = dataFileUri(table, dataFile); + if (!uriScheme.equals(fileUri.getScheme())) { + throw new TableDataException(String.format( + "%s:%d - multiple URI schemes are not currently supported. uriScheme=%s, " + + "fileUri=%s", + table, snapshot.snapshotId(), uriScheme, fileUri)); + } + return keyFromDataFile(manifestFile, dataFile, fileUri); + }) + .forEach(locationKeyObserver); + }); + } catch (final RuntimeException e) { throw new TableDataException( String.format("%s:%d - error finding Iceberg locations", tableAdapter, snapshot.snapshotId()), e); } diff --git a/extensions/iceberg/src/main/java/io/deephaven/iceberg/layout/IcebergKeyValuePartitionedLayout.java b/extensions/iceberg/src/main/java/io/deephaven/iceberg/layout/IcebergKeyValuePartitionedLayout.java index f362139133f..86e63f7dbb9 100644 --- a/extensions/iceberg/src/main/java/io/deephaven/iceberg/layout/IcebergKeyValuePartitionedLayout.java +++ b/extensions/iceberg/src/main/java/io/deephaven/iceberg/layout/IcebergKeyValuePartitionedLayout.java @@ -13,6 +13,7 @@ import io.deephaven.util.type.TypeUtils; import org.apache.commons.lang3.mutable.MutableInt; import org.apache.iceberg.*; +import org.apache.iceberg.data.IdentityPartitionConverters; import org.jetbrains.annotations.NotNull; import java.net.URI; @@ -54,6 +55,13 @@ public IcebergKeyValuePartitionedLayout( // in the output definition, so we can ignore duplicates. 
final MutableInt icebergIndex = new MutableInt(0); final Map availablePartitioningColumns = partitionSpec.fields().stream() + .peek(partitionField -> { + // TODO (deephaven-core#6438): Add support to handle non-identity transforms + if (!partitionField.transform().isIdentity()) { + throw new TableDataException("Partition field " + partitionField.name() + " has a " + + "non-identity transform: " + partitionField.transform() + ", which is not supported"); + } + }) .map(PartitionField::name) .map(name -> instructions.columnRenames().getOrDefault(name, name)) .collect(Collectors.toMap( @@ -89,11 +97,19 @@ IcebergTableLocationKey keyFromDataFile( final PartitionData partitionData = (PartitionData) dataFile.partition(); for (final ColumnData colData : outputPartitioningColumns) { final String colName = colData.name; - final Object colValue = partitionData.get(colData.index); - if (colValue != null && !colData.type.isAssignableFrom(colValue.getClass())) { - throw new TableDataException("Partitioning column " + colName - + " has type " + colValue.getClass().getName() - + " but expected " + colData.type.getName()); + final Object colValue; + final Object valueFromPartitionData = partitionData.get(colData.index); + if (valueFromPartitionData != null) { + // TODO (deephaven-core#6438): Assuming identity transform here + colValue = IdentityPartitionConverters.convertConstant( + partitionData.getType(colData.index), valueFromPartitionData); + if (!colData.type.isAssignableFrom(colValue.getClass())) { + throw new TableDataException("Partitioning column " + colName + + " has type " + colValue.getClass().getName() + + " but expected " + colData.type.getName()); + } + } else { + colValue = null; } partitions.put(colName, (Comparable) colValue); } diff --git a/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/IcebergCatalogAdapter.java b/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/IcebergCatalogAdapter.java index a096d21570c..2c0206c0c89 100644 --- a/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/IcebergCatalogAdapter.java +++ b/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/IcebergCatalogAdapter.java @@ -7,19 +7,27 @@ import io.deephaven.engine.table.*; import io.deephaven.engine.table.impl.QueryTable; import io.deephaven.engine.table.impl.sources.InMemoryColumnSource; +import io.deephaven.iceberg.base.IcebergUtils; import io.deephaven.iceberg.internal.DataInstructionsProviderLoader; import io.deephaven.iceberg.internal.DataInstructionsProviderPlugin; import io.deephaven.util.annotations.VisibleForTesting; +import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.Schema; +import org.apache.iceberg.TableProperties; import org.apache.iceberg.catalog.Catalog; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.SupportsNamespaces; import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.AlreadyExistsException; import org.jetbrains.annotations.NotNull; import org.apache.iceberg.rest.RESTCatalog; import org.apache.iceberg.rest.ResourcePaths; import java.util.*; +import static io.deephaven.iceberg.base.IcebergUtils.createNamespaceIfNotExists; +import static io.deephaven.iceberg.base.IcebergUtils.dropNamespaceIfExists; + public class IcebergCatalogAdapter { @VisibleForTesting @@ -86,7 +94,6 @@ static IcebergCatalogAdapter of(RESTCatalog restCatalog) { dataInstructionsProvider = DataInstructionsProviderLoader.create(Map.copyOf(properties)); } - /** * List all {@link Namespace namespaces} in the 
catalog. This method is only supported if the catalog implements * {@link SupportsNamespaces} for namespace discovery. See {@link SupportsNamespaces#listNamespaces(Namespace)}. @@ -249,8 +256,65 @@ public IcebergTableAdapter loadTable(@NotNull final TableIdentifier tableIdentif /** * Returns the underlying Iceberg {@link Catalog catalog} used by this adapter. */ - @SuppressWarnings("unused") public Catalog catalog() { return catalog; } + + /** + * Create a new Iceberg table in the catalog with the given table identifier and definition. + *

+ * All columns of type {@link ColumnDefinition.ColumnType#Partitioning partitioning} will be used to create the + * partition spec for the table. + * + * @param tableIdentifier The identifier string of the new table. + * @param definition The {@link TableDefinition} of the new table. + * @return The {@link IcebergTableAdapter table adapter} for the new Iceberg table. + * @throws AlreadyExistsException if the table already exists + */ + public IcebergTableAdapter createTable( + @NotNull final String tableIdentifier, + @NotNull final TableDefinition definition) { + return createTable(TableIdentifier.parse(tableIdentifier), definition); + } + + /** + * Create a new Iceberg table in the catalog with the given table identifier and definition. + *

+ * All columns of type {@link ColumnDefinition.ColumnType#Partitioning partitioning} will be used to create the + * partition spec for the table. + * + * @param tableIdentifier The identifier of the new table. + * @param definition The {@link TableDefinition} of the new table. + * @return The {@link IcebergTableAdapter table adapter} for the new Iceberg table. + * @throws AlreadyExistsException if the table already exists + */ + public IcebergTableAdapter createTable( + @NotNull final TableIdentifier tableIdentifier, + @NotNull final TableDefinition definition) { + final IcebergUtils.SpecAndSchema specAndSchema = IcebergUtils.createSpecAndSchema(definition); + return createTable(tableIdentifier, specAndSchema.schema, specAndSchema.partitionSpec); + } + + private IcebergTableAdapter createTable( + @NotNull final TableIdentifier tableIdentifier, + @NotNull final Schema schema, + @NotNull final PartitionSpec partitionSpec) { + final boolean newNamespaceCreated = createNamespaceIfNotExists(catalog, tableIdentifier.namespace()); + try { + final org.apache.iceberg.Table table = + catalog.createTable(tableIdentifier, schema, partitionSpec, + Map.of(TableProperties.DEFAULT_FILE_FORMAT, TableProperties.DEFAULT_FILE_FORMAT_DEFAULT)); + return new IcebergTableAdapter(catalog, tableIdentifier, table, dataInstructionsProvider); + } catch (final Throwable throwable) { + if (newNamespaceCreated) { + // Delete it to avoid leaving a partial namespace in the catalog + try { + dropNamespaceIfExists(catalog, tableIdentifier.namespace()); + } catch (final RuntimeException dropException) { + throwable.addSuppressed(dropException); + } + } + throw throwable; + } + } } diff --git a/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/IcebergReadInstructions.java b/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/IcebergReadInstructions.java index 82271590900..fa91cc9c2d3 100644 --- a/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/IcebergReadInstructions.java +++ b/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/IcebergReadInstructions.java @@ -22,9 +22,8 @@ public abstract class IcebergReadInstructions { /** * The default {@link IcebergReadInstructions} to use when reading Iceberg data files. Providing this will use - * system defaults for cloud provider-specific parameters + * system defaults for cloud provider-specific parameters. 
*/ - @SuppressWarnings("unused") public static final IcebergReadInstructions DEFAULT = builder().build(); public static Builder builder() { diff --git a/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/IcebergTableAdapter.java b/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/IcebergTableAdapter.java index 7464ece4988..dff4f620a25 100644 --- a/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/IcebergTableAdapter.java +++ b/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/IcebergTableAdapter.java @@ -19,6 +19,7 @@ import io.deephaven.engine.table.impl.sources.regioned.RegionedTableComponentFactoryImpl; import io.deephaven.engine.updategraph.UpdateSourceRegistrar; import io.deephaven.engine.util.TableTools; +import io.deephaven.iceberg.base.IcebergUtils.SpecAndSchema; import io.deephaven.iceberg.internal.DataInstructionsProviderLoader; import io.deephaven.iceberg.layout.*; import io.deephaven.iceberg.location.IcebergTableLocationFactory; @@ -38,10 +39,11 @@ import org.jetbrains.annotations.Nullable; import java.time.Instant; -import java.time.LocalDateTime; import java.util.*; import java.util.stream.Collectors; +import static io.deephaven.iceberg.base.IcebergUtils.convertToDHType; + /** * This class manages an Iceberg {@link org.apache.iceberg.Table table} and provides methods to interact with it. */ @@ -266,24 +268,6 @@ public Snapshot getSnapshot(@NotNull final IcebergReadInstructions readInstructi return null; } - /** - * Used to hold return value for {@link #getSpecAndSchema(IcebergReadInstructions)}. - */ - private static final class SpecAndSchema { - private final Schema schema; - private final PartitionSpec partitionSpec; - private final IcebergReadInstructions readInstructions; - - private SpecAndSchema( - @NotNull final Schema schema, - @NotNull final PartitionSpec partitionSpec, - @NotNull final IcebergReadInstructions readInstructions) { - this.schema = schema; - this.partitionSpec = partitionSpec; - this.readInstructions = readInstructions; - } - } - /** * Retrieve the schema and partition spec for the table based on the provided read instructions. Also, populate the * read instructions with the requested snapshot, or the latest snapshot if none is requested. @@ -552,7 +536,7 @@ private static TableDefinition fromSchema( continue; } final Type type = field.type(); - final io.deephaven.qst.type.Type qstType = convertPrimitiveType(type); + final io.deephaven.qst.type.Type qstType = convertToDHType(type); final ColumnDefinition column; if (partitionNames.contains(name)) { column = ColumnDefinition.of(name, qstType).withPartitioning(); @@ -590,46 +574,16 @@ private static TableDefinition fromSchema( } /** - * Convert an Iceberg data type to a Deephaven type. + * Create a new {@link IcebergTableWriter} for this Iceberg table using the provided {@link TableWriterOptions}. + *

+ * This method will perform schema validation to ensure that the provided + * {@link TableWriterOptions#tableDefinition()} is compatible with the Iceberg table schema. All further writes + * performed by the returned writer will not be validated against the table's schema, and thus will be faster. * - * @param icebergType The Iceberg data type to be converted. - * @return The converted Deephaven type. + * @param tableWriterOptions The options to configure the table writer. + * @return A new instance of {@link IcebergTableWriter} configured with the provided options. */ - static io.deephaven.qst.type.Type convertPrimitiveType(@NotNull final Type icebergType) { - final Type.TypeID typeId = icebergType.typeId(); - switch (typeId) { - case BOOLEAN: - return io.deephaven.qst.type.Type.booleanType().boxedType(); - case DOUBLE: - return io.deephaven.qst.type.Type.doubleType(); - case FLOAT: - return io.deephaven.qst.type.Type.floatType(); - case INTEGER: - return io.deephaven.qst.type.Type.intType(); - case LONG: - return io.deephaven.qst.type.Type.longType(); - case STRING: - return io.deephaven.qst.type.Type.stringType(); - case TIMESTAMP: - final Types.TimestampType timestampType = (Types.TimestampType) icebergType; - return timestampType.shouldAdjustToUTC() - ? io.deephaven.qst.type.Type.find(Instant.class) - : io.deephaven.qst.type.Type.find(LocalDateTime.class); - case DATE: - return io.deephaven.qst.type.Type.find(java.time.LocalDate.class); - case TIME: - return io.deephaven.qst.type.Type.find(java.time.LocalTime.class); - case DECIMAL: - return io.deephaven.qst.type.Type.find(java.math.BigDecimal.class); - case FIXED: // Fall through - case BINARY: - return io.deephaven.qst.type.Type.find(byte[].class); - case UUID: // Fall through - case STRUCT: // Fall through - case LIST: // Fall through - case MAP: // Fall through - default: - throw new TableDataException("Unsupported iceberg column type " + typeId.name()); - } + public IcebergTableWriter tableWriter(final TableWriterOptions tableWriterOptions) { + return new IcebergTableWriter(tableWriterOptions, this); } } diff --git a/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/IcebergTableWriter.java b/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/IcebergTableWriter.java new file mode 100644 index 00000000000..1f4fa3597af --- /dev/null +++ b/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/IcebergTableWriter.java @@ -0,0 +1,525 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.iceberg.util; + +import io.deephaven.base.Pair; +import io.deephaven.base.verify.Require; +import io.deephaven.engine.context.ExecutionContext; +import io.deephaven.engine.context.QueryScope; +import io.deephaven.engine.context.StandaloneQueryScope; +import io.deephaven.engine.table.ColumnDefinition; +import io.deephaven.engine.table.Table; +import io.deephaven.engine.table.TableDefinition; +import io.deephaven.engine.table.impl.locations.TableDataException; +import io.deephaven.parquet.table.CompletedParquetWrite; +import io.deephaven.parquet.table.ParquetInstructions; +import io.deephaven.parquet.table.ParquetTools; +import io.deephaven.iceberg.util.SchemaProviderInternal.SchemaProviderImpl; +import io.deephaven.util.SafeCloseable; +import org.apache.iceberg.AppendFiles; +import org.apache.iceberg.DataFile; +import org.apache.iceberg.DataFiles; +import org.apache.iceberg.FileFormat; +import org.apache.iceberg.HasTableOperations; +import org.apache.iceberg.PartitionData; +import 
org.apache.iceberg.PartitionField; +import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.Schema; +import org.apache.iceberg.TableMetadata; +import org.apache.iceberg.TableProperties; +import org.apache.iceberg.Transaction; +import org.apache.iceberg.encryption.EncryptedOutputFile; +import org.apache.iceberg.io.OutputFileFactory; +import org.apache.iceberg.mapping.MappedField; +import org.apache.iceberg.mapping.NameMapping; +import org.apache.iceberg.mapping.NameMappingParser; +import org.apache.iceberg.types.Conversions; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; +import org.jetbrains.annotations.NotNull; + +import java.time.Instant; +import java.time.LocalDate; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; + +import static io.deephaven.iceberg.base.IcebergUtils.verifyPartitioningColumns; +import static io.deephaven.iceberg.base.IcebergUtils.verifyRequiredFields; + +/** + * This class is responsible for writing Deephaven tables to an Iceberg table. Each instance of this class is associated + * with a single {@link IcebergTableAdapter} and can be used to write multiple Deephaven tables to this Iceberg table. + */ +public class IcebergTableWriter { + + /** + * The options used to configure the behavior of this writer instance. + */ + private final TableParquetWriterOptions tableWriterOptions; + + /** + * The Iceberg table which will be written to by this instance. + */ + private final org.apache.iceberg.Table table; + + /** + * Store the partition spec of the Iceberg table at the time of creation of this writer instance and use it for all + * writes, so that even if the table spec, the writer will still work. + */ + private final PartitionSpec tableSpec; + + /** + * The table definition used for all writes by this writer instance. + */ + private final TableDefinition tableDefinition; + + /** + * The table definition consisting of non-partitioning columns from {@link #tableDefinition}. All tables written by + * this writer are expected to have a compatible definition with this. + */ + private final TableDefinition nonPartitioningTableDefinition; + + /** + * The schema to use when in conjunction with the {@link #fieldIdToColumnName} to map Deephaven columns from + * {@link #tableDefinition} to Iceberg columns. + */ + private final Schema userSchema; + + /** + * Mapping from Iceberg field IDs to Deephaven column names, populated inside the parquet file. + */ + private final Map fieldIdToColumnName; + + /** + * The factory to create new output file locations for writing data files. + */ + private final OutputFileFactory outputFileFactory; + + /** + * Characters to be used for generating random variable names of length {@link #VARIABLE_NAME_LENGTH}. 
+ */ + private static final String CHARACTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + private static final int VARIABLE_NAME_LENGTH = 6; + + IcebergTableWriter( + final TableWriterOptions tableWriterOptions, + final IcebergTableAdapter tableAdapter) { + this.tableWriterOptions = verifyWriterOptions(tableWriterOptions); + this.table = tableAdapter.icebergTable(); + + this.tableSpec = table.spec(); + + this.tableDefinition = tableWriterOptions.tableDefinition(); + this.nonPartitioningTableDefinition = nonPartitioningTableDefinition(tableDefinition); + verifyRequiredFields(table.schema(), tableDefinition); + verifyPartitioningColumns(tableSpec, tableDefinition); + + this.userSchema = ((SchemaProviderImpl) tableWriterOptions.schemaProvider()).getSchema(table); + verifyFieldIdsInSchema(tableWriterOptions.fieldIdToColumnName().keySet(), userSchema); + + // Create a copy of the fieldIdToColumnName map since we might need to add new entries for columns which are not + // provided by the user. + this.fieldIdToColumnName = new HashMap<>(tableWriterOptions.fieldIdToColumnName()); + addFieldIdsForAllColumns(); + + outputFileFactory = OutputFileFactory.builderFor(table, 0, 0) + .format(FileFormat.PARQUET) + .build(); + } + + private static TableParquetWriterOptions verifyWriterOptions( + @NotNull final TableWriterOptions tableWriterOptions) { + // We ony support writing to Parquet files + if (!(tableWriterOptions instanceof TableParquetWriterOptions)) { + throw new IllegalArgumentException( + "Unsupported options of class " + tableWriterOptions.getClass() + " for" + + " writing Iceberg table, expected: " + TableParquetWriterOptions.class); + } + return (TableParquetWriterOptions) tableWriterOptions; + } + + /** + * Return a {@link TableDefinition} which contains only the non-partitioning columns from the provided table + * definition. + */ + private static TableDefinition nonPartitioningTableDefinition( + @NotNull final TableDefinition tableDefinition) { + final Collection> nonPartitioningColumns = new ArrayList<>(); + for (final ColumnDefinition columnDefinition : tableDefinition.getColumns()) { + if (!columnDefinition.isPartitioning()) { + nonPartitioningColumns.add(columnDefinition); + } + } + return TableDefinition.of(nonPartitioningColumns); + } + + /** + * Check that all the field IDs are present in the schema. + */ + private static void verifyFieldIdsInSchema(final Collection fieldIds, final Schema schema) { + if (!fieldIds.isEmpty()) { + for (final Integer fieldId : fieldIds) { + if (schema.findField(fieldId) == null) { + throw new IllegalArgumentException("Column corresponding to field ID " + fieldId + " not " + + "found in schema, available columns in schema are: " + schema.columns()); + } + } + } + } + + /** + * Populate the {@link #fieldIdToColumnName} map for all the columns in the {@link #tableDefinition} and do + * additional checks to ensure that the table definition is compatible with schema provided by user. 
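+ * <p>
+ * For instance, a caller can pre-populate part of this mapping when building the writer options (the field ids and
+ * column names below are illustrative):
+ * <pre>{@code
+ * TableParquetWriterOptions.builder()
+ *         .tableDefinition(myDefinition)
+ *         .putFieldIdToColumnName(1, "intCol")
+ *         .putFieldIdToColumnName(2, "doubleCol")
+ *         .build();
+ * }</pre>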
+ */ + private void addFieldIdsForAllColumns() { + final Map dhColumnNameToFieldId = tableWriterOptions.dhColumnNameToFieldId(); + Map nameMappingDefault = null; // Lazily initialized + for (final ColumnDefinition columnDefinition : tableDefinition.getColumns()) { + final String columnName = columnDefinition.getName(); + + // We are done if we already have the mapping between column name and field ID + if (dhColumnNameToFieldId.containsKey(columnName)) { + continue; + } + + // To be populated by the end of this block for each column, else throw an exception + Integer fieldId = null; + Types.NestedField nestedField; + + // Check in the schema.name_mapping.default map + if (nameMappingDefault == null) { + nameMappingDefault = readNameMappingDefault(); + } + fieldId = nameMappingDefault.get(columnName); + if (fieldId != null) { + nestedField = userSchema.findField(fieldId); + if (nestedField == null) { + throw new IllegalArgumentException("Field ID " + fieldId + " extracted for " + + "column " + columnName + " from the schema.name_mapping map not found in schema " + + userSchema); + } + } + + // Directly lookup in the user provided schema using column name + if (fieldId == null) { + nestedField = userSchema.findField(columnName); + if (nestedField != null) { + fieldId = nestedField.fieldId(); + } + } + + if (fieldId == null) { + throw new IllegalArgumentException("Column " + columnName + " not found in the schema or " + + "the name mapping for the table"); + } + + fieldIdToColumnName.put(fieldId, columnName); + } + } + + /** + * Build the mapping from column names to field IDs on demand using the + * {@value TableProperties#DEFAULT_NAME_MAPPING} map. + *

+ * Return an empty map if the table metadata is null or the mapping is not present in the table metadata. + */ + private Map readNameMappingDefault() { + final TableMetadata tableMetadata; + if (table instanceof HasTableOperations) { + tableMetadata = ((HasTableOperations) table).operations().current(); + } else { + // TableMetadata is not available, so nothing to add to the map + return Map.of(); + } + final String nameMappingJson = tableMetadata.property(TableProperties.DEFAULT_NAME_MAPPING, null); + if (nameMappingJson == null) { + return Map.of(); + } + // Iterate over all mapped fields and build a reverse map from column name to field ID + final Map nameMappingDefault = new HashMap<>(); + final NameMapping nameMapping = NameMappingParser.fromJson(nameMappingJson); + for (final MappedField field : nameMapping.asMappedFields().fields()) { + final Integer fieldId = field.id(); + for (final String name : field.names()) { + nameMappingDefault.put(name, fieldId); + } + } + return nameMappingDefault; + } + + /** + * Append the provided Deephaven {@link IcebergWriteInstructions#tables()} as new partitions to the existing Iceberg + * table in a single snapshot. This method will not perform any compatibility checks between the existing schema and + * the provided Deephaven tables. + * + * @param writeInstructions The instructions for customizations while writing. + */ + public void append(@NotNull final IcebergWriteInstructions writeInstructions) { + final List dataFilesWritten = writeDataFiles(writeInstructions); + commit(dataFilesWritten); + } + + /** + * Writes data from Deephaven {@link IcebergWriteInstructions#tables()} to an Iceberg table without creating a new + * snapshot. This method returns a list of data files that were written. Users can use this list to create a + * transaction/snapshot if needed. This method will not perform any compatibility checks between the existing schema + * and the provided Deephaven tables. + * + * @param writeInstructions The instructions for customizations while writing. + */ + public List writeDataFiles(@NotNull final IcebergWriteInstructions writeInstructions) { + verifyCompatible(writeInstructions.tables(), nonPartitioningTableDefinition); + final List partitionPaths = writeInstructions.partitionPaths(); + verifyPartitionPaths(tableSpec, partitionPaths); + final List partitionData; + final List parquetFileInfo; + // Start a new query scope to avoid polluting the existing query scope with new parameters added for + // partitioning columns + try (final SafeCloseable _ignore = + ExecutionContext.getContext().withQueryScope(new StandaloneQueryScope()).open()) { + final Pair, List> ret = partitionDataFromPaths(tableSpec, partitionPaths); + partitionData = ret.getFirst(); + final List dhTableUpdateStrings = ret.getSecond(); + parquetFileInfo = writeParquet(partitionData, dhTableUpdateStrings, writeInstructions); + } + return dataFilesFromParquet(parquetFileInfo, partitionData); + } + + /** + * Verify that all the tables are compatible with the provided table definition. 
+ */ + private static void verifyCompatible( + @NotNull final Iterable tables, + @NotNull final TableDefinition expectedDefinition) { + for (final Table table : tables) { + expectedDefinition.checkMutualCompatibility(table.getDefinition()); + } + } + + private static void verifyPartitionPaths( + final PartitionSpec partitionSpec, + final Collection partitionPaths) { + if (partitionSpec.isPartitioned() && partitionPaths.isEmpty()) { + throw new IllegalArgumentException("Cannot write data to a partitioned table without partition paths."); + } + if (!partitionSpec.isPartitioned() && !partitionPaths.isEmpty()) { + throw new IllegalArgumentException("Cannot write data to an un-partitioned table with partition paths."); + } + } + + /** + * Creates a list of {@link PartitionData} and corresponding update strings for Deephaven tables from partition + * paths and spec. Also, validates that the partition paths are compatible with the provided partition spec. + * + * @param partitionSpec The partition spec to use for validation. + * @param partitionPaths The list of partition paths to process. + * @return A pair containing a list of PartitionData objects and a list of update strings for Deephaven tables. + * @throws IllegalArgumentException if the partition paths are not compatible with the partition spec. + * + * @implNote Check implementations of {@link DataFiles#data} and {@link Conversions#fromPartitionString} for more + * details on how partition paths should be parsed, how each type of value is parsed from a string and + * what types are allowed for partitioning columns. + */ + private static Pair, List> partitionDataFromPaths( + final PartitionSpec partitionSpec, + final Collection partitionPaths) { + final List partitionDataList = new ArrayList<>(partitionPaths.size()); + final List dhTableUpdateStringList = new ArrayList<>(partitionPaths.size()); + final int numPartitioningFields = partitionSpec.fields().size(); + final QueryScope queryScope = ExecutionContext.getContext().getQueryScope(); + for (final String partitionPath : partitionPaths) { + final String[] dhTableUpdateString = new String[numPartitioningFields]; + try { + final String[] partitions = partitionPath.split("/", -1); + if (partitions.length != numPartitioningFields) { + throw new IllegalArgumentException("Expecting " + numPartitioningFields + " number of fields, " + + "found " + partitions.length); + } + final PartitionData partitionData = new PartitionData(partitionSpec.partitionType()); + for (int colIdx = 0; colIdx < partitions.length; colIdx += 1) { + final String[] parts = partitions[colIdx].split("=", 2); + if (parts.length != 2) { + throw new IllegalArgumentException("Expecting key=value format, found " + partitions[colIdx]); + } + final PartitionField field = partitionSpec.fields().get(colIdx); + if (!field.name().equals(parts[0])) { + throw new IllegalArgumentException("Expecting field name " + field.name() + " at idx " + + colIdx + ", found " + parts[0]); + } + final Type type = partitionData.getType(colIdx); + dhTableUpdateString[colIdx] = getTableUpdateString(field.name(), type, parts[1], queryScope); + partitionData.set(colIdx, Conversions.fromPartitionString(partitionData.getType(colIdx), parts[1])); + } + } catch (final Exception e) { + throw new IllegalArgumentException("Failed to parse partition path: " + partitionPath + " using" + + " partition spec " + partitionSpec + ", check cause for more details ", e); + } + dhTableUpdateStringList.add(dhTableUpdateString); + 
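+ // Note: DataFiles.data re-parses the full partition path against the spec to build the PartitionData stored with
+ // the file; the manual parse above serves validation and produces the Deephaven update strings.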
partitionDataList.add(DataFiles.data(partitionSpec, partitionPath)); + } + return new Pair<>(partitionDataList, dhTableUpdateStringList); + } + + /** + * This method would convert a partitioning column info to a string which can be used in + * {@link io.deephaven.engine.table.Table#updateView(Collection) Table#updateView} method. For example, if the + * partitioning column of name "partitioningColumnName" if of type {@link Types.TimestampType} and the value is + * "2021-01-01T00:00:00Z", then this method would: + *
    + *
+ * <ul>
+ * <li>Add a new parameter to the query scope with a random name and value as {@link Instant} parsed from the string
+ * "2021-01-01T00:00:00Z"</li>
+ * <li>Return the string "partitioningColumnName = randomName"</li>
+ * </ul>
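+ * <p>
+ * Purely for illustration (the generated parameter name is random; {@code AbCdEf} is a made-up placeholder):
+ * <pre>{@code
+ * // query scope gains: AbCdEf = Instant.parse("2021-01-01T00:00:00Z")
+ * // returned update string: "partitioningColumnName = AbCdEf"
+ * }</pre>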
+ * + * @param colName The name of the partitioning column + * @param colType The type of the partitioning column + * @param value The value of the partitioning column + * @param queryScope The query scope to add the parameter to + */ + private static String getTableUpdateString( + @NotNull final String colName, + @NotNull final Type colType, + @NotNull final String value, + @NotNull final QueryScope queryScope) { + // Randomly generated name to be added to the query scope for each value to avoid repeated casts + // TODO(deephaven-core#6418): Find a better way to handle these table updates instead of using query scope + final String paramName = generateRandomAlphabetString(VARIABLE_NAME_LENGTH); + final Type.TypeID typeId = colType.typeId(); + if (typeId == Type.TypeID.BOOLEAN) { + queryScope.putParam(paramName, Boolean.parseBoolean(value)); + } else if (typeId == Type.TypeID.DOUBLE) { + queryScope.putParam(paramName, Double.parseDouble(value)); + } else if (typeId == Type.TypeID.FLOAT) { + queryScope.putParam(paramName, Float.parseFloat(value)); + } else if (typeId == Type.TypeID.INTEGER) { + queryScope.putParam(paramName, Integer.parseInt(value)); + } else if (typeId == Type.TypeID.LONG) { + queryScope.putParam(paramName, Long.parseLong(value)); + } else if (typeId == Type.TypeID.STRING) { + queryScope.putParam(paramName, value); + } else if (typeId == Type.TypeID.DATE) { + queryScope.putParam(paramName, LocalDate.parse(value)); + } else { + // TODO (deephaven-core#6327) Add support for more partitioning types like Big Decimals + throw new TableDataException("Unsupported partitioning column type " + typeId.name()); + } + return colName + " = " + paramName; + } + + /** + * Generate a random string of length {@code length} using just alphabets. + */ + private static String generateRandomAlphabetString(final int length) { + final StringBuilder stringBuilder = new StringBuilder(); + final Random random = new Random(); + for (int i = 0; i < length; i++) { + final int index = random.nextInt(CHARACTERS.length()); + stringBuilder.append(CHARACTERS.charAt(index)); + } + return stringBuilder.toString(); + } + + @NotNull + private List writeParquet( + @NotNull final List partitionDataList, + @NotNull final List dhTableUpdateStrings, + @NotNull final IcebergWriteInstructions writeInstructions) { + final List
dhTables = writeInstructions.tables(); + final boolean isPartitioned = tableSpec.isPartitioned(); + if (isPartitioned) { + Require.eq(dhTables.size(), "dhTables.size()", + partitionDataList.size(), "partitionDataList.size()"); + Require.eq(dhTables.size(), "dhTables.size()", + dhTableUpdateStrings.size(), "dhTableUpdateStrings.size()"); + } else { + Require.eqZero(partitionDataList.size(), "partitionDataList.size()"); + Require.eqZero(dhTableUpdateStrings.size(), "dhTableUpdateStrings.size()"); + } + + // Build the parquet instructions + final List parquetFilesWritten = new ArrayList<>(dhTables.size()); + final ParquetInstructions.OnWriteCompleted onWriteCompleted = parquetFilesWritten::add; + final ParquetInstructions parquetInstructions = tableWriterOptions.toParquetInstructions( + onWriteCompleted, tableDefinition, fieldIdToColumnName); + + // Write the data to parquet files + for (int idx = 0; idx < dhTables.size(); idx++) { + Table dhTable = dhTables.get(idx); + if (dhTable.numColumns() == 0) { + // Skip writing empty tables with no columns + continue; + } + final String newDataLocation; + if (isPartitioned) { + newDataLocation = getDataLocation(partitionDataList.get(idx)); + dhTable = dhTable.updateView(dhTableUpdateStrings.get(idx)); + } else { + newDataLocation = getDataLocation(); + } + // TODO (deephaven-core#6343): Set writeDefault() values for required columns that not present in the table + ParquetTools.writeTable(dhTable, newDataLocation, parquetInstructions); + } + return parquetFilesWritten; + } + + /** + * Generate the location string for a new data file for the given partition data. + */ + private String getDataLocation(@NotNull final PartitionData partitionData) { + final EncryptedOutputFile outputFile = outputFileFactory.newOutputFile(tableSpec, partitionData); + return outputFile.encryptingOutputFile().location(); + } + + /** + * Generate the location string for a new data file for the unpartitioned table. + */ + private String getDataLocation() { + final EncryptedOutputFile outputFile = outputFileFactory.newOutputFile(); + return outputFile.encryptingOutputFile().location(); + } + + /** + * Commit the changes to the Iceberg table by creating a snapshot. + */ + private void commit( + @NotNull final Iterable dataFiles) { + final Transaction icebergTransaction = table.newTransaction(); + + // Append the new data files to the table + final AppendFiles append = icebergTransaction.newAppend(); + dataFiles.forEach(append::appendFile); + append.commit(); + + // Commit the transaction, creating new snapshot + icebergTransaction.commitTransaction(); + } + + /** + * Generate a list of {@link DataFile} objects from a list of parquet files written. 
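+ * <p>
+ * These are the {@link DataFile} objects ultimately returned by {@link #writeDataFiles}, which callers may commit in
+ * their own transaction, e.g. (a sketch; {@code icebergTable} and {@code instructions} are illustrative):
+ * <pre>{@code
+ * List<DataFile> dataFiles = writer.writeDataFiles(instructions);
+ * AppendFiles append = icebergTable.newAppend();
+ * dataFiles.forEach(append::appendFile);
+ * append.commit();
+ * }</pre>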
+ */ + private List dataFilesFromParquet( + @NotNull final List parquetFilesWritten, + @NotNull final List partitionDataList) { + final int numFiles = parquetFilesWritten.size(); + final List dataFiles = new ArrayList<>(numFiles); + final PartitionSpec partitionSpec = tableSpec; + for (int idx = 0; idx < numFiles; idx++) { + final CompletedParquetWrite completedWrite = parquetFilesWritten.get(idx); + final DataFiles.Builder dataFileBuilder = DataFiles.builder(partitionSpec) + .withPath(completedWrite.destination().toString()) + .withFormat(FileFormat.PARQUET) + .withRecordCount(completedWrite.numRows()) + .withFileSizeInBytes(completedWrite.numBytes()); + if (partitionSpec.isPartitioned()) { + dataFileBuilder.withPartition(partitionDataList.get(idx)); + } + dataFiles.add(dataFileBuilder.build()); + } + return dataFiles; + } +} diff --git a/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/IcebergWriteInstructions.java b/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/IcebergWriteInstructions.java new file mode 100644 index 00000000000..12afb5ef9e4 --- /dev/null +++ b/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/IcebergWriteInstructions.java @@ -0,0 +1,76 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.iceberg.util; + +import io.deephaven.annotations.BuildableStyle; +import io.deephaven.engine.table.Table; +import org.immutables.value.Value; +import org.immutables.value.Value.Immutable; + +import java.util.List; + +/** + * This class provides instructions intended for writing deephaven tables as partitions to Iceberg tables. + */ +@Immutable +@BuildableStyle +public abstract class IcebergWriteInstructions { + + public static Builder builder() { + return ImmutableIcebergWriteInstructions.builder(); + } + + /** + * The Deephaven tables to be written. + *

+ * All tables must have a table definition matching the non-partitioning columns of the definition specified in the
+ * {@link IcebergTableWriter}. For example, if an Iceberg table is partitioned by "year" and "month" and has a
+ * non-partitioning column "data," then the {@link IcebergTableWriter} should be configured with a definition that
+ * includes all three columns: "year," "month," and "data." However, the tables provided here should only include the
+ * non-partitioning column, "data."
+ */
+ public abstract List<Table> tables();
+
+ /**
+ * The partition paths where each table will be written. For example, if the table is partitioned by "year" and
+ * "month", the partition path could be "year=2021/month=01".
+ *
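+ * <p>
+ * A sketch of pairing tables with partition paths (table names and path values are illustrative):
+ * <pre>{@code
+ * IcebergWriteInstructions.builder()
+ *         .addTables(table2021, table2022)
+ *         .addPartitionPaths("year=2021/month=01", "year=2022/month=01")
+ *         .build();
+ * }</pre>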

+ * If writing to a partitioned iceberg table, users must provide partition path for each table in {@link #tables()} + * in the same order. Else, this should be an empty list. + */ + public abstract List partitionPaths(); + + // @formatter:off + public interface Builder { + // @formatter:on + Builder addTables(Table element); + + Builder addTables(Table... elements); + + Builder addAllTables(Iterable elements); + + Builder addPartitionPaths(String element); + + Builder addPartitionPaths(String... elements); + + Builder addAllPartitionPaths(Iterable elements); + + IcebergWriteInstructions build(); + } + + @Value.Check + final void countCheckTables() { + if (tables().isEmpty()) { + throw new IllegalArgumentException("At least one table must be provided"); + } + } + + @Value.Check + final void countCheckPartitionPaths() { + if (!partitionPaths().isEmpty() && partitionPaths().size() != tables().size()) { + throw new IllegalArgumentException("Partition path must be provided for each table, partitionPaths.size()=" + + partitionPaths().size() + ", tables.size()=" + tables().size()); + } + } +} diff --git a/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/SchemaProvider.java b/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/SchemaProvider.java new file mode 100644 index 00000000000..579c849bfe4 --- /dev/null +++ b/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/SchemaProvider.java @@ -0,0 +1,33 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.iceberg.util; + +import org.apache.iceberg.Schema; + +/** + * A specification for extracting the schema from a table. + */ +public interface SchemaProvider { + + // Static factory methods for creating SchemaProvider instances + static SchemaProvider fromCurrent() { + return new SchemaProviderInternal.CurrentSchemaProvider(); + } + + static SchemaProvider fromSchemaId(final int id) { + return new SchemaProviderInternal.IdSchemaProvider(id); + } + + static SchemaProvider fromSchema(final Schema schema) { + return new SchemaProviderInternal.DirectSchemaProvider(schema); + } + + static SchemaProvider fromSnapshotId(final int snapshotId) { + return new SchemaProviderInternal.SnapshotIdSchemaProvider(snapshotId); + } + + static SchemaProvider fromCurrentSnapshot() { + return new SchemaProviderInternal.CurrentSnapshotSchemaProvider(); + } +} diff --git a/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/SchemaProviderInternal.java b/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/SchemaProviderInternal.java new file mode 100644 index 00000000000..25989d7609e --- /dev/null +++ b/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/SchemaProviderInternal.java @@ -0,0 +1,117 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.iceberg.util; + +import org.apache.iceberg.Schema; +import org.apache.iceberg.Snapshot; +import org.apache.iceberg.Table; + +/** + * Internal class containing the implementations of {@link SchemaProvider}. + */ +class SchemaProviderInternal { + + interface SchemaProviderImpl { + /** + * Returns the schema for the given table based on this {@link SchemaProvider}. 
+ */ + Schema getSchema(Table table); + } + + // Implementations of SchemaProvider + static class CurrentSchemaProvider implements SchemaProvider, SchemaProviderImpl { + @Override + public Schema getSchema(final Table table) { + return getCurrentSchema(table); + } + } + + static class IdSchemaProvider implements SchemaProvider, SchemaProviderImpl { + private final int schemaId; + + IdSchemaProvider(final int schemaId) { + this.schemaId = schemaId; + } + + @Override + public Schema getSchema(final Table table) { + return getSchemaForId(table, schemaId); + } + } + + static class DirectSchemaProvider implements SchemaProvider, SchemaProviderImpl { + private final Schema schema; + + DirectSchemaProvider(final Schema schema) { + this.schema = schema; + } + + @Override + public Schema getSchema(final Table table) { + return schema; + } + } + + static class SnapshotIdSchemaProvider implements SchemaProvider, SchemaProviderImpl { + private final int snapshotId; + + SnapshotIdSchemaProvider(final int snapshotId) { + this.snapshotId = snapshotId; + } + + @Override + public Schema getSchema(final Table table) { + return getSchemaForSnapshotId(table, snapshotId); + } + } + + static class CurrentSnapshotSchemaProvider implements SchemaProvider, SchemaProviderImpl { + @Override + public Schema getSchema(final Table table) { + return getSchemaForCurrentSnapshot(table); + } + } + + // -------------------------------------------------------------------------------------------------- + + // Methods for extracting the schema from the table + private static Schema getCurrentSchema(final Table table) { + return table.schema(); + } + + private static Schema getSchemaForId(final Table table, final int schemaId) { + final Schema schema = table.schemas().get(schemaId); + if (schema == null) { + throw new IllegalArgumentException("Schema with ID " + schemaId + " not found for table " + table); + } + return schema; + } + + private static Schema getSchemaForSnapshotId(final Table table, final int snapshotId) { + final Snapshot snapshot = table.snapshot(snapshotId); + if (snapshot == null) { + throw new IllegalArgumentException("Snapshot with ID " + snapshotId + " not found for table " + + table); + } + final Integer schemaId = snapshot.schemaId(); + if (schemaId == null) { + throw new IllegalArgumentException("Snapshot with ID " + snapshotId + " does not have a schema ID"); + } + return getSchemaForId(table, schemaId); + } + + private static Schema getSchemaForCurrentSnapshot(final Table table) { + final Snapshot currentSnapshot = table.currentSnapshot(); + if (currentSnapshot == null) { + throw new IllegalArgumentException("Table " + table + " does not have a current snapshot"); + } + final Integer schemaId = currentSnapshot.schemaId(); + if (schemaId == null) { + throw new IllegalArgumentException("Current snapshot with ID " + currentSnapshot.snapshotId() + + " for table " + table + " does not have a schema ID"); + } + return getSchemaForId(table, schemaId); + } +} + diff --git a/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/TableParquetWriterOptions.java b/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/TableParquetWriterOptions.java new file mode 100644 index 00000000000..98f2211907a --- /dev/null +++ b/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/TableParquetWriterOptions.java @@ -0,0 +1,130 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.iceberg.util; + +import io.deephaven.annotations.BuildableStyle; +import 
io.deephaven.engine.table.TableDefinition; +import io.deephaven.parquet.table.ParquetInstructions; +import org.immutables.value.Value; +import org.jetbrains.annotations.NotNull; + +import java.util.Map; + +import static io.deephaven.parquet.table.ParquetInstructions.MIN_TARGET_PAGE_SIZE; + +/** + * This class provides instructions for building {@link IcebergTableWriter} intended for writing Iceberg tables as + * Parquet data files. The default values documented in this class may change in the future. As such, callers may wish + * to explicitly set the values. + */ +@Value.Immutable +@BuildableStyle +public abstract class TableParquetWriterOptions extends TableWriterOptions { + + public static Builder builder() { + return ImmutableTableParquetWriterOptions.builder(); + } + + /** + * The name of the compression codec to use when writing Parquet files; defaults to + * {@link ParquetInstructions#DEFAULT_COMPRESSION_CODEC_NAME}. + */ + @Value.Default + public String compressionCodecName() { + return ParquetInstructions.DEFAULT_COMPRESSION_CODEC_NAME; + } + + /** + * The maximum number of unique keys the parquet file writer should add to a dictionary page before switching to + * non-dictionary encoding; defaults to {@value ParquetInstructions#DEFAULT_MAXIMUM_DICTIONARY_KEYS}; never + * evaluated for non-String columns. + */ + @Value.Default + public int maximumDictionaryKeys() { + return ParquetInstructions.DEFAULT_MAXIMUM_DICTIONARY_KEYS; + } + + /** + * The maximum number of bytes the parquet file writer should add to a dictionary before switching to non-dictionary + * encoding; defaults to {@value ParquetInstructions#DEFAULT_MAXIMUM_DICTIONARY_SIZE}; never evaluated for + * non-String columns. + */ + @Value.Default + public int maximumDictionarySize() { + return ParquetInstructions.DEFAULT_MAXIMUM_DICTIONARY_SIZE; + } + + /** + * The target page size for writing the parquet files; defaults to + * {@link ParquetInstructions#DEFAULT_TARGET_PAGE_SIZE}, should be greater than or equal to + * {@link ParquetInstructions#MIN_TARGET_PAGE_SIZE}. + */ + @Value.Default + public int targetPageSize() { + return ParquetInstructions.DEFAULT_TARGET_PAGE_SIZE; + } + + /** + * Convert this to a {@link ParquetInstructions}. + * + * @param onWriteCompleted The callback to be invoked after writing the parquet file. + * @param tableDefinition The table definition to be populated inside the parquet file's schema + * @param fieldIdToName Mapping of field id to field name, to be populated inside the parquet file's schema + */ + ParquetInstructions toParquetInstructions( + @NotNull final ParquetInstructions.OnWriteCompleted onWriteCompleted, + @NotNull final TableDefinition tableDefinition, + @NotNull final Map fieldIdToName) { + final ParquetInstructions.Builder builder = new ParquetInstructions.Builder(); + + dataInstructions().ifPresent(builder::setSpecialInstructions); + + // Add parquet writing specific instructions. 
+ builder.setTableDefinition(tableDefinition); + for (final Map.Entry entry : fieldIdToName.entrySet()) { + builder.setFieldId(entry.getValue(), entry.getKey()); + } + builder.setCompressionCodecName(compressionCodecName()); + builder.setMaximumDictionaryKeys(maximumDictionaryKeys()); + builder.setMaximumDictionarySize(maximumDictionarySize()); + builder.setTargetPageSize(targetPageSize()); + builder.setOnWriteCompleted(onWriteCompleted); + + return builder.build(); + } + + public interface Builder extends TableWriterOptions.Builder { + Builder compressionCodecName(String compressionCodecName); + + Builder maximumDictionaryKeys(int maximumDictionaryKeys); + + Builder maximumDictionarySize(int maximumDictionarySize); + + Builder targetPageSize(int targetPageSize); + + TableParquetWriterOptions build(); + } + + @Value.Check + final void boundsCheckMaxDictionaryKeys() { + if (maximumDictionaryKeys() < 0) { + throw new IllegalArgumentException("maximumDictionaryKeys(=" + maximumDictionaryKeys() + ") must be >= 0"); + } + } + + @Value.Check + final void boundsCheckMaxDictionarySize() { + if (maximumDictionarySize() < 0) { + throw new IllegalArgumentException("maximumDictionarySize(=" + maximumDictionarySize() + ") must be >= 0"); + } + } + + @Value.Check + final void boundsCheckTargetPageSize() { + if (targetPageSize() < MIN_TARGET_PAGE_SIZE) { + throw new IllegalArgumentException( + "targetPageSize(=" + targetPageSize() + ") must be >= " + MIN_TARGET_PAGE_SIZE); + } + } +} diff --git a/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/TableWriterOptions.java b/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/TableWriterOptions.java new file mode 100644 index 00000000000..95bcb2d8036 --- /dev/null +++ b/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/TableWriterOptions.java @@ -0,0 +1,114 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.iceberg.util; + +import io.deephaven.engine.table.TableDefinition; +import org.apache.iceberg.Schema; +import org.immutables.value.Value; + +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Optional; +import java.util.Set; + +public abstract class TableWriterOptions { + + /** + * The {@link TableDefinition} to use when writing Iceberg data files, instead of the one implied by the table being + * written itself. This definition can be used to skip some columns or add additional columns with {@code null} + * values. + */ + public abstract TableDefinition tableDefinition(); + + /** + * The data instructions to use for reading/writing the Iceberg data files (might be S3Instructions or other cloud + * provider-specific instructions). + */ + public abstract Optional dataInstructions(); + + /** + * Used to extract a {@link Schema} from a table. That schema will be used in conjunction with the + * {@link #fieldIdToColumnName()} to map Deephaven columns from {@link #tableDefinition()} to Iceberg columns. If + * {@link #fieldIdToColumnName()} is not provided, the mapping is done by column name. + *

+ * Users can specify how to extract the schema in multiple ways (by schema ID, snapshot ID, etc.). + *
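+ * <p>
+ * For example, to pin the writer to a specific schema id (the id {@code 42} is illustrative):
+ * <pre>{@code
+ * TableParquetWriterOptions.builder()
+ *         .tableDefinition(myDefinition)
+ *         .schemaProvider(SchemaProvider.fromSchemaId(42))
+ *         .build();
+ * }</pre>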

+ * Defaults to {@link SchemaProvider#fromCurrent()}, which means use the current schema from the table. + */ + @Value.Default + public SchemaProvider schemaProvider() { + return SchemaProvider.fromCurrent(); + } + + /** + * A one-to-one {@link Map map} from Iceberg field IDs from the {@link #schemaProvider()} to Deephaven column names + * from the {@link #tableDefinition()}. + */ + public abstract Map fieldIdToColumnName(); + + /** + * A reverse mapping of {@link #fieldIdToColumnName()}. + */ + @Value.Lazy + Map dhColumnNameToFieldId() { + final Map reversedMap = new HashMap<>(fieldIdToColumnName().size()); + for (final Map.Entry entry : fieldIdToColumnName().entrySet()) { + reversedMap.put(entry.getValue(), entry.getKey()); + } + return reversedMap; + } + + // @formatter:off + interface Builder> { + // @formatter:on + INSTRUCTIONS_BUILDER tableDefinition(TableDefinition tableDefinition); + + INSTRUCTIONS_BUILDER dataInstructions(Object s3Instructions); + + INSTRUCTIONS_BUILDER schemaProvider(SchemaProvider schemaProvider); + + INSTRUCTIONS_BUILDER putFieldIdToColumnName(int value, String key); + + INSTRUCTIONS_BUILDER putAllFieldIdToColumnName(Map entries); + } + + /** + * Check all column names present in the {@link #fieldIdToColumnName()} map are present in the + * {@link #tableDefinition()}. + */ + @Value.Check + final void checkDhColumnsToIcebergFieldIds() { + if (!fieldIdToColumnName().isEmpty()) { + final Set columnNamesFromDefinition = tableDefinition().getColumnNameSet(); + final Map fieldIdToColumnName = fieldIdToColumnName(); + for (final String columnNameFromMap : fieldIdToColumnName.values()) { + if (!columnNamesFromDefinition.contains(columnNameFromMap)) { + throw new IllegalArgumentException("Column " + columnNameFromMap + " not found in table " + + "definition, available columns are: " + columnNamesFromDefinition); + } + } + } + } + + @Value.Check + final void checkOneToOneMapping() { + final Collection columnNames = new HashSet<>(fieldIdToColumnName().size()); + for (final String columnName : fieldIdToColumnName().values()) { + if (columnNames.contains(columnName)) { + throw new IllegalArgumentException("Duplicate mapping found: " + columnName + " in field Id to column" + + " name map, expected one-to-one mapping"); + } + columnNames.add(columnName); + } + } + + @Value.Check + final void checkNonEmptyDefinition() { + if (tableDefinition().numColumns() == 0) { + throw new IllegalArgumentException("Cannot write to an Iceberg table using empty table definition"); + } + } +} diff --git a/extensions/iceberg/src/test/java/io/deephaven/iceberg/PyIceberg1Test.java b/extensions/iceberg/src/test/java/io/deephaven/iceberg/PyIceberg1Test.java index 3759ebc3305..52a6f5a4b9b 100644 --- a/extensions/iceberg/src/test/java/io/deephaven/iceberg/PyIceberg1Test.java +++ b/extensions/iceberg/src/test/java/io/deephaven/iceberg/PyIceberg1Test.java @@ -29,7 +29,7 @@ * See TESTING.md and generate-pyiceberg-1.py for more details. 
*/ @Tag("security-manager-allow") -public class PyIceberg1Test { +class PyIceberg1Test { private static final Namespace NAMESPACE = Namespace.of("dh-default"); private static final TableIdentifier CITIES_ID = TableIdentifier.of(NAMESPACE, "cities"); diff --git a/extensions/iceberg/src/test/java/io/deephaven/iceberg/junit5/FileWarehouseSqliteCatalogTest.java b/extensions/iceberg/src/test/java/io/deephaven/iceberg/junit5/FileWarehouseSqliteCatalogTest.java index 75401946e50..9b4eabe929d 100644 --- a/extensions/iceberg/src/test/java/io/deephaven/iceberg/junit5/FileWarehouseSqliteCatalogTest.java +++ b/extensions/iceberg/src/test/java/io/deephaven/iceberg/junit5/FileWarehouseSqliteCatalogTest.java @@ -6,6 +6,7 @@ import io.deephaven.iceberg.sqlite.SqliteHelper; import io.deephaven.iceberg.util.IcebergCatalogAdapter; import io.deephaven.iceberg.util.IcebergTools; +import org.jetbrains.annotations.Nullable; import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.TestInfo; @@ -17,7 +18,13 @@ * served via local file IO. */ @Tag("security-manager-allow") -public final class FileWarehouseSqliteCatalogTest extends SqliteCatalogBase { +final class FileWarehouseSqliteCatalogTest extends SqliteCatalogBase { + + @Override + @Nullable + public Object dataInstructions() { + return null; + } @Override protected IcebergCatalogAdapter catalogAdapter(TestInfo testInfo, Path rootDir, Map properties) { diff --git a/extensions/iceberg/src/test/java/io/deephaven/iceberg/junit5/SqliteCatalogBase.java b/extensions/iceberg/src/test/java/io/deephaven/iceberg/junit5/SqliteCatalogBase.java index 1f8522e45f8..11962bf71ec 100644 --- a/extensions/iceberg/src/test/java/io/deephaven/iceberg/junit5/SqliteCatalogBase.java +++ b/extensions/iceberg/src/test/java/io/deephaven/iceberg/junit5/SqliteCatalogBase.java @@ -3,37 +3,70 @@ // package io.deephaven.iceberg.junit5; +import io.deephaven.UncheckedDeephavenException; +import io.deephaven.engine.context.ExecutionContext; import io.deephaven.engine.table.ColumnDefinition; import io.deephaven.engine.table.Table; import io.deephaven.engine.table.TableDefinition; +import io.deephaven.engine.table.impl.PartitionAwareSourceTable; +import io.deephaven.engine.table.impl.select.FormulaEvaluationException; +import io.deephaven.engine.testutil.ControlledUpdateGraph; +import io.deephaven.engine.util.TableTools; +import io.deephaven.iceberg.base.IcebergUtils; import io.deephaven.engine.testutil.junit4.EngineCleanup; import io.deephaven.iceberg.sqlite.SqliteHelper; import io.deephaven.iceberg.util.IcebergCatalogAdapter; +import io.deephaven.iceberg.util.IcebergReadInstructions; import io.deephaven.iceberg.util.IcebergTableAdapter; +import io.deephaven.iceberg.util.IcebergTableImpl; +import io.deephaven.iceberg.util.IcebergTableWriter; +import io.deephaven.iceberg.util.IcebergUpdateMode; +import io.deephaven.iceberg.util.IcebergWriteInstructions; +import io.deephaven.iceberg.util.TableParquetWriterOptions; +import io.deephaven.parquet.table.ParquetInstructions; +import io.deephaven.parquet.table.ParquetTools; +import io.deephaven.parquet.table.location.ParquetTableLocationKey; +import io.deephaven.qst.type.Type; +import org.apache.iceberg.AppendFiles; +import org.apache.iceberg.DataFile; import org.apache.iceberg.Schema; +import org.apache.iceberg.Snapshot; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.types.Types; +import org.apache.parquet.column.ColumnDescriptor; +import 
org.apache.parquet.hadoop.metadata.ParquetMetadata; +import org.jetbrains.annotations.Nullable; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInfo; import org.junit.jupiter.api.io.TempDir; +import java.net.URI; +import java.net.URISyntaxException; import java.nio.file.Path; +import java.time.LocalDate; import java.util.HashMap; import java.util.Map; +import java.util.List; +import java.util.stream.Collectors; +import static io.deephaven.engine.testutil.TstUtils.assertTableEquals; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.failBecauseExceptionWasNotThrown; public abstract class SqliteCatalogBase { - protected IcebergCatalogAdapter catalogAdapter; - private EngineCleanup engineCleanup = new EngineCleanup(); + private IcebergCatalogAdapter catalogAdapter; + private final EngineCleanup engineCleanup = new EngineCleanup(); protected abstract IcebergCatalogAdapter catalogAdapter(TestInfo testInfo, Path rootDir, Map properties) throws Exception; + @Nullable + protected abstract Object dataInstructions(); + @BeforeEach void setUp(TestInfo testInfo, @TempDir Path rootDir) throws Exception { engineCleanup.setUp(); @@ -47,6 +80,15 @@ void tearDown() throws Exception { engineCleanup.tearDown(); } + private TableParquetWriterOptions.Builder writerOptionsBuilder() { + final TableParquetWriterOptions.Builder builder = TableParquetWriterOptions.builder(); + final Object dataInstructions; + if ((dataInstructions = dataInstructions()) != null) { + return builder.dataInstructions(dataInstructions); + } + return builder; + } + @Test void empty() { assertThat(catalogAdapter.listNamespaces()).isEmpty(); @@ -79,4 +121,849 @@ void createEmptyTable() { } assertThat(table.isEmpty()).isTrue(); } + + @Test + void appendTableBasicTest() { + final Table source = TableTools.emptyTable(10) + .update("intCol = (int) 2 * i + 10", + "doubleCol = (double) 2.5 * i + 10"); + final TableIdentifier tableIdentifier = TableIdentifier.parse("MyNamespace.MyTable"); + final IcebergTableAdapter tableAdapter = catalogAdapter.createTable(tableIdentifier, source.getDefinition()); + { + final IcebergTableWriter tableWriter = tableAdapter.tableWriter(writerOptionsBuilder() + .tableDefinition(source.getDefinition()) + .build()); + tableWriter.append(IcebergWriteInstructions.builder() + .addTables(source) + .build()); + } + + Table fromIceberg = tableAdapter.table(); + assertTableEquals(source, fromIceberg); + verifySnapshots(tableIdentifier, List.of("append")); + + // Append more data with different compression codec + final Table moreData = TableTools.emptyTable(5) + .update("intCol = (int) 3 * i + 20", + "doubleCol = (double) 3.5 * i + 20"); + final IcebergTableWriter lz4TableWriter = tableAdapter.tableWriter(writerOptionsBuilder() + .tableDefinition(source.getDefinition()) + .compressionCodecName("LZ4") + .build()); + lz4TableWriter.append(IcebergWriteInstructions.builder() + .addTables(moreData) + .build()); + + fromIceberg = tableAdapter.table(); + final Table expected = TableTools.merge(source, moreData); + assertTableEquals(expected, fromIceberg); + verifySnapshots(tableIdentifier, List.of("append", "append")); + + // Append an empty table + final Table emptyTable = TableTools.emptyTable(0) + .update("intCol = (int) 4 * i + 30", + "doubleCol = (double) 4.5 * i + 30"); + lz4TableWriter.append(IcebergWriteInstructions.builder() + .addTables(emptyTable) + .build()); + 
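+ // Appending a 0-row table is expected to create another "append" snapshot while leaving the data unchanged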
fromIceberg = tableAdapter.table(); + assertTableEquals(expected, fromIceberg); + verifySnapshots(tableIdentifier, List.of("append", "append", "append")); + + // Append multiple tables in a single call with different compression codec + final Table someMoreData = TableTools.emptyTable(3) + .update("intCol = (int) 5 * i + 40", + "doubleCol = (double) 5.5 * i + 40"); + { + final IcebergTableWriter gzipTableWriter = tableAdapter.tableWriter(writerOptionsBuilder() + .tableDefinition(source.getDefinition()) + .compressionCodecName("GZIP") + .build()); + gzipTableWriter.append(IcebergWriteInstructions.builder() + .addTables(someMoreData, moreData, emptyTable) + .build()); + } + + fromIceberg = tableAdapter.table(); + final Table expected2 = TableTools.merge(expected, someMoreData, moreData); + assertTableEquals(expected2, fromIceberg); + verifySnapshots(tableIdentifier, List.of("append", "append", "append", "append")); + } + + private void verifySnapshots(final TableIdentifier tableIdentifier, final List expectedOperations) { + final Iterable snapshots = catalogAdapter.catalog().loadTable(tableIdentifier).snapshots(); + assertThat(snapshots).map(Snapshot::operation).isEqualTo(expectedOperations); + } + + @Test + void appendWithDifferentDefinition() { + final Table source = TableTools.emptyTable(10) + .update("intCol = (int) 2 * i + 10", + "doubleCol = (double) 2.5 * i + 10"); + final TableIdentifier tableIdentifier = TableIdentifier.parse("MyNamespace.MyTable"); + + final IcebergTableAdapter tableAdapter = catalogAdapter.createTable(tableIdentifier, source.getDefinition()); + final IcebergTableWriter tableWriter = tableAdapter.tableWriter(writerOptionsBuilder() + .tableDefinition(source.getDefinition()) + .build()); + tableWriter.append(IcebergWriteInstructions.builder() + .addTables(source) + .build()); + Table fromIceberg = tableAdapter.table(); + assertTableEquals(source, fromIceberg); + verifySnapshots(tableIdentifier, List.of("append")); + + // Append a table with just the int column + final Table expected; + { + final IcebergTableWriter tableWriterWithOneColumn = tableAdapter.tableWriter(writerOptionsBuilder() + .tableDefinition(TableDefinition.of(ColumnDefinition.ofInt("intCol"))) + .build()); + final Table singleColumnSource = TableTools.emptyTable(10) + .update("intCol = (int) 5 * i + 10"); + tableWriterWithOneColumn.append(IcebergWriteInstructions.builder() + .addTables(singleColumnSource) + .build()); + fromIceberg = tableAdapter.table(); + expected = TableTools.merge(source, singleColumnSource.update("doubleCol = NULL_DOUBLE")); + assertTableEquals(expected, fromIceberg); + verifySnapshots(tableIdentifier, List.of("append", "append")); + } + + // Append more data + final Table moreData = TableTools.emptyTable(5) + .update("intCol = (int) 3 * i + 20", + "doubleCol = (double) 3.5 * i + 20"); + tableWriter.append(IcebergWriteInstructions.builder() + .addTables(moreData) + .build()); + fromIceberg = tableAdapter.table(); + final Table expected2 = TableTools.merge(expected, moreData); + assertTableEquals(expected2, fromIceberg); + verifySnapshots(tableIdentifier, List.of("append", "append", "append")); + + // Append an empty table + tableWriter.append(IcebergWriteInstructions.builder() + .addTables(TableTools.emptyTable(0).update( + "intCol = (int) 3 * i + 20", + "doubleCol = (double) 3.5 * i + 20")) + .build()); + fromIceberg = tableAdapter.table(); + assertTableEquals(expected2, fromIceberg); + verifySnapshots(tableIdentifier, List.of("append", "append", "append", "append")); + } + + 
@Test + void appendMultipleTablesWithDifferentDefinitionTest() { + final Table source = TableTools.emptyTable(10) + .update("intCol = (int) 2 * i + 10", + "doubleCol = (double) 2.5 * i + 10"); + final TableIdentifier tableIdentifier = TableIdentifier.parse("MyNamespace.MyTable"); + final IcebergTableAdapter tableAdapter = catalogAdapter.createTable(tableIdentifier, source.getDefinition()); + final IcebergTableWriter tableWriter = tableAdapter.tableWriter(writerOptionsBuilder() + .tableDefinition(source.getDefinition()) + .build()); + tableWriter.append(IcebergWriteInstructions.builder() + .addTables(source) + .build()); + Table fromIceberg = tableAdapter.table(); + assertTableEquals(source, fromIceberg); + + try { + final Table appendTable = TableTools.emptyTable(5) + .update("intCol = (int) 3 * i + 20", + "doubleCol = (double) 3.5 * i + 20", + "shortCol = (short) 3 * i + 20"); + tableWriter.append(IcebergWriteInstructions.builder() + .addTables(appendTable) + .build()); + failBecauseExceptionWasNotThrown(UncheckedDeephavenException.class); + } catch (TableDefinition.IncompatibleTableDefinitionException e) { + // Table definition mismatch between table writer and append table + assertThat(e).hasMessageContaining("Table definition"); + } + } + + @Test + void appendToCatalogTableWithAllDataTypesTest() { + final Schema schema = new Schema( + Types.NestedField.required(1, "booleanCol", Types.BooleanType.get()), + Types.NestedField.required(2, "doubleCol", Types.DoubleType.get()), + Types.NestedField.required(3, "floatCol", Types.FloatType.get()), + Types.NestedField.required(4, "intCol", Types.IntegerType.get()), + Types.NestedField.required(5, "longCol", Types.LongType.get()), + Types.NestedField.required(6, "stringCol", Types.StringType.get()), + Types.NestedField.required(7, "instantCol", Types.TimestampType.withZone()), + Types.NestedField.required(8, "localDateTimeCol", Types.TimestampType.withoutZone()), + Types.NestedField.required(9, "localDateCol", Types.DateType.get()), + Types.NestedField.required(10, "localTimeCol", Types.TimeType.get()), + Types.NestedField.required(11, "binaryCol", Types.BinaryType.get())); + final Namespace myNamespace = Namespace.of("MyNamespace"); + final TableIdentifier myTableId = TableIdentifier.of(myNamespace, "MyTableWithAllDataTypes"); + catalogAdapter.catalog().createTable(myTableId, schema); + + final Table source = TableTools.emptyTable(10) + .update( + "booleanCol = i % 2 == 0", + "doubleCol = (double) 2.5 * i + 10", + "floatCol = (float) (2.5 * i + 10)", + "intCol = 2 * i + 10", + "longCol = (long) (2 * i + 10)", + "stringCol = String.valueOf(2 * i + 10)", + "instantCol = java.time.Instant.now()", + "localDateTimeCol = java.time.LocalDateTime.now()", + "localDateCol = java.time.LocalDate.now()", + "localTimeCol = java.time.LocalTime.now()", + "binaryCol = new byte[] {(byte) i}"); + final IcebergTableAdapter tableAdapter = catalogAdapter.loadTable(myTableId); + final IcebergTableWriter tableWriter = tableAdapter.tableWriter(writerOptionsBuilder() + .tableDefinition(source.getDefinition()) + .build()); + tableWriter.append(IcebergWriteInstructions.builder() + .addTables(source) + .build()); + final Table fromIceberg = tableAdapter.table(); + assertTableEquals(source, fromIceberg); + } + + @Test + void testFailureInWrite() { + // Try creating a new iceberg table with bad data + final Table badSource = TableTools.emptyTable(5) + .updateView( + "stringCol = ii % 2 == 0 ? 
Long.toString(ii) : null", + "intCol = (int) stringCol.charAt(0)"); + final Namespace myNamespace = Namespace.of("MyNamespace"); + final TableIdentifier tableIdentifier = TableIdentifier.of(myNamespace, "MyTable"); + final IcebergTableAdapter tableAdapter = catalogAdapter.createTable(tableIdentifier, badSource.getDefinition()); + final IcebergTableWriter tableWriter = tableAdapter.tableWriter(writerOptionsBuilder() + .tableDefinition(badSource.getDefinition()) + .build()); + + try { + tableWriter.append(IcebergWriteInstructions.builder() + .addTables(badSource) + .build()); + failBecauseExceptionWasNotThrown(UncheckedDeephavenException.class); + } catch (UncheckedDeephavenException e) { + // Exception expected for invalid formula in table + assertThat(e).cause().isInstanceOf(FormulaEvaluationException.class); + } + + // Now create a table with good data with same schema and append a bad source to it + final Table goodSource = TableTools.emptyTable(5) + .update("stringCol = Long.toString(ii)", + "intCol = (int) i"); + tableWriter.append(IcebergWriteInstructions.builder() + .addTables(goodSource) + .build()); + Table fromIceberg = tableAdapter.table(); + assertTableEquals(goodSource, fromIceberg); + + try { + tableWriter.append(IcebergWriteInstructions.builder() + .addTables(badSource) + .build()); + failBecauseExceptionWasNotThrown(UncheckedDeephavenException.class); + } catch (UncheckedDeephavenException e) { + // Exception expected for invalid formula in table + assertThat(e).cause().isInstanceOf(FormulaEvaluationException.class); + } + + try { + final IcebergTableWriter badWriter = tableAdapter.tableWriter(writerOptionsBuilder() + .tableDefinition(TableDefinition.of(ColumnDefinition.ofDouble("doubleCol"))) + .build()); + failBecauseExceptionWasNotThrown(UncheckedDeephavenException.class); + } catch (IllegalArgumentException e) { + // Exception expected because "doubleCol" is not present in the table + assertThat(e).hasMessageContaining("Column doubleCol not found in the schema"); + } + + // Make sure existing good data is not deleted + assertThat(catalogAdapter.listNamespaces()).contains(myNamespace); + assertThat(catalogAdapter.listTables(myNamespace)).containsExactly(tableIdentifier); + fromIceberg = tableAdapter.table(); + assertTableEquals(goodSource, fromIceberg); + } + + @Test + void testColumnRenameWhileWriting() throws URISyntaxException { + final Table source = TableTools.emptyTable(10) + .update("intCol = (int) 2 * i + 10", + "doubleCol = (double) 2.5 * i + 10"); + final TableIdentifier tableIdentifier = TableIdentifier.parse("MyNamespace.MyTable"); + final TableDefinition originalDefinition = source.getDefinition(); + final IcebergTableAdapter tableAdapter = catalogAdapter.createTable(tableIdentifier, originalDefinition); + { + final IcebergTableWriter tableWriter = tableAdapter.tableWriter(writerOptionsBuilder() + .tableDefinition(source.getDefinition()) + .build()); + tableWriter.append(IcebergWriteInstructions.builder() + .addTables(source) + .build()); + + verifyDataFiles(tableIdentifier, List.of(source)); + } + + // Get field IDs for the columns for this table + final Map nameToFieldIdFromSchema = new HashMap<>(); + final Schema schema = tableAdapter.icebergTable().schema(); + for (final Types.NestedField field : schema.columns()) { + nameToFieldIdFromSchema.put(field.name(), field.fieldId()); + } + + { + final List parquetFiles = getAllParquetFilesFromDataFiles(tableIdentifier); + assertThat(parquetFiles).hasSize(1); + verifyFieldIdsFromParquetFile(parquetFiles.get(0), 
originalDefinition.getColumnNames(), + nameToFieldIdFromSchema); + } + + final Table moreData = TableTools.emptyTable(5) + .update("newIntCol = (int) 3 * i + 20", + "newDoubleCol = (double) 3.5 * i + 20"); + { + // Now append more data to it but with different column names and field Id mapping + final IcebergTableWriter tableWriter = tableAdapter.tableWriter(writerOptionsBuilder() + .tableDefinition(moreData.getDefinition()) + .putFieldIdToColumnName(nameToFieldIdFromSchema.get("intCol"), "newIntCol") + .putFieldIdToColumnName(nameToFieldIdFromSchema.get("doubleCol"), "newDoubleCol") + .build()); + tableWriter.append(IcebergWriteInstructions.builder() + .addTables(moreData) + .build()); + + verifyDataFiles(tableIdentifier, List.of(moreData, source)); + + final Map newNameToFieldId = new HashMap<>(); + newNameToFieldId.put("newIntCol", nameToFieldIdFromSchema.get("intCol")); + newNameToFieldId.put("newDoubleCol", nameToFieldIdFromSchema.get("doubleCol")); + + final List parquetFiles = getAllParquetFilesFromDataFiles(tableIdentifier); + assertThat(parquetFiles).hasSize(2); + verifyFieldIdsFromParquetFile(parquetFiles.get(0), moreData.getDefinition().getColumnNames(), + newNameToFieldId); + verifyFieldIdsFromParquetFile(parquetFiles.get(1), originalDefinition.getColumnNames(), + nameToFieldIdFromSchema); + } + + // TODO: This is failing because we don't map columns based on the column ID when reading. Uncomment this + // when #6156 is merged + // final Table fromIceberg = tableAdapter.table(); + // assertTableEquals(TableTools.merge(source, + // moreData.renameColumns("intCol = newIntCol", "doubleCol = newDoubleCol")), fromIceberg); + } + + /** + * Verify that the schema of the parquet file read from the provided path has the provided column and corresponding + * field IDs. + */ + private void verifyFieldIdsFromParquetFile( + final String path, + final List columnNames, + final Map nameToFieldId) throws URISyntaxException { + final ParquetMetadata metadata = + new ParquetTableLocationKey(new URI(path), 0, null, ParquetInstructions.builder() + .setSpecialInstructions(dataInstructions()) + .build()) + .getMetadata(); + final List columnsMetadata = metadata.getFileMetaData().getSchema().getColumns(); + + final int numColumns = columnNames.size(); + for (int colIdx = 0; colIdx < numColumns; colIdx++) { + final String columnName = columnNames.get(colIdx); + final String columnNameFromParquetFile = columnsMetadata.get(colIdx).getPath()[0]; + assertThat(columnName).isEqualTo(columnNameFromParquetFile); + + final int expectedFieldId = nameToFieldId.get(columnName); + final int fieldIdFromParquetFile = columnsMetadata.get(colIdx).getPrimitiveType().getId().intValue(); + assertThat(fieldIdFromParquetFile).isEqualTo(expectedFieldId); + } + } + + /** + * Verify that the data files in the table match the Deephaven tables in the given sequence. + */ + private void verifyDataFiles( + final TableIdentifier tableIdentifier, + final List
<Table>
dhTables) { + final org.apache.iceberg.Table table = catalogAdapter.catalog().loadTable(tableIdentifier); + final List dataFileList = IcebergUtils.allDataFiles(table, table.currentSnapshot()) + .collect(Collectors.toList()); + assertThat(dataFileList).hasSize(dhTables.size()); + + // Check that each Deephaven table matches the corresponding data file in sequence + for (int i = 0; i < dhTables.size(); i++) { + final Table dhTable = dhTables.get(i); + final DataFile dataFile = dataFileList.get(i); + final String parquetFilePath = dataFile.path().toString(); + final Table fromParquet = ParquetTools.readTable(parquetFilePath, ParquetInstructions.builder() + .setSpecialInstructions(dataInstructions()) + .build()); + assertTableEquals(dhTable, fromParquet); + } + } + + /** + * Get all the parquet files in the table. + */ + private List getAllParquetFilesFromDataFiles(final TableIdentifier tableIdentifier) { + final org.apache.iceberg.Table table = catalogAdapter.catalog().loadTable(tableIdentifier); + return IcebergUtils.allDataFiles(table, table.currentSnapshot()) + .map(dataFile -> dataFile.path().toString()) + .collect(Collectors.toList()); + } + + @Test + void writeDataFilesBasicTest() { + final Table source = TableTools.emptyTable(10) + .update("intCol = (int) 2 * i + 10", + "doubleCol = (double) 2.5 * i + 10"); + final Table anotherSource = TableTools.emptyTable(5) + .update("intCol = (int) 3 * i + 20", + "doubleCol = (double) 3.5 * i + 20"); + + final TableIdentifier tableIdentifier = TableIdentifier.parse("MyNamespace.MyTable"); + final IcebergTableAdapter tableAdapter = catalogAdapter.createTable(tableIdentifier, source.getDefinition()); + + final IcebergTableWriter tableWriter = tableAdapter.tableWriter(writerOptionsBuilder() + .tableDefinition(source.getDefinition()) + .build()); + + final List dataFilesWritten = tableWriter.writeDataFiles(IcebergWriteInstructions.builder() + .addTables(source, anotherSource) + .build()); + verifySnapshots(tableIdentifier, List.of()); + assertThat(dataFilesWritten).hasSize(2); + + // Append some data to the table + final Table moreData = TableTools.emptyTable(5) + .update("intCol = (int) 3 * i + 20", + "doubleCol = (double) 3.5 * i + 20"); + tableWriter.append(IcebergWriteInstructions.builder() + .addTables(moreData) + .build()); + { + final Table fromIceberg = tableAdapter.table(); + assertTableEquals(moreData, fromIceberg); + verifySnapshots(tableIdentifier, List.of("append")); + verifyDataFiles(tableIdentifier, List.of(moreData)); + } + + // Now commit those data files to the table + final org.apache.iceberg.Table icebergTable = catalogAdapter.catalog().loadTable(tableIdentifier); + final AppendFiles append = icebergTable.newAppend(); + dataFilesWritten.forEach(append::appendFile); + append.commit(); + + // Verify that the data files are now in the table + verifySnapshots(tableIdentifier, List.of("append", "append")); + verifyDataFiles(tableIdentifier, List.of(source, anotherSource, moreData)); + + { + // Verify thaty we read the data files in the correct order + final Table fromIceberg = tableAdapter.table(); + assertTableEquals(TableTools.merge(moreData, source, anotherSource), fromIceberg); + } + } + + @Test + void testPartitionedAppendBasic() { + final Table part1 = TableTools.emptyTable(10) + .update("intCol = (int) 2 * i + 10", + "doubleCol = (double) 2.5 * i + 10"); + final Table part2 = TableTools.emptyTable(5) + .update("intCol = (int) 3 * i + 20", + "doubleCol = (double) 3.5 * i + 20"); + final List partitionPaths = List.of("PC=cat", 
"PC=apple"); + final TableIdentifier tableIdentifier = TableIdentifier.parse("MyNamespace.MyTable"); + { + final TableDefinition tableDefinition = part1.getDefinition(); + final IcebergTableAdapter tableAdapter = catalogAdapter.createTable(tableIdentifier, tableDefinition); + final IcebergTableWriter tableWriter = tableAdapter.tableWriter(writerOptionsBuilder() + .tableDefinition(tableDefinition) + .build()); + try { + tableWriter.append(IcebergWriteInstructions.builder() + .addTables(part1, part2) + .addAllPartitionPaths(partitionPaths) + .build()); + failBecauseExceptionWasNotThrown(IllegalArgumentException.class); + } catch (IllegalArgumentException e) { + // Exception expected since partition paths provided with non partitioned table + assertThat(e).hasMessageContaining("partition paths"); + } + catalogAdapter.catalog().dropTable(tableIdentifier, true); + } + + final TableDefinition partitioningTableDef = TableDefinition.of( + ColumnDefinition.ofInt("intCol"), + ColumnDefinition.ofDouble("doubleCol"), + ColumnDefinition.ofString("PC").withPartitioning()); + final IcebergTableAdapter tableAdapter = catalogAdapter.createTable(tableIdentifier, partitioningTableDef); + final IcebergTableWriter tableWriter = tableAdapter.tableWriter(writerOptionsBuilder() + .tableDefinition(partitioningTableDef) + .build()); + + try { + tableWriter.append(IcebergWriteInstructions.builder() + .addTables(part1, part2) + .build()); + failBecauseExceptionWasNotThrown(IllegalArgumentException.class); + } catch (IllegalArgumentException e) { + // Exception expected since partition paths not provided with a partitioned table + assertThat(e).hasMessageContaining("partition paths"); + } + + tableWriter.append(IcebergWriteInstructions.builder() + .addTables(part1, part2) + .addAllPartitionPaths(partitionPaths) + .build()); + final Table fromIceberg = tableAdapter.table(); + assertThat(tableAdapter.definition()).isEqualTo(partitioningTableDef); + assertThat(fromIceberg.getDefinition()).isEqualTo(partitioningTableDef); + assertThat(fromIceberg).isInstanceOf(PartitionAwareSourceTable.class); + final Table expected = TableTools.merge( + part1.update("PC = `cat`"), + part2.update("PC = `apple`")); + assertTableEquals(expected, fromIceberg.select()); + + final Table part3 = TableTools.emptyTable(5) + .update("intCol = (int) 4 * i + 30", + "doubleCol = (double) 4.5 * i + 30"); + final String partitionPath = "PC=boy"; + tableWriter.append(IcebergWriteInstructions.builder() + .addTables(part3) + .addPartitionPaths(partitionPath) + .build()); + final Table fromIceberg2 = tableAdapter.table(); + final Table expected2 = TableTools.merge( + part1.update("PC = `cat`"), + part2.update("PC = `apple`"), + part3.update("PC = `boy`")); + assertTableEquals(expected2, fromIceberg2.select()); + } + + @Test + void testPartitionedAppendBasicIntegerPartitions() { + final Table part1 = TableTools.emptyTable(10) + .update("intCol = (int) 2 * i + 10", + "doubleCol = (double) 2.5 * i + 10"); + final Table part2 = TableTools.emptyTable(5) + .update("intCol = (int) 3 * i + 20", + "doubleCol = (double) 3.5 * i + 20"); + final TableIdentifier tableIdentifier = TableIdentifier.parse("MyNamespace.MyTable"); + + final TableDefinition tableDefinition = TableDefinition.of( + ColumnDefinition.ofInt("intCol"), + ColumnDefinition.ofDouble("doubleCol"), + ColumnDefinition.ofInt("PC").withPartitioning()); + final IcebergTableAdapter tableAdapter = catalogAdapter.createTable(tableIdentifier, tableDefinition); + final IcebergTableWriter tableWriter = 
tableAdapter.tableWriter(writerOptionsBuilder() + .tableDefinition(tableDefinition) + .build()); + + { + // Add partition paths of incorrect type + try { + tableWriter.append(IcebergWriteInstructions.builder() + .addTables(part1, part2) + .addAllPartitionPaths(List.of("PC=cat", "PC=apple")) + .build()); + failBecauseExceptionWasNotThrown(IllegalArgumentException.class); + } catch (IllegalArgumentException e) { + // Exception expected since partition paths provided of incorrect type + assertThat(e).hasMessageContaining("partition path"); + } + } + + tableWriter.append(IcebergWriteInstructions.builder() + .addTables(part1, part2) + .addAllPartitionPaths(List.of("PC=3", "PC=1")) + .build()); + final Table fromIceberg = tableAdapter.table(); + assertThat(tableAdapter.definition()).isEqualTo(tableDefinition); + assertThat(fromIceberg.getDefinition()).isEqualTo(tableDefinition); + assertThat(fromIceberg).isInstanceOf(PartitionAwareSourceTable.class); + final Table expected = TableTools.merge( + part1.update("PC = 3"), + part2.update("PC = 1")); + assertTableEquals(expected, fromIceberg.select()); + + final Table part3 = TableTools.emptyTable(5) + .update("intCol = (int) 4 * i + 30", + "doubleCol = (double) 4.5 * i + 30"); + final String partitionPath = "PC=2"; + tableWriter.append(IcebergWriteInstructions.builder() + .addTables(part3) + .addPartitionPaths(partitionPath) + .build()); + final Table fromIceberg2 = tableAdapter.table(); + final Table expected2 = TableTools.merge( + part1.update("PC = 3"), + part2.update("PC = 1"), + part3.update("PC = 2")); + assertTableEquals(expected2, fromIceberg2.select()); + } + + @Test + void testPartitionedAppendWithAllPartitioningTypes() { + final TableDefinition definition = TableDefinition.of( + ColumnDefinition.ofString("StringPC").withPartitioning(), + ColumnDefinition.ofBoolean("BooleanPC").withPartitioning(), + ColumnDefinition.ofInt("IntegerPC").withPartitioning(), + ColumnDefinition.ofLong("LongPC").withPartitioning(), + ColumnDefinition.ofFloat("FloatPC").withPartitioning(), + ColumnDefinition.ofDouble("DoublePC").withPartitioning(), + ColumnDefinition.of("LocalDatePC", Type.find(LocalDate.class)).withPartitioning(), + ColumnDefinition.ofInt("data")); + + final TableIdentifier tableIdentifier = TableIdentifier.parse("MyNamespace.MyTable"); + final IcebergTableAdapter tableAdapter = catalogAdapter.createTable(tableIdentifier, definition); + + final Table source = TableTools.emptyTable(10) + .update("data = (int) 2 * i + 10"); + final IcebergTableWriter tableWriter = tableAdapter.tableWriter(writerOptionsBuilder() + .tableDefinition(definition) + .build()); + + final List partitionPaths = List.of( + "StringPC=AA/" + + "BooleanPC=true/" + + "IntegerPC=1/" + + "LongPC=2/" + + "FloatPC=3.0/" + + "DoublePC=4.0/" + + "LocalDatePC=2023-10-01"); + tableWriter.append(IcebergWriteInstructions.builder() + .addTables(source) + .addAllPartitionPaths(partitionPaths) + .build()); + final Table fromIceberg = tableAdapter.table(); + assertThat(tableAdapter.definition()).isEqualTo(definition); + assertThat(fromIceberg.getDefinition()).isEqualTo(definition); + assertThat(fromIceberg).isInstanceOf(PartitionAwareSourceTable.class); + + final Table expected = source.updateView( + "StringPC = `AA`", + "BooleanPC = (Boolean) true", + "IntegerPC = (int) 1", + "LongPC = (long) 2", + "FloatPC = (float) 3.0", + "DoublePC = (double) 4.0", + "LocalDatePC = LocalDate.parse(`2023-10-01`)") + .moveColumns(7, "data"); + assertTableEquals(expected, fromIceberg); + } + + @Test + void 
testManualRefreshingAppend() { + final Table source = TableTools.emptyTable(10) + .update("intCol = (int) 2 * i + 10", + "doubleCol = (double) 2.5 * i + 10"); + final TableIdentifier tableIdentifier = TableIdentifier.parse("MyNamespace.MyTable"); + final IcebergTableAdapter tableAdapter = catalogAdapter.createTable(tableIdentifier, source.getDefinition()); + { + final IcebergTableWriter tableWriter = tableAdapter.tableWriter(writerOptionsBuilder() + .tableDefinition(source.getDefinition()) + .build()); + tableWriter.append(IcebergWriteInstructions.builder() + .addTables(source) + .build()); + } + + final ControlledUpdateGraph updateGraph = ExecutionContext.getContext().getUpdateGraph().cast(); + + final IcebergTableImpl fromIcebergRefreshing = + (IcebergTableImpl) tableAdapter.table(IcebergReadInstructions.builder() + .updateMode(IcebergUpdateMode.manualRefreshingMode()) + .build()); + assertTableEquals(source, fromIcebergRefreshing); + verifySnapshots(tableIdentifier, List.of("append")); + + + // Append more data with different compression codec + final Table moreData = TableTools.emptyTable(5) + .update("intCol = (int) 3 * i + 20", + "doubleCol = (double) 3.5 * i + 20"); + { + final IcebergTableWriter tableWriter = tableAdapter.tableWriter(writerOptionsBuilder() + .tableDefinition(source.getDefinition()) + .compressionCodecName("LZ4") + .build()); + tableWriter.append(IcebergWriteInstructions.builder() + .addTables(moreData) + .build()); + } + + fromIcebergRefreshing.update(); + updateGraph.runWithinUnitTestCycle(fromIcebergRefreshing::refresh); + + final Table expected = TableTools.merge(source, moreData); + assertTableEquals(expected, fromIcebergRefreshing); + verifySnapshots(tableIdentifier, List.of("append", "append")); + + assertTableEquals(expected, tableAdapter.table()); + } + + @Test + void testAutomaticRefreshingAppend() throws InterruptedException { + final Table source = TableTools.emptyTable(10) + .update("intCol = (int) 2 * i + 10", + "doubleCol = (double) 2.5 * i + 10"); + final TableIdentifier tableIdentifier = TableIdentifier.parse("MyNamespace.MyTable"); + final IcebergTableAdapter tableAdapter = catalogAdapter.createTable(tableIdentifier, source.getDefinition()); + { + final IcebergTableWriter tableWriter = tableAdapter.tableWriter(writerOptionsBuilder() + .tableDefinition(source.getDefinition()) + .build()); + tableWriter.append(IcebergWriteInstructions.builder() + .addTables(source) + .build()); + } + + final IcebergTableImpl fromIcebergRefreshing = + (IcebergTableImpl) tableAdapter.table(IcebergReadInstructions.builder() + .updateMode(IcebergUpdateMode.autoRefreshingMode(10)) + .build()); + assertTableEquals(source, fromIcebergRefreshing); + verifySnapshots(tableIdentifier, List.of("append")); + + // Append more data with different compression codec + final Table moreData = TableTools.emptyTable(5) + .update("intCol = (int) 3 * i + 20", + "doubleCol = (double) 3.5 * i + 20"); + { + final IcebergTableWriter tableWriter = tableAdapter.tableWriter(writerOptionsBuilder() + .tableDefinition(source.getDefinition()) + .compressionCodecName("LZ4") + .build()); + tableWriter.append(IcebergWriteInstructions.builder() + .addTables(moreData) + .build()); + } + + // Sleep for 0.5 second + Thread.sleep(500); + + final ControlledUpdateGraph updateGraph = ExecutionContext.getContext().getUpdateGraph().cast(); + updateGraph.runWithinUnitTestCycle(fromIcebergRefreshing::refresh); + + final Table expected = TableTools.merge(source, moreData); + assertTableEquals(expected, 
fromIcebergRefreshing); + verifySnapshots(tableIdentifier, List.of("append", "append")); + + assertTableEquals(expected, tableAdapter.table()); + } + + @Test + void testManualRefreshingPartitionedAppend() { + final Table part1 = TableTools.emptyTable(10) + .update("intCol = (int) 2 * i + 10", + "doubleCol = (double) 2.5 * i + 10"); + final Table part2 = TableTools.emptyTable(5) + .update("intCol = (int) 3 * i + 20", + "doubleCol = (double) 3.5 * i + 20"); + final List partitionPaths = List.of("PC=apple", "PC=boy"); + final TableIdentifier tableIdentifier = TableIdentifier.parse("MyNamespace.MyTable"); + + final TableDefinition tableDefinition = TableDefinition.of( + ColumnDefinition.ofInt("intCol"), + ColumnDefinition.ofDouble("doubleCol"), + ColumnDefinition.ofString("PC").withPartitioning()); + final IcebergTableAdapter tableAdapter = catalogAdapter.createTable(tableIdentifier, tableDefinition); + final IcebergTableWriter tableWriter = tableAdapter.tableWriter(writerOptionsBuilder() + .tableDefinition(tableDefinition) + .build()); + tableWriter.append(IcebergWriteInstructions.builder() + .addTables(part1, part2) + .addAllPartitionPaths(partitionPaths) + .build()); + + final ControlledUpdateGraph updateGraph = ExecutionContext.getContext().getUpdateGraph().cast(); + + final IcebergTableImpl fromIcebergRefreshing = + (IcebergTableImpl) tableAdapter.table(IcebergReadInstructions.builder() + .updateMode(IcebergUpdateMode.manualRefreshingMode()) + .build()); + assertThat(tableAdapter.definition()).isEqualTo(tableDefinition); + assertThat(fromIcebergRefreshing.getDefinition()).isEqualTo(tableDefinition); + assertThat(fromIcebergRefreshing).isInstanceOf(PartitionAwareSourceTable.class); + final Table expected = TableTools.merge( + part1.update("PC = `apple`"), + part2.update("PC = `boy`")); + assertTableEquals(expected, fromIcebergRefreshing.select()); + + final Table part3 = TableTools.emptyTable(5) + .update("intCol = (int) 4 * i + 30", + "doubleCol = (double) 4.5 * i + 30"); + final String partitionPath = "PC=cat"; + tableWriter.append(IcebergWriteInstructions.builder() + .addTables(part3) + .addPartitionPaths(partitionPath) + .build()); + + fromIcebergRefreshing.update(); + updateGraph.runWithinUnitTestCycle(fromIcebergRefreshing::refresh); + + final Table expected2 = TableTools.merge(expected, part3.update("PC = `cat`")); + assertTableEquals(expected2, fromIcebergRefreshing.select()); + } + + @Test + void testAutoRefreshingPartitionedAppend() throws InterruptedException { + final Table part1 = TableTools.emptyTable(10) + .update("intCol = (int) 2 * i + 10", + "doubleCol = (double) 2.5 * i + 10"); + final Table part2 = TableTools.emptyTable(5) + .update("intCol = (int) 3 * i + 20", + "doubleCol = (double) 3.5 * i + 20"); + final List partitionPaths = List.of("PC=apple", "PC=boy"); + final TableIdentifier tableIdentifier = TableIdentifier.parse("MyNamespace.MyTable"); + + final TableDefinition tableDefinition = TableDefinition.of( + ColumnDefinition.ofInt("intCol"), + ColumnDefinition.ofDouble("doubleCol"), + ColumnDefinition.ofString("PC").withPartitioning()); + final IcebergTableAdapter tableAdapter = catalogAdapter.createTable(tableIdentifier, tableDefinition); + final IcebergTableWriter tableWriter = tableAdapter.tableWriter(writerOptionsBuilder() + .tableDefinition(tableDefinition) + .build()); + tableWriter.append(IcebergWriteInstructions.builder() + .addTables(part1, part2) + .addAllPartitionPaths(partitionPaths) + .build()); + + final ControlledUpdateGraph updateGraph = 
ExecutionContext.getContext().getUpdateGraph().cast(); + + final IcebergTableImpl fromIcebergRefreshing = + (IcebergTableImpl) tableAdapter.table(IcebergReadInstructions.builder() + .updateMode(IcebergUpdateMode.autoRefreshingMode(10)) + .build()); + assertThat(tableAdapter.definition()).isEqualTo(tableDefinition); + assertThat(fromIcebergRefreshing.getDefinition()).isEqualTo(tableDefinition); + assertThat(fromIcebergRefreshing).isInstanceOf(PartitionAwareSourceTable.class); + final Table expected = TableTools.merge( + part1.update("PC = `apple`"), + part2.update("PC = `boy`")); + assertTableEquals(expected, fromIcebergRefreshing.select()); + + final Table part3 = TableTools.emptyTable(5) + .update("intCol = (int) 4 * i + 30", + "doubleCol = (double) 4.5 * i + 30"); + final String partitionPath = "PC=cat"; + tableWriter.append(IcebergWriteInstructions.builder() + .addTables(part3) + .addPartitionPaths(partitionPath) + .build()); + + // Sleep for 0.5 second + Thread.sleep(500); + + updateGraph.runWithinUnitTestCycle(fromIcebergRefreshing::refresh); + + final Table expected2 = TableTools.merge(expected, part3.update("PC = `cat`")); + assertTableEquals(expected2, fromIcebergRefreshing.select()); + } } diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetFileWriter.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetFileWriter.java index 8cf51a65e7e..d14916a6f93 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetFileWriter.java +++ b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetFileWriter.java @@ -72,6 +72,13 @@ public RowGroupWriter addRowGroup(final long size) { return rowGroupWriter; } + /** + * Get the number of bytes written to the parquet file so far. + */ + public long bytesWritten() { + return countingOutput.getCount(); + } + @Override public void close() throws IOException { serializeOffsetIndexes(); diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/CompletedParquetWrite.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/CompletedParquetWrite.java new file mode 100644 index 00000000000..90ab4989a03 --- /dev/null +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/CompletedParquetWrite.java @@ -0,0 +1,61 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.parquet.table; + +import io.deephaven.annotations.BuildableStyle; +import org.immutables.value.Value; + +import java.net.URI; + +/** + * This class is used as a return POJO for the result of a Parquet write operation. + *
<p>
+ * It is intended to be used with the {@link ParquetInstructions#onWriteCompleted()} callback. + */ +@Value.Immutable +@BuildableStyle +public abstract class CompletedParquetWrite { + /** + * The destination URI of the written Parquet file. + */ + public abstract URI destination(); + + /** + * The number of rows written to the Parquet file. + */ + public abstract long numRows(); + + /** + * The number of bytes written to the Parquet file. + */ + public abstract long numBytes(); + + public static Builder builder() { + return ImmutableCompletedParquetWrite.builder(); + } + + interface Builder { + Builder destination(URI destination); + + Builder numRows(long numRows); + + Builder numBytes(long numBytes); + + CompletedParquetWrite build(); + } + + @Value.Check + final void numRowsBoundsCheck() { + if (numRows() < 0) { + throw new IllegalArgumentException("numRows must be non-negative"); + } + } + + @Value.Check + final void numBytesBoundsCheck() { + if (numBytes() <= 0) { + throw new IllegalArgumentException("numBytes must be positive"); + } + } +} diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetInstructions.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetInstructions.java index 07bb2df5549..d8fa3ff9aa7 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetInstructions.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetInstructions.java @@ -59,6 +59,10 @@ static TableDefinition ensureDefinition(final ParquetInstructions parquetInstruc private static final boolean DEFAULT_IS_REFRESHING = false; + public interface OnWriteCompleted { + void onWriteCompleted(CompletedParquetWrite completedParquetWrite); + } + public enum ParquetFileLayout { /** * A single parquet file. @@ -196,6 +200,12 @@ public abstract ParquetInstructions withTableDefinitionAndLayout(final TableDefi */ public abstract String baseNameForPartitionedParquetData(); + /** + * @return A callback to be executed when on completing each parquet data file write (excluding the index and + * metadata files). This callback gets invoked by the writing thread in a linear fashion. 
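As a rough usage sketch of this callback (relying only on the builder method and accessors added in this change plus the existing `TableTools`/`ParquetTools` entry points; the destination path is made up):

```java
import io.deephaven.engine.table.Table;
import io.deephaven.engine.util.TableTools;
import io.deephaven.parquet.table.CompletedParquetWrite;
import io.deephaven.parquet.table.ParquetInstructions;
import io.deephaven.parquet.table.ParquetTools;

import java.util.ArrayList;
import java.util.List;

public class OnWriteCompletedExample {
    public static void main(String[] args) {
        // One CompletedParquetWrite is delivered per data file, in the order the files are written
        final List<CompletedParquetWrite> completedWrites = new ArrayList<>();
        final ParquetInstructions instructions = new ParquetInstructions.Builder()
                .setOnWriteCompleted(completedWrites::add)
                .build();

        final Table table = TableTools.emptyTable(1_000).update("X = ii");
        ParquetTools.writeTables(
                new Table[] {table},
                new String[] {"/tmp/example.parquet"},
                instructions);

        // destination(), numRows() and numBytes() describe the file that was just written
        for (final CompletedParquetWrite write : completedWrites) {
            System.out.printf("%s: %d rows, %d bytes%n",
                    write.destination(), write.numRows(), write.numBytes());
        }
    }
}
```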
+ */ + public abstract Optional onWriteCompleted(); + @VisibleForTesting public static boolean sameColumnNamesAndCodecMappings(final ParquetInstructions i1, final ParquetInstructions i2) { if (i1 == EMPTY) { @@ -323,7 +333,7 @@ public ParquetInstructions withTableDefinitionAndLayout( return new ReadOnly(null, null, getCompressionCodecName(), getMaximumDictionaryKeys(), getMaximumDictionarySize(), isLegacyParquet(), getTargetPageSize(), isRefreshing(), getSpecialInstructions(), generateMetadataFiles(), baseNameForPartitionedParquetData(), - useLayout, useDefinition, null); + useLayout, useDefinition, null, null); } @Override @@ -331,7 +341,12 @@ ParquetInstructions withIndexColumns(final Collection> indexColumns return new ReadOnly(null, null, getCompressionCodecName(), getMaximumDictionaryKeys(), getMaximumDictionarySize(), isLegacyParquet(), getTargetPageSize(), isRefreshing(), getSpecialInstructions(), generateMetadataFiles(), baseNameForPartitionedParquetData(), - null, null, indexColumns); + null, null, indexColumns, null); + } + + @Override + public Optional onWriteCompleted() { + return Optional.empty(); } }; @@ -442,6 +457,7 @@ private static final class ReadOnly extends ParquetInstructions { private final ParquetFileLayout fileLayout; private final TableDefinition tableDefinition; private final Collection> indexColumns; + private final OnWriteCompleted onWriteCompleted; private ReadOnly( final KeyedObjectHashMap columnNameToInstructions, @@ -457,7 +473,8 @@ private ReadOnly( final String baseNameForPartitionedParquetData, final ParquetFileLayout fileLayout, final TableDefinition tableDefinition, - final Collection> indexColumns) { + final Collection> indexColumns, + final OnWriteCompleted onWriteCompleted) { this.columnNameToInstructions = columnNameToInstructions; this.parquetColumnNameToInstructions = parquetColumnNameToColumnName; this.compressionCodecName = compressionCodecName; @@ -475,6 +492,7 @@ private ReadOnly( : indexColumns.stream() .map(List::copyOf) .collect(Collectors.toUnmodifiableList()); + this.onWriteCompleted = onWriteCompleted; } private T getOrDefault(final String columnName, final T defaultValue, @@ -617,7 +635,7 @@ public ParquetInstructions withTableDefinitionAndLayout( getCompressionCodecName(), getMaximumDictionaryKeys(), getMaximumDictionarySize(), isLegacyParquet(), getTargetPageSize(), isRefreshing(), getSpecialInstructions(), generateMetadataFiles(), baseNameForPartitionedParquetData(), useLayout, useDefinition, - indexColumns); + indexColumns, onWriteCompleted); } @Override @@ -626,7 +644,12 @@ ParquetInstructions withIndexColumns(final Collection> useIndexColu getCompressionCodecName(), getMaximumDictionaryKeys(), getMaximumDictionarySize(), isLegacyParquet(), getTargetPageSize(), isRefreshing(), getSpecialInstructions(), generateMetadataFiles(), baseNameForPartitionedParquetData(), fileLayout, - tableDefinition, useIndexColumns); + tableDefinition, useIndexColumns, onWriteCompleted); + } + + @Override + public Optional onWriteCompleted() { + return Optional.ofNullable(onWriteCompleted); } KeyedObjectHashMap copyColumnNameToInstructions() { @@ -685,6 +708,7 @@ public static class Builder { private ParquetFileLayout fileLayout; private TableDefinition tableDefinition; private Collection> indexColumns; + private OnWriteCompleted onWriteCompleted; /** * For each additional field added, make sure to update the copy constructor builder @@ -712,6 +736,7 @@ public Builder(final ParquetInstructions parquetInstructions) { fileLayout = 
readOnlyParquetInstructions.getFileLayout().orElse(null); tableDefinition = readOnlyParquetInstructions.getTableDefinition().orElse(null); indexColumns = readOnlyParquetInstructions.getIndexColumns().orElse(null); + onWriteCompleted = readOnlyParquetInstructions.onWriteCompleted().orElse(null); } public Builder addColumnNameMapping(final String parquetColumnName, final String columnName) { @@ -940,6 +965,15 @@ public Builder addAllIndexColumns(final Iterable> indexColumns) { return this; } + /** + * Adds a callback to be executed when on completing each parquet data file write (excluding the index and + * metadata files). + */ + public Builder setOnWriteCompleted(final OnWriteCompleted onWriteCompleted) { + this.onWriteCompleted = onWriteCompleted; + return this; + } + public ParquetInstructions build() { final KeyedObjectHashMap columnNameToInstructionsOut = columnNameToInstructions; columnNameToInstructions = null; @@ -949,7 +983,7 @@ public ParquetInstructions build() { return new ReadOnly(columnNameToInstructionsOut, parquetColumnNameToColumnNameOut, compressionCodecName, maximumDictionaryKeys, maximumDictionarySize, isLegacyParquet, targetPageSize, isRefreshing, specialInstructions, generateMetadataFiles, baseNameForPartitionedParquetData, fileLayout, - tableDefinition, indexColumns); + tableDefinition, indexColumns, onWriteCompleted); } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTableWriter.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTableWriter.java index ebb1d17571d..c4fdfbb11d8 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTableWriter.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTableWriter.java @@ -173,8 +173,14 @@ static void write( if (!sortedColumns.isEmpty()) { tableInfoBuilder.addSortingColumns(SortColumnInfo.of(sortedColumns.get(0))); } - write(t, definition, writeInstructions, dest, destOutputStream, incomingMeta, + final long numBytes = write(t, definition, writeInstructions, dest, destOutputStream, incomingMeta, tableInfoBuilder, metadataFileWriter, computedCache); + writeInstructions.onWriteCompleted() + .ifPresent(callback -> callback.onWriteCompleted(CompletedParquetWrite.builder() + .destination(dest) + .numRows(t.size()) + .numBytes(numBytes) + .build())); } /** @@ -191,9 +197,11 @@ static void write( * @param metadataFileWriter The writer for the {@value ParquetUtils#METADATA_FILE_NAME} and * {@value ParquetUtils#COMMON_METADATA_FILE_NAME} files * @param computedCache Per column cache tags + * @return The number of bytes written + * * @throws IOException For file writing related errors */ - private static void write( + private static long write( @NotNull final Table table, @NotNull final TableDefinition definition, @NotNull final ParquetInstructions writeInstructions, @@ -207,13 +215,18 @@ private static void write( final Table t = pretransformTable(table, definition); final TrackingRowSet tableRowSet = t.getRowSet(); final Map> columnSourceMap = t.getColumnSourceMap(); - try (final ParquetFileWriter parquetFileWriter = getParquetFileWriter(computedCache, definition, - tableRowSet, columnSourceMap, dest, destOutputStream, writeInstructions, tableMeta, - tableInfoBuilder, metadataFileWriter)) { + final long numBytesWritten; + { + final ParquetFileWriter parquetFileWriter = getParquetFileWriter(computedCache, definition, + tableRowSet, columnSourceMap, dest, destOutputStream, writeInstructions, tableMeta, + 
tableInfoBuilder, metadataFileWriter); // Given the transformation, do not use the original table's "definition" for writing write(t, writeInstructions, parquetFileWriter, computedCache); + parquetFileWriter.close(); + numBytesWritten = parquetFileWriter.bytesWritten(); } destOutputStream.done(); + return numBytesWritten; } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTools.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTools.java index a03294a28ed..aaf3cf616d8 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTools.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTools.java @@ -596,11 +596,12 @@ private static void writeTablesImpl( // Write the tables without any index info for (int tableIdx = 0; tableIdx < sources.length; tableIdx++) { final Table source = sources[tableIdx]; + final URI tableDestination = destinations[tableIdx]; final CompletableOutputStream outputStream = channelsProvider.getOutputStream( - destinations[tableIdx], PARQUET_OUTPUT_BUFFER_SIZE); + tableDestination, PARQUET_OUTPUT_BUFFER_SIZE); outputStreams.add(outputStream); - ParquetTableWriter.write(source, definition, writeInstructions, destinations[tableIdx], - outputStream, Collections.emptyMap(), (List) null, + ParquetTableWriter.write(source, definition, writeInstructions, tableDestination, outputStream, + Collections.emptyMap(), (List) null, metadataFileWriter, computedCache); } } else { @@ -622,9 +623,9 @@ private static void writeTablesImpl( for (final ParquetTableWriter.IndexWritingInfo info : indexInfoList) { outputStreams.add(info.destOutputStream); } - final Table sourceTable = sources[tableIdx]; - ParquetTableWriter.write(sourceTable, definition, writeInstructions, destinations[tableIdx], - outputStream, Collections.emptyMap(), indexInfoList, metadataFileWriter, computedCache); + final Table source = sources[tableIdx]; + ParquetTableWriter.write(source, definition, writeInstructions, tableDestination, outputStream, + Collections.emptyMap(), indexInfoList, metadataFileWriter, computedCache); } } diff --git a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java index 00ef39474a1..e7faf5be88c 100644 --- a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java +++ b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java @@ -2623,6 +2623,40 @@ public void testReadingParquetFilesWithDifferentPageSizes() { assertTableEquals(expected, fromDisk); } + @Test + public void testOnWriteCallback() { + // Write a few tables to disk and check the sizes and number of rows in the files + final Table table1 = TableTools.emptyTable(100_000).update( + "someIntColumn = i * 200", + "someLongColumn = ii * 500"); + final File dest1 = new File(rootFile, "table1.parquet"); + final Table table2 = TableTools.emptyTable(2000).update( + "someIntColumn = i", + "someLongColumn = ii"); + final File dest2 = new File(rootFile, "table2.parquet"); + + final List parquetFilesWritten = new ArrayList<>(); + final ParquetInstructions.OnWriteCompleted onWriteCompleted = parquetFilesWritten::add; + final ParquetInstructions writeInstructions = new ParquetInstructions.Builder() + .setOnWriteCompleted(onWriteCompleted) + .build(); + ParquetTools.writeTables(new Table[] {table1, table2}, 
+ new String[] {dest1.getPath(), dest2.getPath()}, writeInstructions); + + assertEquals(2, parquetFilesWritten.size()); + // Check the destination URIs + assertEquals(dest1.toURI(), parquetFilesWritten.get(0).destination()); + assertEquals(dest2.toURI(), parquetFilesWritten.get(1).destination()); + + // Check the number of rows + assertEquals(100_000, parquetFilesWritten.get(0).numRows()); + assertEquals(2000, parquetFilesWritten.get(1).numRows()); + + // Check the size of the files + assertEquals(dest1.length(), parquetFilesWritten.get(0).numBytes()); + assertEquals(dest2.length(), parquetFilesWritten.get(1).numBytes()); + } + // Following is used for testing both writing APIs for parquet tables private interface TestParquetTableWriter { void writeTable(final Table table, final File destFile); diff --git a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/S3ParquetLocalStackTest.java b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/S3ParquetLocalStackTest.java index d4c69017d78..69115fee18b 100644 --- a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/S3ParquetLocalStackTest.java +++ b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/S3ParquetLocalStackTest.java @@ -7,7 +7,6 @@ import io.deephaven.extensions.s3.testlib.SingletonContainers.LocalStack; import org.junit.Assume; import org.junit.BeforeClass; -import org.junit.Ignore; import software.amazon.awssdk.services.s3.S3AsyncClient; import java.io.IOException; diff --git a/extensions/protobuf/src/main/java/io/deephaven/protobuf/ProtobufDescriptorParserImpl.java b/extensions/protobuf/src/main/java/io/deephaven/protobuf/ProtobufDescriptorParserImpl.java index a2365ea853e..91c4850e976 100644 --- a/extensions/protobuf/src/main/java/io/deephaven/protobuf/ProtobufDescriptorParserImpl.java +++ b/extensions/protobuf/src/main/java/io/deephaven/protobuf/ProtobufDescriptorParserImpl.java @@ -32,7 +32,6 @@ import io.deephaven.function.ToShortFunction; import io.deephaven.function.TypedFunction; import io.deephaven.function.TypedFunction.Visitor; -import io.deephaven.util.QueryConstants; import java.lang.reflect.Array; import java.util.HashMap; @@ -362,40 +361,46 @@ private ProtobufFunctions functions() { } } + private ToObjectFunction maybeBypass(ToObjectFunction f) { + // Ideally, we could be very targetted in our application of null checks; in a lot of contexts, our + // implementation could know it will never be called with a null message to produce an array. 
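+ // For now the wrapping is applied unconditionally, so an unset parent message surfaces as a null array
+ // rather than a NullPointerException (see the NestedArrays cases in ProtobufDescriptorParserTest).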
+ return BypassOnNull.of(f); + } + private ToObjectFunction mapChars(ToCharFunction f) { - return ToObjectFunction.of(m -> toChars(m, fd, f), Type.charType().arrayType()); + return maybeBypass(ToObjectFunction.of(m -> toChars(m, fd, f), Type.charType().arrayType())); } private ToObjectFunction mapBytes(ToByteFunction f) { - return ToObjectFunction.of(m -> toBytes(m, fd, f), Type.byteType().arrayType()); + return maybeBypass(ToObjectFunction.of(m -> toBytes(m, fd, f), Type.byteType().arrayType())); } private ToObjectFunction mapShorts(ToShortFunction f) { - return ToObjectFunction.of(m -> toShorts(m, fd, f), Type.shortType().arrayType()); + return maybeBypass(ToObjectFunction.of(m -> toShorts(m, fd, f), Type.shortType().arrayType())); } private ToObjectFunction mapInts(ToIntFunction f) { - return ToObjectFunction.of(m -> toInts(m, fd, f), Type.intType().arrayType()); + return maybeBypass(ToObjectFunction.of(m -> toInts(m, fd, f), Type.intType().arrayType())); } private ToObjectFunction mapLongs(ToLongFunction f) { - return ToObjectFunction.of(m -> toLongs(m, fd, f), Type.longType().arrayType()); + return maybeBypass(ToObjectFunction.of(m -> toLongs(m, fd, f), Type.longType().arrayType())); } private ToObjectFunction mapFloats(ToFloatFunction f) { - return ToObjectFunction.of(m -> toFloats(m, fd, f), Type.floatType().arrayType()); + return maybeBypass(ToObjectFunction.of(m -> toFloats(m, fd, f), Type.floatType().arrayType())); } private ToObjectFunction mapDoubles(ToDoubleFunction f) { - return ToObjectFunction.of(m -> toDoubles(m, fd, f), Type.doubleType().arrayType()); + return maybeBypass(ToObjectFunction.of(m -> toDoubles(m, fd, f), Type.doubleType().arrayType())); } private ToObjectFunction mapBooleans(ToBooleanFunction f) { - return ToObjectFunction.of(m -> toBooleans(m, fd, f), Type.booleanType().arrayType()); + return maybeBypass(ToObjectFunction.of(m -> toBooleans(m, fd, f), Type.booleanType().arrayType())); } private ToObjectFunction mapGenerics(ToObjectFunction f) { - return ToObjectFunction.of(message -> toArray(message, fd, f), f.returnType().arrayType()); + return maybeBypass(ToObjectFunction.of(message -> toArray(message, fd, f), f.returnType().arrayType())); } private class ToRepeatedType implements diff --git a/extensions/protobuf/src/test/java/io/deephaven/protobuf/ProtobufDescriptorParserTest.java b/extensions/protobuf/src/test/java/io/deephaven/protobuf/ProtobufDescriptorParserTest.java index 975f13fa6c1..fd4e3a4e1e2 100644 --- a/extensions/protobuf/src/test/java/io/deephaven/protobuf/ProtobufDescriptorParserTest.java +++ b/extensions/protobuf/src/test/java/io/deephaven/protobuf/ProtobufDescriptorParserTest.java @@ -45,6 +45,7 @@ import io.deephaven.protobuf.test.ByteWrapperRepeated; import io.deephaven.protobuf.test.FieldMaskWrapper; import io.deephaven.protobuf.test.MultiRepeated; +import io.deephaven.protobuf.test.NestedArrays; import io.deephaven.protobuf.test.NestedByteWrapper; import io.deephaven.protobuf.test.NestedRepeatedTimestamps; import io.deephaven.protobuf.test.NestedRepeatedTimestamps.Timestamps; @@ -1470,6 +1471,111 @@ void twoTimestampsOneAsWellKnown() { assertThat(nf.keySet()).containsExactly(List.of("ts1"), List.of("ts2", "seconds"), List.of("ts2", "nanos")); } + @Test + void nestedArraysADirect() { + checkKey( + NestedArrays.getDescriptor(), + List.of("a_direct", "b", "c"), + Type.stringType().arrayType(), + new HashMap<>() { + { + put(NestedArrays.getDefaultInstance(), null); + + put(NestedArrays.newBuilder() + 
.setADirect(NestedArrays.A.getDefaultInstance()) + .build(), null); + + // c is only non-null when b has been explicitly set + + put(NestedArrays.newBuilder() + .setADirect(NestedArrays.A.newBuilder() + .setB(NestedArrays.B.getDefaultInstance()) + .build()) + .build(), new String[0]); + + put(NestedArrays.newBuilder() + .setADirect(NestedArrays.A.newBuilder() + .setB(NestedArrays.B.newBuilder() + .addC("Foo") + .addC("Bar") + .build()) + .build()) + .build(), new String[] {"Foo", "Bar"}); + } + }); + } + + @Test + void nestedArraysARepeated() { + checkKey( + NestedArrays.getDescriptor(), + List.of("a_repeated", "b", "c"), + Type.stringType().arrayType().arrayType(), + new HashMap<>() { + { + put(NestedArrays.getDefaultInstance(), new String[0][]); + put(NestedArrays.newBuilder() + .addARepeated(NestedArrays.A.getDefaultInstance()) + .addARepeated(NestedArrays.A.newBuilder() + .setB(NestedArrays.B.getDefaultInstance()) + .build()) + .addARepeated(NestedArrays.A.newBuilder() + .setB(NestedArrays.B.newBuilder() + .addC("Foo") + .addC("Bar") + .build()) + .build()) + .build(), new String[][] {null, new String[0], new String[] {"Foo", "Bar"}}); + } + }); + } + + @Test + void nestedArraysBDirect() { + checkKey( + NestedArrays.getDescriptor(), + List.of("b_direct", "c"), + Type.stringType().arrayType(), + new HashMap<>() { + { + put(NestedArrays.getDefaultInstance(), null); + + put(NestedArrays.newBuilder() + .setBDirect(NestedArrays.B.getDefaultInstance()) + .build(), new String[0]); + + put(NestedArrays.newBuilder() + .setBDirect(NestedArrays.B.newBuilder() + .addC("Foo") + .addC("Bar") + .build()) + .build(), new String[] {"Foo", "Bar"}); + } + }); + } + + @Test + void nestedArraysBRepeated() { + checkKey( + NestedArrays.getDescriptor(), + List.of("b_repeated", "c"), + Type.stringType().arrayType().arrayType(), + new HashMap<>() { + { + put(NestedArrays.getDefaultInstance(), new String[0][]); + + put(NestedArrays.newBuilder() + .addBRepeated(NestedArrays.B.getDefaultInstance()) + .addBRepeated(NestedArrays.B.newBuilder() + .addC("Foo") + .addC("Bar") + .build()) + + .build(), new String[][] {new String[0], new String[] {"Foo", "Bar"}}); + } + }); + } + private static Map, TypedFunction> nf(Descriptor descriptor) { return nf(descriptor, ProtobufDescriptorParserOptions.defaults()); } diff --git a/extensions/protobuf/src/test/proto/mytest.proto b/extensions/protobuf/src/test/proto/mytest.proto index da6a6e169db..46d6e2a9513 100644 --- a/extensions/protobuf/src/test/proto/mytest.proto +++ b/extensions/protobuf/src/test/proto/mytest.proto @@ -131,6 +131,21 @@ message RepeatedObject { repeated XYZ xyz = 1; } +message NestedArrays { + message A { + B b = 1; + } + message B { + repeated string c = 1; + } + + A a_direct = 1; + repeated A a_repeated = 2; + + B b_direct = 3; + repeated B b_repeated = 4; +} + message MultiRepeated { repeated RepeatedBasics my_basics = 1; repeated RepeatedWrappers my_wrappers = 2; diff --git a/extensions/s3/src/main/java/io/deephaven/extensions/s3/ResolvingCredentials.java b/extensions/s3/src/main/java/io/deephaven/extensions/s3/ResolvingCredentials.java index b65c4ca9f14..199eb4a5eff 100644 --- a/extensions/s3/src/main/java/io/deephaven/extensions/s3/ResolvingCredentials.java +++ b/extensions/s3/src/main/java/io/deephaven/extensions/s3/ResolvingCredentials.java @@ -26,13 +26,6 @@ enum ResolvingCredentials implements AwsSdkV2Credentials { INSTANCE; - private static final AwsCredentialsProviderChain PROVIDER_CHAIN = AwsCredentialsProviderChain.builder() - 
.credentialsProviders( - DefaultCredentialsProvider.create(), - AnonymousCredentialsProvider.create()) - .reuseLastProviderEnabled(false) // Don't cache because this chain is a shared static instance - .build(); - @Override public final AwsCredentialsProvider awsV2CredentialsProvider(@NotNull final S3Instructions instructions) { if (instructions.profileName().isPresent() @@ -40,6 +33,11 @@ public final AwsCredentialsProvider awsV2CredentialsProvider(@NotNull final S3In || instructions.credentialsFilePath().isPresent()) { return ProfileCredentials.INSTANCE.awsV2CredentialsProvider(instructions); } - return PROVIDER_CHAIN; + return AwsCredentialsProviderChain.builder() + .credentialsProviders( + DefaultCredentialsProvider.create(), + AnonymousCredentialsProvider.create()) + .reuseLastProviderEnabled(true) + .build(); } } diff --git a/gradle.properties b/gradle.properties index e03001083f8..3171dbc9ea4 100644 --- a/gradle.properties +++ b/gradle.properties @@ -9,7 +9,7 @@ # Re-builders who want to inherit the base version, but have their own qualifier can set -PdeephavenBaseQualifier="customQualifier": "X.Y.Z-customQualifier". # # Re-builders who want a fully custom version can set -PdeephavenBaseVersion="customVersion" -PdeephavenBaseQualifier="": "customVersion". -deephavenBaseVersion=0.37.0 +deephavenBaseVersion=0.38.0 deephavenBaseQualifier=SNAPSHOT #org.gradle.debug diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index c688410b7ee..0677e22c289 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -3,13 +3,13 @@ airlift = "2.0.2" arrow = "18.0.0" autoservice = "1.1.1" avro = "1.12.0" -awssdk = "2.24.5" +awssdk = "2.29.20" # See dependency matrix for particular gRPC versions at https://github.com/grpc/grpc-java/blob/master/SECURITY.md#netty boringssl = "2.0.61.Final" calcite = "1.38.0" classgraph = "4.8.177" commons-compress = "1.27.1" -commons-io = "2.17.0" +commons-io = "2.18.0" commons-lang3 = "3.17.0" commons-math3 = "3.6.1" commons-text = "1.12.0" @@ -23,7 +23,7 @@ deephaven-hash = "0.1.0" deephaven-suan-shu = "0.1.1" dev-dirs = "26" dsi = "8.5.15" -elemental = "1.2.1" +elemental = "1.2.3" f4b6a3 = "6.0.0" flatbuffers = "24.3.25" freemarker = "2.3.33" @@ -33,7 +33,7 @@ groovy = "3.0.22" # Only bump this in concert with boringssl grpc = "1.65.1" guava = "33.3.1-jre" -gwt = "2.11.0" +gwt = "2.12.1" # used by GwtTools gwtJetty = "9.4.44.v20210927" hadoop = "3.4.1" @@ -54,7 +54,7 @@ jetty = "11.0.20" jpy = "0.19.0" jsinterop = "2.0.2" # google is annoying, and have different versions released for the same groupId -jsinterop-base = "1.0.1" +jsinterop-base = "1.0.3" logback = "1.5.12" lz4 = "1.8.0" mindrot = "0.4" @@ -87,7 +87,7 @@ h2database = "2.3.232" jmock = "2.13.1" junit = "5.11.3" junit4 = "4.13.2" -testcontainers = "1.20.3" +testcontainers = "1.20.4" jmh = "1.37" spockframework = "2.3-groovy-3.0" @@ -187,7 +187,7 @@ gwt-user = { module = "org.gwtproject:gwt-user", version.ref = "gwt" } hadoop-common = { module = "org.apache.hadoop:hadoop-common", version.ref = "hadoop" } # These are transitive, inlined versions; see io.deephaven.hadoop-common-dependencies woodstox-core = { module = "com.fasterxml.woodstox:woodstox-core", version = "6.6.2" } -hadoop-shaded-guava = { module = "org.apache.hadoop.thirdparty:hadoop-shaded-guava", version = "1.2.0" } +hadoop-shaded-guava = { module = "org.apache.hadoop.thirdparty:hadoop-shaded-guava", version = "1.3.0" } commons-collections = { module = "commons-collections:commons-collections", version = "3.2.2" } 
hdrhistogram = { module = "org.hdrhistogram:HdrHistogram", version.ref = "hdrhistogram" } diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index 6acc1431eae..22286c90de3 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,7 +1,7 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionSha256Sum=2ab88d6de2c23e6adae7363ae6e29cbdd2a709e992929b48b6530fd0c7133bd6 -distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.2-all.zip +distributionSha256Sum=89d4e70e4e84e2d2dfbb63e4daa53e21b25017cc70c37e4eea31ee51fb15098a +distributionUrl=https\://services.gradle.org/distributions/gradle-8.11.1-all.zip networkTimeout=10000 validateDistributionUrl=true zipStoreBase=GRADLE_USER_HOME diff --git a/grpc-java/grpc-servlet-jakarta/src/main/java/io/grpc/servlet/jakarta/web/GrpcWebOutputStream.java b/grpc-java/grpc-servlet-jakarta/src/main/java/io/grpc/servlet/jakarta/web/GrpcWebOutputStream.java index 8ea70c19022..a2f70b7ac47 100644 --- a/grpc-java/grpc-servlet-jakarta/src/main/java/io/grpc/servlet/jakarta/web/GrpcWebOutputStream.java +++ b/grpc-java/grpc-servlet-jakarta/src/main/java/io/grpc/servlet/jakarta/web/GrpcWebOutputStream.java @@ -17,6 +17,7 @@ import jakarta.servlet.WriteListener; import java.io.IOException; +import java.util.concurrent.atomic.AtomicReference; /** * Wraps the usual ServletOutputStream so as to allow downstream writers to use it according to the servlet spec, but @@ -24,13 +25,15 @@ */ public class GrpcWebOutputStream extends ServletOutputStream implements WriteListener { private final ServletOutputStream wrapped; + private final GrpcWebServletResponse grpcWebServletResponse; // Access to these are guarded by synchronized private Runnable waiting; private WriteListener writeListener; - public GrpcWebOutputStream(ServletOutputStream wrapped) { + public GrpcWebOutputStream(ServletOutputStream wrapped, GrpcWebServletResponse grpcWebServletResponse) { this.wrapped = wrapped; + this.grpcWebServletResponse = grpcWebServletResponse; } @Override @@ -97,7 +100,21 @@ public void flush() throws IOException { @Override public void close() throws IOException { - wrapped.close(); + // Since we're a grpc-web response, we must write trailers on our way out as part of close - but trailers + // for grpc-web are a data frame, not HTTP trailers. Call up to the response to write the trailer frame, + // then close the underlying stream. 
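+ // The close happens inside the trailer-write callback, so any IOException it throws is captured in the
+ // AtomicReference below and rethrown once writeTrailers() has returned.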
+ AtomicReference exception = new AtomicReference<>(); + grpcWebServletResponse.writeTrailers(() -> { + try { + wrapped.close(); + } catch (IOException e) { + exception.set(e); + } + }); + IOException ex = exception.get(); + if (ex != null) { + throw ex; + } } @Override diff --git a/grpc-java/grpc-servlet-jakarta/src/main/java/io/grpc/servlet/jakarta/web/GrpcWebServletRequest.java b/grpc-java/grpc-servlet-jakarta/src/main/java/io/grpc/servlet/jakarta/web/GrpcWebServletRequest.java index 82dfde664ab..66a25d37a83 100644 --- a/grpc-java/grpc-servlet-jakarta/src/main/java/io/grpc/servlet/jakarta/web/GrpcWebServletRequest.java +++ b/grpc-java/grpc-servlet-jakarta/src/main/java/io/grpc/servlet/jakarta/web/GrpcWebServletRequest.java @@ -58,6 +58,8 @@ public AsyncContext startAsync() throws IllegalStateException { public AsyncContext startAsync(ServletRequest servletRequest, ServletResponse servletResponse) throws IllegalStateException { AsyncContext delegate = super.startAsync(servletRequest, servletResponse); + // Note that this anonymous class has no purpose while our workaround for + // https://github.com/deephaven/deephaven-core/issues/6400 is in place. return new DelegatingAsyncContext(delegate) { private void safelyComplete() { try { diff --git a/grpc-java/grpc-servlet-jakarta/src/main/java/io/grpc/servlet/jakarta/web/GrpcWebServletResponse.java b/grpc-java/grpc-servlet-jakarta/src/main/java/io/grpc/servlet/jakarta/web/GrpcWebServletResponse.java index 2306e3e20f5..68affbeb319 100644 --- a/grpc-java/grpc-servlet-jakarta/src/main/java/io/grpc/servlet/jakarta/web/GrpcWebServletResponse.java +++ b/grpc-java/grpc-servlet-jakarta/src/main/java/io/grpc/servlet/jakarta/web/GrpcWebServletResponse.java @@ -60,7 +60,7 @@ public Supplier> getTrailerFields() { public synchronized GrpcWebOutputStream getOutputStream() throws IOException { if (outputStream == null) { // Provide our own output stream instance, so we can control/monitor the write listener - outputStream = new GrpcWebOutputStream(super.getOutputStream()); + outputStream = new GrpcWebOutputStream(super.getOutputStream(), this); } return outputStream; } diff --git a/py/client-ticking/README.md b/py/client-ticking/README.md index 6d3e50ba8ed..3739e23f565 100644 --- a/py/client-ticking/README.md +++ b/py/client-ticking/README.md @@ -100,7 +100,7 @@ cd %DHSRC%\py\client-ticking rem Ensure we clean the remnants of any pre-existing build. rmdir build dist /s /q rem replace the value below to the version you are building -set DEEPHAVEN_VERSION=0.37.0-SNAPSHOT +set DEEPHAVEN_VERSION=0.38.0-SNAPSHOT python setup.py build_ext -i ``` diff --git a/py/server/deephaven/experimental/iceberg.py b/py/server/deephaven/experimental/iceberg.py index a38093befaf..16e9d359256 100644 --- a/py/server/deephaven/experimental/iceberg.py +++ b/py/server/deephaven/experimental/iceberg.py @@ -3,7 +3,7 @@ # """ This module adds Iceberg table support into Deephaven. 
""" from __future__ import annotations -from typing import Optional, Dict +from typing import Optional, Dict, Union, Sequence import jpy @@ -14,10 +14,14 @@ from deephaven.jcompat import j_hashmap -_JIcebergReadInstructions = jpy.get_type("io.deephaven.iceberg.util.IcebergReadInstructions") _JIcebergUpdateMode = jpy.get_type("io.deephaven.iceberg.util.IcebergUpdateMode") +_JIcebergReadInstructions = jpy.get_type("io.deephaven.iceberg.util.IcebergReadInstructions") +_JIcebergWriteInstructions = jpy.get_type("io.deephaven.iceberg.util.IcebergWriteInstructions") +_JSchemaProvider = jpy.get_type("io.deephaven.iceberg.util.SchemaProvider") +_JTableParquetWriterOptions = jpy.get_type("io.deephaven.iceberg.util.TableParquetWriterOptions") _JIcebergCatalogAdapter = jpy.get_type("io.deephaven.iceberg.util.IcebergCatalogAdapter") _JIcebergTableAdapter = jpy.get_type("io.deephaven.iceberg.util.IcebergTableAdapter") +_JIcebergTableWriter = jpy.get_type("io.deephaven.iceberg.util.IcebergTableWriter") _JIcebergTable = jpy.get_type("io.deephaven.iceberg.util.IcebergTable") _JIcebergTools = jpy.get_type("io.deephaven.iceberg.util.IcebergTools") @@ -34,7 +38,8 @@ class IcebergUpdateMode(JObjectWrapper): """ - This class specifies the update mode for an Iceberg table to be loaded into Deephaven. The modes are: + :class:`.IcebergUpdateMode` specifies the update mode for an Iceberg table to be loaded into Deephaven. The modes + are: - :py:func:`static() `: The table is loaded once and does not change - :py:func:`manual_refresh() `: The table can be manually refreshed by the user. @@ -62,7 +67,7 @@ def manual_refresh(cls) -> IcebergUpdateMode: return IcebergUpdateMode(_JIcebergUpdateMode.manualRefreshingMode()) @classmethod - def auto_refresh(cls, auto_refresh_ms:Optional[int] = None) -> IcebergUpdateMode: + def auto_refresh(cls, auto_refresh_ms: Optional[int] = None) -> IcebergUpdateMode: """ Creates an IcebergUpdateMode with auto-refreshing enabled. @@ -81,8 +86,9 @@ def j_object(self) -> jpy.JType: class IcebergReadInstructions(JObjectWrapper): """ - This class specifies the instructions for reading an Iceberg table into Deephaven. These include column rename - instructions and table definitions, as well as special data instructions for loading data files from the cloud. + :class:`.IcebergReadInstructions` specifies the instructions for reading an Iceberg table into Deephaven. These + include column rename instructions and table definitions, as well as special data instructions for loading data + files from the cloud. """ j_object_type = _JIcebergReadInstructions @@ -140,17 +146,226 @@ def j_object(self) -> jpy.JType: return self._j_object +class IcebergWriteInstructions(JObjectWrapper): + """ + :class:`.IcebergWriteInstructions` provides instructions intended for writing deephaven tables as partitions to Iceberg + tables. + """ + + j_object_type = _JIcebergWriteInstructions + + def __init__(self, + tables: Union[Table, Sequence[Table]], + partition_paths: Optional[Union[str, Sequence[str]]] = None): + """ + Initializes the instructions using the provided parameters. + + Args: + tables (Union[Table, Sequence[Table]]): The deephaven tables to write. + partition_paths (Optional[Union[str, Sequence[str]]]): The partition paths where each table will be written. + For example, if the iceberg table is partitioned by "year" and "month", a partition path could be + "year=2021/month=01". 
+ If writing to a partitioned iceberg table, users must provide partition path for each table in tables + argument in the same order. + Else when writing to a non-partitioned table, users should not provide any partition paths. + Defaults to `None`, which means the deephaven tables will be written to the root data directory of the + iceberg table. + + Raises: + DHError: If unable to build the instructions object. + """ + + try: + builder = self.j_object_type.builder() + + if isinstance(tables, Table): + builder.addTables(tables.j_table) + elif isinstance(tables, Sequence): + for table in tables: + builder.addTables(table.j_table) + + if partition_paths: + if isinstance(partition_paths, str): + builder.addPartitionPaths(partition_paths) + elif isinstance(partition_paths, Sequence): + for partition_path in partition_paths: + builder.addPartitionPaths(partition_path) + + self._j_object = builder.build() + + except Exception as e: + raise DHError(e, "Failed to build Iceberg write instructions") from e + + @property + def j_object(self) -> jpy.JType: + return self._j_object + + +class SchemaProvider(JObjectWrapper): + """ + :class:`.SchemaProvider` is used to extract the schema from an Iceberg table. Users can specify multiple ways to do + so, for example, by schema ID, snapshot ID, current schema, etc. This can be useful for passing a schema when + writing to an Iceberg table. + """ + + j_object_type = _JSchemaProvider + + def __init__(self, _j_object: jpy.JType): + """ + Initializes the :class:`.SchemaProvider` object. + + Args: + _j_object (SchemaProvider): the Java :class:`.SchemaProvider` object. + """ + self._j_object = _j_object + + @property + def j_object(self) -> jpy.JType: + return self._j_object + + @classmethod + def from_current(cls) -> 'SchemaProvider': + """ + Used for extracting the current schema from the table. + + Returns: + the SchemaProvider object. + """ + return cls(_JSchemaProvider.fromCurrent()) + + @classmethod + def from_schema_id(cls, schema_id: int) -> 'SchemaProvider': + """ + Used for extracting the schema from the table using the specified schema id. + + Args: + schema_id (int): the schema id to use. + + Returns: + the :class:`.SchemaProvider` object. + """ + return cls(_JSchemaProvider.fromSchemaId(schema_id)) + + @classmethod + def from_snapshot_id(cls, snapshot_id: int) -> 'SchemaProvider': + """ + Used for extracting the schema from the table using the specified snapshot id. + + Args: + snapshot_id (int): the snapshot id to use. + + Returns: + the :class:`.SchemaProvider` object. + """ + return cls(_JSchemaProvider.fromSnapshotId(snapshot_id)) + + @classmethod + def from_current_snapshot(cls) -> 'SchemaProvider': + """ + Used for extracting the schema from the table using the current snapshot. + + Returns: + the SchemaProvider object. + """ + return cls(_JSchemaProvider.fromCurrentSnapshot()) + + +class TableParquetWriterOptions(JObjectWrapper): + """ + :class:`.TableParquetWriterOptions` provides specialized instructions for configuring :class:`.IcebergTableWriter` + instances. 
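The Python classes above are thin wrappers over the Java builders in `io.deephaven.iceberg.util`. A minimal Java sketch of pairing tables with partition paths via `IcebergWriteInstructions` (mirroring the partitioned-append tests earlier in this change; table contents and partition values are illustrative):

```java
import io.deephaven.engine.table.Table;
import io.deephaven.engine.util.TableTools;
import io.deephaven.iceberg.util.IcebergWriteInstructions;

import java.util.List;

public class WriteInstructionsExample {
    public static void main(String[] args) {
        final Table year2021 = TableTools.emptyTable(10).update("intCol = i");
        final Table year2022 = TableTools.emptyTable(5).update("intCol = i + 100");

        // Tables and partition paths are paired by position: year2021 -> year=2021, year2022 -> year=2022.
        // For a non-partitioned Iceberg table, omit the partition paths entirely.
        final IcebergWriteInstructions instructions = IcebergWriteInstructions.builder()
                .addTables(year2021, year2022)
                .addAllPartitionPaths(List.of("year=2021", "year=2022"))
                .build();

        // The instructions are then passed to IcebergTableWriter.append(...), as in the tests above.
    }
}
```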
+    """
+
+    j_object_type = _JTableParquetWriterOptions
+
+    def __init__(self,
+                 table_definition: TableDefinitionLike,
+                 schema_provider: Optional[SchemaProvider] = None,
+                 field_id_to_column_name: Optional[Dict[int, str]] = None,
+                 compression_codec_name: Optional[str] = None,
+                 maximum_dictionary_keys: Optional[int] = None,
+                 maximum_dictionary_size: Optional[int] = None,
+                 target_page_size: Optional[int] = None,
+                 data_instructions: Optional[s3.S3Instructions] = None):
+        """
+        Initializes the writer options using the provided parameters.
+
+        Args:
+            table_definition (TableDefinitionLike): The table definition to use when writing Iceberg data files using
+                this writer instance. This definition can be used to skip some columns or add additional columns with
+                null values. The provided definition should have at least one column.
+            schema_provider (Optional[SchemaProvider]): Used to extract a schema from an Iceberg table. This schema is
+                used in conjunction with field_id_to_column_name to map Deephaven columns from table_definition to
+                Iceberg columns. Users can specify how to extract the schema in multiple ways (by schema ID, snapshot
+                ID, current schema, etc.). Defaults to `None`, which means use the current schema from the table.
+            field_id_to_column_name (Optional[Dict[int, str]]): A one-to-one map from Iceberg field IDs in the provided
+                schema to Deephaven column names from the table_definition. Defaults to `None`, which means map Iceberg
+                columns to Deephaven columns using column names.
+            compression_codec_name (Optional[str]): The compression codec to use for writing the parquet files. Allowed
+                values include "UNCOMPRESSED", "SNAPPY", "GZIP", "LZO", "LZ4", "LZ4_RAW", "ZSTD", etc. Defaults to
+                `None`, which means use "SNAPPY".
+            maximum_dictionary_keys (Optional[int]): the maximum number of unique keys the Parquet writer should add to
+                a dictionary page before switching to non-dictionary encoding; never used for non-String columns.
+                Defaults to `None`, which means use 2^20 (1,048,576).
+            maximum_dictionary_size (Optional[int]): the maximum number of bytes the Parquet writer should add to the
+                dictionary before switching to non-dictionary encoding; never used for non-String columns. Defaults to
+                `None`, which means use 2^20 (1,048,576).
+            target_page_size (Optional[int]): the target Parquet file page size in bytes. Defaults to `None`, which
+                means use 65,536 bytes (64 KiB).
+            data_instructions (Optional[s3.S3Instructions]): Special instructions for writing the data files, useful
+                when writing to a non-local file system like S3. Defaults to `None`.
+
+        Raises:
+            DHError: If unable to build the object.
+ """ + + try: + builder = self.j_object_type.builder() + + builder.tableDefinition(TableDefinition(table_definition).j_table_definition) + + if schema_provider: + builder.schemaProvider(schema_provider.j_object) + + if field_id_to_column_name: + for field_id, column_name in field_id_to_column_name.items(): + builder.putFieldIdToColumnName(field_id, column_name) + + if compression_codec_name: + builder.compressionCodecName(compression_codec_name) + + if maximum_dictionary_keys: + builder.maximumDictionaryKeys(maximum_dictionary_keys) + + if maximum_dictionary_size: + builder.maximumDictionarySize(maximum_dictionary_size) + + if target_page_size: + builder.targetPageSize(target_page_size) + + if data_instructions: + builder.dataInstructions(data_instructions.j_object) + + self._j_object = builder.build() + + except Exception as e: + raise DHError(e, "Failed to build Iceberg write instructions") from e + + @property + def j_object(self) -> jpy.JType: + return self._j_object + + class IcebergTable(Table): """ - IcebergTable is a subclass of Table that allows users to dynamically update the table with new snapshots from - the Iceberg catalog. + :class:`.IcebergTable` is a subclass of Table that allows users to dynamically update the table with new snapshots + from the Iceberg catalog. """ j_object_type = _JIcebergTable def __init__(self, j_table: jpy.JType): super().__init__(j_table) - def update(self, snapshot_id:Optional[int] = None): + def update(self, snapshot_id: Optional[int] = None): """ Updates the table to match the contents of the specified snapshot. This may result in row removes and additions that will be propagated asynchronously via this IcebergTable's UpdateGraph. If no snapshot is provided, the @@ -167,7 +382,7 @@ def update(self, snapshot_id:Optional[int] = None): """ try: - if snapshot_id is not None: + if snapshot_id: self.j_object.update(snapshot_id) return self.j_object.update() @@ -179,10 +394,40 @@ def j_object(self) -> jpy.JType: return self.j_table +class IcebergTableWriter(JObjectWrapper): + """ + :class:`.IcebergTableWriter` is responsible for writing Deephaven tables to an Iceberg table. Each + :class:`.IcebergTableWriter` instance associated with a single :class:`.IcebergTableAdapter` and can be used to + write multiple Deephaven tables to this Iceberg table. + """ + j_object_type = _JIcebergTableWriter or type(None) + + def __init__(self, j_object: _JIcebergTableWriter): + self.j_table_writer = j_object + + def append(self, instructions: IcebergWriteInstructions): + """ + Append the provided Deephaven tables as new partitions to the existing Iceberg table in a single snapshot. + Users can provide the tables using the :attr:`.IcebergWriteInstructions.tables` parameter and optionally provide the + partition paths where each table will be written using the :attr:`.IcebergWriteInstructions.partition_paths` + parameter. + This method will not perform any compatibility checks between the existing schema and the provided Deephaven + tables. All such checks happen at the time of creation of the :class:`.IcebergTableWriter` instance. + + Args: + instructions (IcebergWriteInstructions): the customization instructions for write. + """ + self.j_object.append(instructions.j_object) + + @property + def j_object(self) -> jpy.JType: + return self.j_table_writer + + class IcebergTableAdapter(JObjectWrapper): """ - This class provides an interface for interacting with Iceberg tables. 
It allows the user to list snapshots, - retrieve table definitions and reading Iceberg tables into Deephaven tables. + :class:`.IcebergTableAdapter` provides an interface for interacting with Iceberg tables. It allows the user to list + snapshots, retrieve table definitions and reading Iceberg tables into Deephaven tables. """ j_object_type = _JIcebergTableAdapter or type(None) @@ -217,7 +462,7 @@ def definition(self, instructions: Optional[IcebergReadInstructions] = None) -> a table containing the table definition. """ - if instructions is not None: + if instructions: return Table(self.j_object.definitionTable(instructions.j_object)) return Table(self.j_object.definitionTable()) @@ -233,13 +478,27 @@ def table(self, instructions: Optional[IcebergReadInstructions] = None) -> Icebe instructions. Returns: - Table: the table read from the catalog. + the table read from the catalog. """ - if instructions is not None: + if instructions: return IcebergTable(self.j_object.table(instructions.j_object)) return IcebergTable(self.j_object.table()) + def table_writer(self, writer_options: TableParquetWriterOptions) -> IcebergTableWriter: + """ + Create a new :class:`.IcebergTableWriter` for this Iceberg table using the provided writer options. + This method will perform schema validation to ensure that the provided table definition from the writer options + is compatible with the Iceberg table schema. All further writes performed by the returned writer will not be + validated against the table's schema, and thus will be faster. + + Args: + writer_options: The options to configure the table writer. + + Returns: + the table writer object + """ + return IcebergTableWriter(self.j_object.tableWriter(writer_options.j_object)) @property def j_object(self) -> jpy.JType: @@ -248,8 +507,8 @@ def j_object(self) -> jpy.JType: class IcebergCatalogAdapter(JObjectWrapper): """ - This class provides an interface for interacting with Iceberg catalogs. It allows listing namespaces, tables and - snapshots, as well as reading Iceberg tables into Deephaven tables. + :class:`.IcebergCatalogAdapter` provides an interface for interacting with Iceberg catalogs. It allows listing + namespaces, tables and snapshots, as well as reading Iceberg tables into Deephaven tables. """ j_object_type = _JIcebergCatalogAdapter or type(None) @@ -269,7 +528,7 @@ def namespaces(self, namespace: Optional[str] = None) -> Table: a table containing the namespaces. """ - if namespace is not None: + if namespace: return Table(self.j_object.namespaces(namespace)) return Table(self.j_object.namespaces()) @@ -299,6 +558,22 @@ def load_table(self, table_identifier: str) -> IcebergTableAdapter: return IcebergTableAdapter(self.j_object.loadTable(table_identifier)) + def create_table(self, table_identifier: str, table_definition: TableDefinitionLike) -> IcebergTableAdapter: + """ + Create a new Iceberg table in the catalog with the given table identifier and definition. + All columns of partitioning type will be used to create the partition spec for the table. + + Args: + table_identifier (str): the identifier of the new table. + table_definition (TableDefinitionLike): the table definition of the new table. + + Returns: + :class:`.IcebergTableAdapter`: the table adapter for the new Iceberg table. 
+ """ + + return IcebergTableAdapter(self.j_object.createTable(table_identifier, + TableDefinition(table_definition).j_table_definition)) + @property def j_object(self) -> jpy.JType: return self.j_catalog_adapter @@ -333,7 +608,7 @@ def adapter_s3_rest( need to set this; it is most useful when connecting to non-AWS, S3-compatible APIs. Returns: - IcebergCatalogAdapter: the catalog adapter for the provided S3 REST catalog. + :class:`.IcebergCatalogAdapter`: the catalog adapter for the provided S3 REST catalog. Raises: DHError: If unable to build the catalog adapter. @@ -371,7 +646,7 @@ def adapter_aws_glue( catalog URI. Returns: - IcebergCatalogAdapter: the catalog adapter for the provided AWS Glue catalog. + :class:`.IcebergCatalogAdapter`: the catalog adapter for the provided AWS Glue catalog. Raises: DHError: If unable to build the catalog adapter. @@ -467,7 +742,7 @@ def adapter( hadoop_config (Optional[Dict[str, str]]): hadoop configuration properties for the catalog to load s3_instructions (Optional[s3.S3Instructions]): the S3 instructions if applicable Returns: - IcebergCatalogAdapter: the catalog adapter created from the provided properties + :class:`.IcebergCatalogAdapter`: the catalog adapter created from the provided properties Raises: DHError: If unable to build the catalog adapter @@ -481,8 +756,8 @@ def adapter( return IcebergCatalogAdapter( _JIcebergToolsS3.createAdapter( name, - j_hashmap(properties if properties is not None else {}), - j_hashmap(hadoop_config if hadoop_config is not None else {}), + j_hashmap(properties if properties else {}), + j_hashmap(hadoop_config if hadoop_config else {}), s3_instructions.j_object, ) ) @@ -493,8 +768,8 @@ def adapter( return IcebergCatalogAdapter( _JIcebergTools.createAdapter( name, - j_hashmap(properties if properties is not None else {}), - j_hashmap(hadoop_config if hadoop_config is not None else {}), + j_hashmap(properties if properties else {}), + j_hashmap(hadoop_config if hadoop_config else {}), ) ) except Exception as e: diff --git a/py/server/deephaven/experimental/s3.py b/py/server/deephaven/experimental/s3.py index 47426533335..afe2b463e88 100644 --- a/py/server/deephaven/experimental/s3.py +++ b/py/server/deephaven/experimental/s3.py @@ -25,6 +25,7 @@ module will fail to find the java types. """ + class Credentials(JObjectWrapper): """ Credentials object for authenticating with an S3 server. 
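The pieces above fit together as follows. A minimal sketch of the new write path, assuming a catalog adapter (`catalog_adapter`) has already been obtained (for example via `adapter_s3_rest` or `adapter`); the table identifier and column names are placeholders:

from deephaven import dtypes, empty_table
from deephaven.experimental import iceberg

# A small Deephaven table to write; "i" produces int values, "i * 1.5" produces doubles.
source = empty_table(10).update(["x = i", "y = i * 1.5"])

# Create a new (unpartitioned) Iceberg table from a table definition.
table_adapter = catalog_adapter.create_table(
    table_identifier="sales.sales_data",
    table_definition={"x": dtypes.int32, "y": dtypes.double},
)

writer_options = iceberg.TableParquetWriterOptions(
    table_definition={"x": dtypes.int32, "y": dtypes.double},
    compression_codec_name="ZSTD",  # optional; the default is SNAPPY
)
writer = table_adapter.table_writer(writer_options)  # schema compatibility is checked once, here

# Append `source` as a new snapshot; no partition paths, since the table is unpartitioned.
writer.append(iceberg.IcebergWriteInstructions(source))

Because the schema validation happens when the writer is created, repeated append calls on the same writer skip the per-write compatibility checks.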
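For a partitioned Iceberg table, each Deephaven table must be paired with a partition path, in the same order. A sketch against an existing table assumed to be partitioned by "year" and "month"; the schema id, field-id mapping, and partition values are placeholders:

from deephaven import dtypes, empty_table
from deephaven.experimental import iceberg

table_adapter = catalog_adapter.load_table("sales.sales_partitioned")

writer = table_adapter.table_writer(iceberg.TableParquetWriterOptions(
    table_definition={"x": dtypes.int32, "y": dtypes.double},
    schema_provider=iceberg.SchemaProvider.from_schema_id(0),  # or SchemaProvider.from_current()
    field_id_to_column_name={1: "x", 2: "y"},  # Iceberg field id -> Deephaven column name
))

jan = empty_table(5).update(["x = i", "y = i * 1.5"])
feb = empty_table(5).update(["x = 100 + i", "y = i * 2.5"])

# One partition path per table, in the same order as the tables; both land in one snapshot.
writer.append(iceberg.IcebergWriteInstructions(
    [jan, feb],
    partition_paths=["year=2024/month=01", "year=2024/month=02"],
))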
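For contrast, the read side pairs `IcebergReadInstructions` with the refresh modes above. The `update_mode` keyword is assumed here; the snapshot id mirrors the test below:

# Read a specific snapshot.
t = table_adapter.table(iceberg.IcebergReadInstructions(snapshot_id=12345))

# Read with manual refresh, then pull in the latest snapshot on demand.
refreshing = table_adapter.table(iceberg.IcebergReadInstructions(
    update_mode=iceberg.IcebergUpdateMode.manual_refresh()))
refreshing.update()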
diff --git a/py/server/tests/test_iceberg.py b/py/server/tests/test_iceberg.py index a3dcb72ac2f..28a2758c0d0 100644 --- a/py/server/tests/test_iceberg.py +++ b/py/server/tests/test_iceberg.py @@ -13,6 +13,7 @@ _JTableDefinition = jpy.get_type("io.deephaven.engine.table.TableDefinition") + class IcebergTestCase(BaseTestCase): """ Test cases for the deephaven.iceberg module (performed locally) """ @@ -46,7 +47,7 @@ def test_instruction_create_with_col_renames(self): self.assertTrue(col_rename_dict["old_name_c"] == "new_name_c") def test_instruction_create_with_table_definition_dict(self): - table_def={ + table_def = { "x": dtypes.int32, "y": dtypes.double, "z": dtypes.double, @@ -59,7 +60,7 @@ def test_instruction_create_with_table_definition_dict(self): self.assertTrue(col_names[2] == "z") def test_instruction_create_with_table_definition_list(self): - table_def=[ + table_def = [ col_def("Partition", dtypes.int32, column_type=ColumnType.PARTITIONING), col_def("x", dtypes.int32), col_def("y", dtypes.double), @@ -76,3 +77,65 @@ def test_instruction_create_with_table_definition_list(self): def test_instruction_create_with_snapshot_id(self): iceberg_read_instructions = iceberg.IcebergReadInstructions(snapshot_id=12345) self.assertTrue(iceberg_read_instructions.j_object.snapshotId().getAsLong() == 12345) + + def test_writer_options_create_default(self): + writer_options = iceberg.TableParquetWriterOptions(table_definition={"x": dtypes.int32}) + self.assertEqual(writer_options.j_object.compressionCodecName(), "SNAPPY") + self.assertEqual(writer_options.j_object.maximumDictionaryKeys(), 1048576) + self.assertEqual(writer_options.j_object.maximumDictionarySize(), 1048576) + self.assertEqual(writer_options.j_object.targetPageSize(), 65536) + + def test_writer_options_create_with_s3_instructions(self): + s3_instructions = s3.S3Instructions(region_name="us-east-1", + access_key_id="some_access_key_id", + secret_access_key="some_secret_access_key" + ) + writer_options = iceberg.TableParquetWriterOptions(table_definition={"x": dtypes.int32}, + data_instructions=s3_instructions) + + def test_writer_options_create_with_table_definition_dict(self): + table_def = { + "x": dtypes.int32, + "y": dtypes.double, + "z": dtypes.double, + } + writer_options = iceberg.TableParquetWriterOptions(table_def) + col_names = j_list_to_list(writer_options.j_object.tableDefinition().getColumnNames()) + self.assertTrue(col_names[0] == "x") + self.assertTrue(col_names[1] == "y") + self.assertTrue(col_names[2] == "z") + + def test_writer_options_create_with_table_definition_list(self): + table_def = [ + col_def("Partition", dtypes.int32, column_type=ColumnType.PARTITIONING), + col_def("x", dtypes.int32), + col_def("y", dtypes.double), + col_def("z", dtypes.double), + ] + + writer_options = iceberg.TableParquetWriterOptions(table_def) + col_names = j_list_to_list(writer_options.j_object.tableDefinition().getColumnNames()) + self.assertTrue(col_names[0] == "Partition") + self.assertTrue(col_names[1] == "x") + self.assertTrue(col_names[2] == "y") + self.assertTrue(col_names[3] == "z") + + def test_writer_options_create_with_compression_codec(self): + writer_options = iceberg.TableParquetWriterOptions(table_definition={"x": dtypes.int32}, + compression_codec_name="GZIP") + self.assertEqual(writer_options.j_object.compressionCodecName(), "GZIP") + + def test_writer_options_create_with_max_dictionary_keys(self): + writer_options = iceberg.TableParquetWriterOptions(table_definition={"x": dtypes.int32}, + maximum_dictionary_keys=1024) 
+ self.assertEqual(writer_options.j_object.maximumDictionaryKeys(), 1024) + + def test_writer_options_create_with_max_dictionary_size(self): + writer_options = iceberg.TableParquetWriterOptions(table_definition={"x": dtypes.int32}, + maximum_dictionary_size=8192) + self.assertEqual(writer_options.j_object.maximumDictionarySize(), 8192) + + def test_writer_options_create_with_target_page_size(self): + writer_options = iceberg.TableParquetWriterOptions(table_definition={"x": dtypes.int32}, + target_page_size=4096) + self.assertEqual(writer_options.j_object.targetPageSize(), 4096) diff --git a/web/client-api/src/main/java/io/deephaven/web/client/api/subscription/AbstractTableSubscription.java b/web/client-api/src/main/java/io/deephaven/web/client/api/subscription/AbstractTableSubscription.java index cf75bc73665..9c2746ccb01 100644 --- a/web/client-api/src/main/java/io/deephaven/web/client/api/subscription/AbstractTableSubscription.java +++ b/web/client-api/src/main/java/io/deephaven/web/client/api/subscription/AbstractTableSubscription.java @@ -29,6 +29,7 @@ import io.deephaven.web.shared.data.RangeSet; import io.deephaven.web.shared.data.ShiftedRange; import io.deephaven.web.shared.fu.JsRunnable; +import jsinterop.annotations.JsMethod; import jsinterop.annotations.JsProperty; import jsinterop.base.Any; import jsinterop.base.Js; @@ -51,6 +52,7 @@ * exposed to api consumers, rather than wrapping in a Table type, as it handles the barrage stream and provides events * that client code can listen to. */ +@TsIgnore public abstract class AbstractTableSubscription extends HasEventHandling { /** * Indicates that some new data is available on the client, either an initial snapshot or a delta update. The @@ -534,6 +536,7 @@ public JsArray getColumns() { /** * Stops the subscription on the server. */ + @JsMethod public void close() { state.unretain(this); if (doExchange != null) { diff --git a/web/client-api/src/main/resources/io/deephaven/web/super/java/math/BigInteger.java b/web/client-api/src/main/resources/io/deephaven/web/super/java/math/BigInteger.java deleted file mode 100644 index 500451f2f96..00000000000 --- a/web/client-api/src/main/resources/io/deephaven/web/super/java/math/BigInteger.java +++ /dev/null @@ -1,1599 +0,0 @@ -/* - * Copyright 2009 Google Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not - * use this file except in compliance with the License. You may obtain a copy of - * the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to You under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - * - * INCLUDES MODIFICATIONS BY RICHARD ZSCHECH AS WELL AS GOOGLE. - */ -package java.math; - -import static javaemul.internal.Coercions.ensureInt; -import static javaemul.internal.InternalPreconditions.checkCriticalArgument; -import static javaemul.internal.InternalPreconditions.checkNotNull; - -import java.io.Serializable; -import java.util.Random; -import javaemul.internal.LongUtils; - -/** - * This class represents immutable integer numbers of arbitrary length. Large - * numbers are typically used in security applications and therefore BigIntegers - * offer dedicated functionality like the generation of large prime numbers or - * the computation of modular inverse. - *

- * Since the class was modeled to offer all the functionality as the - * {@link Integer} class does, it provides even methods that operate bitwise on - * a two's complement representation of large integers. Note however that the - * implementations favors an internal representation where magnitude and sign - * are treated separately. Hence such operations are inefficient and should be - * discouraged. In simple words: Do NOT implement any bit fields based on - * BigInteger. - */ -public class BigInteger extends Number implements Comparable, - Serializable { - - /** - * The {@code BigInteger} constant 1. - */ - public static final BigInteger ONE = new BigInteger(1, 1); - - /* Fields used for the internal representation. */ - - /** - * The {@code BigInteger} constant 10. - */ - public static final BigInteger TEN = new BigInteger(1, 10); - - /** - * The {@code BigInteger} constant 0. - */ - public static final BigInteger ZERO = new BigInteger(0, 0); - - /** - * The {@code BigInteger} constant 0 used for comparison. - */ - static final int EQUALS = 0; - - /** - * The {@code BigInteger} constant 1 used for comparison. - */ - static final int GREATER = 1; - - /** - * The {@code BigInteger} constant -1 used for comparison. - */ - static final int LESS = -1; - - /** - * The {@code BigInteger} constant -1. - */ - static final BigInteger MINUS_ONE = new BigInteger(-1, 1); - - /** - * All the {@code BigInteger} numbers in the range [0,10] are cached. - */ - static final BigInteger[] SMALL_VALUES = { - ZERO, ONE, new BigInteger(1, 2), new BigInteger(1, 3), - new BigInteger(1, 4), new BigInteger(1, 5), new BigInteger(1, 6), - new BigInteger(1, 7), new BigInteger(1, 8), new BigInteger(1, 9), TEN}; - - static final BigInteger[] TWO_POWS; - - /** - * This is the serialVersionUID used by the sun implementation. - */ - private static final long serialVersionUID = -8287574255936472291L; - - static { - TWO_POWS = new BigInteger[32]; - for (int i = 0; i < TWO_POWS.length; i++) { - TWO_POWS[i] = BigInteger.valueOf(1L << i); - } - } - - /** - * Returns a random positive {@code BigInteger} instance in the range [0, - * 2^(bitLength)-1] which is probably prime. The probability that the returned - * {@code BigInteger} is prime is beyond (1-1/2^80). - *

- * Implementation Note: Currently {@code rnd} is ignored. - * - * @param bitLength length of the new {@code BigInteger} in bits. - * @param rnd random generator used to generate the new {@code BigInteger}. - * @return probably prime random {@code BigInteger} instance. - * @throws ArithmeticException if {@code bitLength < 2}. - */ - public static BigInteger probablePrime(int bitLength, Random rnd) { - return new BigInteger(bitLength, 100, rnd); - } - - public static BigInteger valueOf(long val) { - return val >= 0 ? BigInteger.fromBits(val) : BigInteger.fromBits(-val).negate(); - } - - private static BigInteger fromBits(long bits) { - int lowBits = (int) bits; - int highBits = LongUtils.getHighBits(bits); - if (highBits != 0) { - return new BigInteger(1, lowBits, highBits); - } - if (lowBits > 10 || lowBits < 0) { - return new BigInteger(1, lowBits); - } - return SMALL_VALUES[lowBits]; - } - - static BigInteger getPowerOfTwo(int exp) { - if (exp < TWO_POWS.length) { - return TWO_POWS[exp]; - } - int intCount = exp >> 5; - int bitN = exp & 31; - int resDigits[] = new int[intCount + 1]; - resDigits[intCount] = 1 << bitN; - return new BigInteger(1, intCount + 1, resDigits); - } - - /** - * @see BigInteger#BigInteger(String, int) - */ - private static void setFromString(BigInteger bi, String val, int radix) { - int sign; - int[] digits; - int numberLength; - int stringLength = val.length(); - int startChar; - int endChar = stringLength; - - if (val.charAt(0) == '-') { - sign = -1; - startChar = 1; - stringLength--; - } else { - sign = 1; - startChar = 0; - } - /* - * We use the following algorithm: split a string into portions of n - * characters and convert each portion to an integer according to the radix. - * Then convert an exp(radix, n) based number to binary using the - * multiplication method. See D. Knuth, The Art of Computer Programming, - * vol. 2. - */ - - int charsPerInt = Conversion.digitFitInInt[radix]; - int bigRadixDigitsLength = stringLength / charsPerInt; - int topChars = stringLength % charsPerInt; - - if (topChars != 0) { - bigRadixDigitsLength++; - } - digits = new int[bigRadixDigitsLength]; - // Get the maximal power of radix that fits in int - int bigRadix = Conversion.bigRadices[radix - 2]; - // Parse an input string and accumulate the BigInteger's magnitude - int digitIndex = 0; // index of digits array - int substrEnd = startChar + ((topChars == 0) ? charsPerInt : topChars); - int newDigit; - - for (int substrStart = startChar; substrStart < endChar; substrStart = substrEnd, substrEnd = substrStart - + charsPerInt) { - int bigRadixDigit = Integer.parseInt( - val.substring(substrStart, substrEnd), radix); - newDigit = Multiplication.multiplyByInt(digits, digitIndex, bigRadix); - newDigit += Elementary.inplaceAdd(digits, digitIndex, bigRadixDigit); - digits[digitIndex++] = newDigit; - } - numberLength = digitIndex; - bi.sign = sign; - bi.numberLength = numberLength; - bi.digits = digits; - bi.cutOffLeadingZeroes(); - } - - /** - * The magnitude of this big integer. This array is in little endian order and - * each "digit" is a 32-bit unsigned integer. For example: {@code 13} is - * represented as [ 13 ] {@code -13} is represented as [ 13 ] {@code 2^32 + - * 13} is represented as [ 13, 1 ] {@code 2^64 + 13} is represented as [ 13, - * 0, 1 ] {@code 2^31} is represented as [ Integer.MIN_VALUE ] The magnitude - * array may be longer than strictly necessary, which results in additional - * trailing zeros. - * - *

TODO(jat): consider changing to 24-bit integers for better performance - * in browsers. - */ - transient int digits[]; - - /** - * The length of this in measured in ints. Can be less than digits.length(). - */ - transient int numberLength; - - /** - * The sign of this. - */ - transient int sign; - - private transient int firstNonzeroDigit = -2; - - /** - * Cache for the hash code. - */ - private transient int hashCode = 0; - - /** - * Constructs a new {@code BigInteger} from the given two's complement - * representation. The most significant byte is the entry at index 0. The most - * significant bit of this entry determines the sign of the new {@code - * BigInteger} instance. The given array must not be empty. - * - * @param val two's complement representation of the new {@code BigInteger}. - * @throws NullPointerException if {@code val == null}. - * @throws NumberFormatException if the length of {@code val} is zero. - */ - public BigInteger(byte[] val) { - this(val, 0, val.length); - } - - /** - * Constructs a new {@code BigInteger} from the given two's complement - * representation. The most significant byte is the entry at index 0. The most - * significant bit of this entry determines the sign of the new {@code - * BigInteger} instance. The given array must not be empty. - * - * @param val two's complement representation of the new {@code BigInteger}. - * @param offset the start offset of the binary representation. - * @param length the number of bytes to use. - * @throws NullPointerException if {@code val == null}. - * @throws NumberFormatException if the length of {@code val} is zero. - */ - public BigInteger(byte[] val, int offset, int length) { - if (val.length == 0) { - // math.12=Zero length BigInteger - throw new NumberFormatException("Zero length BigInteger"); //$NON-NLS-1$ - } - if (length < 0 || offset < 0 || length > val.length - offset) { - throw new IndexOutOfBoundsException("Range check failed: offset=" + offset + ", length=" - + length + ", val.length=" + val.length); - } - if (length == 0) { - sign = 0; - numberLength = 1; - digits = new int[] {0}; - return; - } - if (val[offset] < 0) { - sign = -1; - putBytesNegativeToIntegers(val, offset, length); - } else { - sign = 1; - putBytesPositiveToIntegers(val, offset, length); - } - cutOffLeadingZeroes(); - } - - /** - * Constructs a new {@code BigInteger} instance with the given sign and the - * given magnitude. The sign is given as an integer (-1 for negative, 0 for - * zero, 1 for positive). The magnitude is specified as a byte array. The most - * significant byte is the entry at index 0. - * - * @param signum sign of the new {@code BigInteger} (-1 for negative, 0 for - * zero, 1 for positive). - * @param magnitude magnitude of the new {@code BigInteger} with the most - * significant byte first. - * @throws NullPointerException if {@code magnitude == null}. - * @throws NumberFormatException if the sign is not one of -1, 0, 1 or if the - * sign is zero and the magnitude contains non-zero entries. - */ - public BigInteger(int signum, byte[] magnitude) { - this(signum, magnitude, 0, magnitude.length); - } - - /** - * Constructs a new {@code BigInteger} instance with the given sign and the - * given magnitude. The sign is given as an integer (-1 for negative, 0 for - * zero, 1 for positive). The magnitude is specified as a byte array. The most - * significant byte is the entry at index 0. - * - * @param signum sign of the new {@code BigInteger} (-1 for negative, 0 for - * zero, 1 for positive). 
- * @param magnitude magnitude of the new {@code BigInteger} with the most - * significant byte first. - * @param offset the start offset of the binary representation. - * @param length the number of bytes to use. - * @throws NullPointerException if {@code magnitude == null}. - * @throws NumberFormatException if the sign is not one of -1, 0, 1 or if the - * sign is zero and the magnitude contains non-zero entries. - */ - public BigInteger(int signum, byte[] magnitude, int offset, int length) { - checkNotNull(magnitude); - - if (length < 0 || offset < 0 || length > magnitude.length - offset) { - throw new IndexOutOfBoundsException("Range check failed: offset=" + offset + ", length=" - + length + ", val.length=" + magnitude.length); - } - - if ((signum < -1) || (signum > 1)) { - // math.13=Invalid signum value - throw new NumberFormatException("Invalid signum value"); //$NON-NLS-1$ - } - if (signum == 0) { - for (int index = offset; index < offset + length; index++) { - byte element = magnitude[index]; - if (element != 0) { - // math.14=signum-magnitude mismatch - throw new NumberFormatException("signum-magnitude mismatch"); //$NON-NLS-1$ - } - } - } - if (length == 0) { - sign = 0; - numberLength = 1; - digits = new int[] {0}; - } else { - sign = signum; - putBytesPositiveToIntegers(magnitude, offset, length); - cutOffLeadingZeroes(); - } - } - - /** - * Constructs a random {@code BigInteger} instance in the range [0, - * 2^(bitLength)-1] which is probably prime. The probability that the returned - * {@code BigInteger} is prime is beyond (1-1/2^certainty). - * - * @param bitLength length of the new {@code BigInteger} in bits. - * @param certainty tolerated primality uncertainty. - * @param rnd is an optional random generator to be used. - * @throws ArithmeticException if {@code bitLength} < 2. - */ - public BigInteger(int bitLength, int certainty, Random rnd) { - if (bitLength < 2) { - // math.1C=bitLength < 2 - throw new ArithmeticException("bitLength < 2"); //$NON-NLS-1$ - } - BigInteger me = Primality.consBigInteger(bitLength, certainty, rnd); - sign = me.sign; - numberLength = me.numberLength; - digits = me.digits; - } - - /** - * Constructs a random non-negative {@code BigInteger} instance in the range - * [0, 2^(numBits)-1]. - * - * @param numBits maximum length of the new {@code BigInteger} in bits. - * @param rnd is an optional random generator to be used. - * @throws IllegalArgumentException if {@code numBits} < 0. - */ - public BigInteger(int numBits, Random rnd) { - checkCriticalArgument(numBits >= 0, "numBits must be non-negative"); - - if (numBits == 0) { - sign = 0; - numberLength = 1; - digits = new int[] {0}; - } else { - sign = 1; - numberLength = (numBits + 31) >> 5; - digits = new int[numberLength]; - for (int i = 0; i < numberLength; i++) { - digits[i] = rnd.nextInt(); - } - // Using only the necessary bits - digits[numberLength - 1] >>>= (-numBits) & 31; - cutOffLeadingZeroes(); - } - } - - /** - * Constructs a new {@code BigInteger} instance from the string - * representation. The string representation consists of an optional minus - * sign followed by a non-empty sequence of decimal digits. - * - * @param val string representation of the new {@code BigInteger}. - * @throws NullPointerException if {@code val == null}. - * @throws NumberFormatException if {@code val} is not a valid representation - * of a {@code BigInteger}. 
- */ - public BigInteger(String val) { - this(val, 10); - } - - /** - * Constructs a new {@code BigInteger} instance from the string - * representation. The string representation consists of an optional minus - * sign followed by a non-empty sequence of digits in the specified radix. For - * the conversion the method {@code Character.digit(char, radix)} is used. - * - * @param val string representation of the new {@code BigInteger}. - * @param radix the base to be used for the conversion. - * @throws NullPointerException if {@code val == null}. - * @throws NumberFormatException if {@code val} is not a valid representation - * of a {@code BigInteger} or if {@code radix < Character.MIN_RADIX} - * or {@code radix > Character.MAX_RADIX}. - */ - public BigInteger(String val, int radix) { - checkNotNull(val); - - if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) { - // math.11=Radix out of range - throw new NumberFormatException("Radix out of range"); //$NON-NLS-1$ - } - if (val.isEmpty()) { - // math.12=Zero length BigInteger - throw new NumberFormatException("Zero length BigInteger"); //$NON-NLS-1$ - } - setFromString(this, val, radix); - } - - /** - * Constructs a number which array is of size 1. - * - * @param sign the sign of the number - * @param value the only one digit of array - */ - BigInteger(int sign, int bits) { - this(sign, 1, new int[] {bits}); - } - - BigInteger(int sign, int lowBits, int highBits) { - this(sign, 2, new int[] {lowBits, highBits}); - } - - /** - * Creates a new {@code BigInteger} with the given sign and magnitude. This constructor does not - * create a copy, so any changes to the reference will affect the new number. - * - * @param signum The sign of the number represented by {@code digits} - * @param digits The magnitude of the number - */ - BigInteger(int signum, int digits[]) { - if (digits.length == 0) { - sign = 0; - numberLength = 1; - this.digits = new int[] {0}; - } else { - sign = signum; - numberLength = digits.length; - this.digits = digits; - cutOffLeadingZeroes(); - } - } - - /** - * Constructs a number without to create new space. This construct should be used only if the - * three fields of representation are known. - * - * @param sign the sign of the number - * @param numberLength the length of the internal array - * @param digits a reference of some array created before - */ - BigInteger(int sign, int numberLength, int[] digits) { - this.sign = sign; - this.numberLength = numberLength; - this.digits = digits; - } - - /** - * Returns a (new) {@code BigInteger} whose value is the absolute value of - * {@code this}. - * - * @return {@code abs(this)}. - */ - public BigInteger abs() { - return sign < 0 ? negate() : this; - } - - /** - * Returns a new {@code BigInteger} whose value is {@code this + val}. - * - * @param val value to be added to {@code this}. - * @return {@code this + val}. - * @throws NullPointerException if {@code val == null}. - */ - public BigInteger add(BigInteger val) { - return Elementary.add(this, val); - } - - /** - * Returns a new {@code BigInteger} whose value is {@code this & val}. - *

- * Implementation Note: Usage of this method is not recommended as the - * current implementation is not efficient. - * - * @param val value to be and'ed with {@code this}. - * @return {@code this & val}. - * @throws NullPointerException if {@code val == null}. - */ - public BigInteger and(BigInteger val) { - return Logical.and(this, val); - } - - /** - * Returns a new {@code BigInteger} whose value is {@code this & ~val}. - * Evaluating {@code x.andNot(val)} returns the same result as {@code - * x.and(val.not())}. - *

- * Implementation Note: Usage of this method is not recommended as the - * current implementation is not efficient. - * - * @param val value to be not'ed and then and'ed with {@code this}. - * @return {@code this & ~val}. - * @throws NullPointerException if {@code val == null}. - */ - public BigInteger andNot(BigInteger val) { - return Logical.andNot(this, val); - } - - /** - * Use {@code bitLength(0)} if you want to know the length of the binary value - * in bits. - *

- * Returns the number of bits in the binary representation of {@code this} - * which differ from the sign bit. If {@code this} is positive the result is - * equivalent to the number of bits set in the binary representation of - * {@code this}. If {@code this} is negative the result is equivalent to the - * number of bits set in the binary representation of {@code -this-1}. - *

- * Implementation Note: Usage of this method is not recommended as the - * current implementation is not efficient. - * - * @return number of bits in the binary representation of {@code this} which - * differ from the sign bit - */ - public int bitCount() { - return BitLevel.bitCount(this); - } - - /** - * Returns the length of the value's two's complement representation without - * leading zeros for positive numbers / without leading ones for negative - * values. - *

- * The two's complement representation of {@code this} will be at least - * {@code bitLength() + 1} bits long. - *

- * The value will fit into an {@code int} if {@code bitLength() < 32} or into - * a {@code long} if {@code bitLength() < 64}. - * - * @return the length of the minimal two's complement representation for - * {@code this} without the sign bit. - */ - public int bitLength() { - return BitLevel.bitLength(this); - } - - /** - * Converts value of this {@code BigInteger} to a {@code byte} if it fits it, - * otherwise {@code ArithmeticException} is thrown. - * - * @return this {@code BigInteger} converted to a {@code byte}. - * @throws ArithmeticException if the value of this {@code BigInteger} - * does not fit in a {@code byte}. - */ - public byte byteValueExact() { - if (numberLength <= 1 && bitLength() < Byte.SIZE) { - return byteValue(); - } - throw new ArithmeticException("out of byte range"); - } - - /** - * Returns a new {@code BigInteger} which has the same binary representation - * as {@code this} but with the bit at position n cleared. The result is - * equivalent to {@code this & ~(2^n)}. - *

- * Implementation Note: Usage of this method is not recommended as the - * current implementation is not efficient. - * - * @param n position where the bit in {@code this} has to be cleared. - * @return {@code this & ~(2^n)}. - * @throws ArithmeticException if {@code n < 0}. - */ - public BigInteger clearBit(int n) { - if (testBit(n)) { - return BitLevel.flipBit(this, n); - } - return this; - } - - /** - * Compares this {@code BigInteger} with {@code val}. Returns one of the three - * values 1, 0, or -1. - * - * @param val value to be compared with {@code this}. - * @return {@code 1} if {@code this > val}, {@code -1} if {@code this < val} , - * {@code 0} if {@code this == val}. - * @throws NullPointerException if {@code val == null}. - */ - @Override - public int compareTo(BigInteger val) { - if (sign > val.sign) { - return GREATER; - } - if (sign < val.sign) { - return LESS; - } - if (numberLength > val.numberLength) { - return sign; - } - if (numberLength < val.numberLength) { - return -val.sign; - } - // Equal sign and equal numberLength - return (sign * Elementary.compareArrays(digits, val.digits, numberLength)); - } - - /** - * Returns a new {@code BigInteger} whose value is {@code this / divisor}. - * - * @param divisor value by which {@code this} is divided. - * @return {@code this / divisor}. - * @throws NullPointerException if {@code divisor == null}. - * @throws ArithmeticException if {@code divisor == 0}. - */ - public BigInteger divide(BigInteger divisor) { - if (divisor.sign == 0) { - // math.17=BigInteger divide by zero - throw new ArithmeticException("BigInteger divide by zero"); //$NON-NLS-1$ - } - int divisorSign = divisor.sign; - if (divisor.isOne()) { - return ((divisor.sign > 0) ? this : this.negate()); - } - int thisSign = sign; - int thisLen = numberLength; - int divisorLen = divisor.numberLength; - if (thisLen + divisorLen == 2) { - long val = (digits[0] & 0xFFFFFFFFL) / (divisor.digits[0] & 0xFFFFFFFFL); - if (thisSign != divisorSign) { - val = -val; - } - return valueOf(val); - } - int cmp = ((thisLen != divisorLen) ? ((thisLen > divisorLen) ? 1 : -1) - : Elementary.compareArrays(digits, divisor.digits, thisLen)); - if (cmp == EQUALS) { - return ((thisSign == divisorSign) ? ONE : MINUS_ONE); - } - if (cmp == LESS) { - return ZERO; - } - int resLength = thisLen - divisorLen + 1; - int resDigits[] = new int[resLength]; - int resSign = ((thisSign == divisorSign) ? 1 : -1); - if (divisorLen == 1) { - Division.divideArrayByInt(resDigits, digits, thisLen, divisor.digits[0]); - } else { - Division.divide(resDigits, resLength, digits, thisLen, divisor.digits, - divisorLen); - } - BigInteger result = new BigInteger(resSign, resLength, resDigits); - result.cutOffLeadingZeroes(); - return result; - } - - /** - * Returns a {@code BigInteger} array which contains {@code this / divisor} at - * index 0 and {@code this % divisor} at index 1. - * - * @param divisor value by which {@code this} is divided. - * @return {@code [this / divisor, this % divisor]}. - * @throws NullPointerException if {@code divisor == null}. - * @throws ArithmeticException if {@code divisor == 0}. 
- * @see #divide - * @see #remainder - */ - public BigInteger[] divideAndRemainder(BigInteger divisor) { - int divisorSign = divisor.sign; - if (divisorSign == 0) { - // math.17=BigInteger divide by zero - throw new ArithmeticException("BigInteger divide by zero"); //$NON-NLS-1$ - } - int divisorLen = divisor.numberLength; - int[] divisorDigits = divisor.digits; - if (divisorLen == 1) { - return Division.divideAndRemainderByInteger(this, divisorDigits[0], - divisorSign); - } - // res[0] is a quotient and res[1] is a remainder: - int[] thisDigits = digits; - int thisLen = numberLength; - int cmp = (thisLen != divisorLen) ? ((thisLen > divisorLen) ? 1 : -1) - : Elementary.compareArrays(thisDigits, divisorDigits, thisLen); - if (cmp < 0) { - return new BigInteger[] {ZERO, this}; - } - int thisSign = sign; - int quotientLength = thisLen - divisorLen + 1; - int remainderLength = divisorLen; - int quotientSign = ((thisSign == divisorSign) ? 1 : -1); - int quotientDigits[] = new int[quotientLength]; - int remainderDigits[] = Division.divide(quotientDigits, quotientLength, - thisDigits, thisLen, divisorDigits, divisorLen); - BigInteger result0 = new BigInteger(quotientSign, quotientLength, - quotientDigits); - BigInteger result1 = new BigInteger(thisSign, remainderLength, - remainderDigits); - result0.cutOffLeadingZeroes(); - result1.cutOffLeadingZeroes(); - return new BigInteger[] {result0, result1}; - } - - /** - * Returns this {@code BigInteger} as an double value. If {@code this} is too - * big to be represented as an double, then {@code Double.POSITIVE_INFINITY} - * or {@code Double.NEGATIVE_INFINITY} is returned. Note, that not all - * integers x in the range [-Double.MAX_VALUE, Double.MAX_VALUE] can be - * represented as a double. The double representation has a mantissa of length - * 53. For example, 2^53+1 = 9007199254740993 is returned as double - * 9007199254740992.0. - * - * @return this {@code BigInteger} as a double value - */ - @Override - public double doubleValue() { - return Double.parseDouble(this.toString()); - } - - /** - * Returns {@code true} if {@code x} is a BigInteger instance and if this - * instance is equal to this {@code BigInteger}. - * - * @param x object to be compared with {@code this}. - * @return true if {@code x} is a BigInteger and {@code this == x}, {@code - * false} otherwise. - */ - @Override - public boolean equals(Object x) { - if (this == x) { - return true; - } - if (x instanceof BigInteger) { - BigInteger x1 = (BigInteger) x; - return sign == x1.sign && numberLength == x1.numberLength - && equalsArrays(x1.digits); - } - return false; - } - - /** - * Returns a new {@code BigInteger} which has the same binary representation - * as {@code this} but with the bit at position n flipped. The result is - * equivalent to {@code this ^ 2^n}. - *

- * Implementation Note: Usage of this method is not recommended as the - * current implementation is not efficient. - * - * @param n position where the bit in {@code this} has to be flipped. - * @return {@code this ^ 2^n}. - * @throws ArithmeticException if {@code n < 0}. - */ - public BigInteger flipBit(int n) { - if (n < 0) { - // math.15=Negative bit address - throw new ArithmeticException("Negative bit address"); //$NON-NLS-1$ - } - return BitLevel.flipBit(this, n); - } - - /** - * Returns this {@code BigInteger} as an float value. If {@code this} is too - * big to be represented as an float, then {@code Float.POSITIVE_INFINITY} or - * {@code Float.NEGATIVE_INFINITY} is returned. Note, that not all integers x - * in the range [-Float.MAX_VALUE, Float.MAX_VALUE] can be represented as a - * float. The float representation has a mantissa of length 24. For example, - * 2^24+1 = 16777217 is returned as float 16777216.0. - * - * @return this {@code BigInteger} as a float value. - */ - @Override - public float floatValue() { - return Float.parseFloat(this.toString()); - } - - /** - * Returns a new {@code BigInteger} whose value is greatest common divisor of - * {@code this} and {@code val}. If {@code this==0} and {@code val==0} then - * zero is returned, otherwise the result is positive. - * - * @param val value with which the greatest common divisor is computed. - * @return {@code gcd(this, val)}. - * @throws NullPointerException if {@code val == null}. - */ - public BigInteger gcd(BigInteger val) { - BigInteger val1 = this.abs(); - BigInteger val2 = val.abs(); - // To avoid a possible division by zero - if (val1.signum() == 0) { - return val2; - } else if (val2.signum() == 0) { - return val1; - } - - // Optimization for small operands - // (op2.bitLength() < 64) and (op1.bitLength() < 64) - if (((val1.numberLength == 1) || ((val1.numberLength == 2) && (val1.digits[1] > 0))) - && (val2.numberLength == 1 || (val2.numberLength == 2 && val2.digits[1] > 0))) { - return BigInteger.valueOf(Division.gcdBinary(val1.longValue(), - val2.longValue())); - } - - return Division.gcdBinary(val1.copy(), val2.copy()); - } - - /** - * Returns the position of the lowest set bit in the two's complement - * representation of this {@code BigInteger}. If all bits are zero (this=0) - * then -1 is returned as result. - *

- * Implementation Note: Usage of this method is not recommended as the - * current implementation is not efficient. - * - * @return position of lowest bit if {@code this != 0}, {@code -1} otherwise - */ - public int getLowestSetBit() { - if (sign == 0) { - return -1; - } - // (sign != 0) implies that exists some non zero digit - int i = getFirstNonzeroDigit(); - return ((i << 5) + Integer.numberOfTrailingZeros(digits[i])); - } - - /** - * Returns a hash code for this {@code BigInteger}. - * - * @return hash code for {@code this}. - */ - @Override - public int hashCode() { - if (hashCode != 0) { - return hashCode; - } - for (int i = 0; i < digits.length; i++) { - hashCode = (hashCode * 33 + (digits[i] & 0xffffffff)); - } - hashCode = hashCode * sign; - return hashCode; - } - - /** - * Returns this {@code BigInteger} as an int value. If {@code this} is too big - * to be represented as an int, then {@code this} % 2^32 is returned. - * - * @return this {@code BigInteger} as an int value. - */ - @Override - public int intValue() { - int i = digits[0]; - // i is always positive except for Integer.MIN_VALUE because of int overflow - return sign > 0 ? i : ensureInt(-i); - } - - /** - * Converts value of this {@code BigInteger} to an {@code int} if it fits it, - * otherwise {@code ArithmeticException} is thrown. - * - * @return this {@code BigInteger} converted to an {@code int}. - * @throws ArithmeticException if the value of this {@code BigInteger} - * does not fit in an {@code int}. - */ - public int intValueExact() { - if (numberLength <= 1 && bitLength() < Integer.SIZE) { - return intValue(); - } - throw new ArithmeticException("out of int range"); - } - - /** - * Tests whether this {@code BigInteger} is probably prime. If {@code true} is - * returned, then this is prime with a probability beyond (1-1/2^certainty). - * If {@code false} is returned, then this is definitely composite. If the - * argument {@code certainty} <= 0, then this method returns true. - * - * @param certainty tolerated primality uncertainty. - * @return {@code true}, if {@code this} is probably prime, {@code false} - * otherwise. - */ - public boolean isProbablePrime(int certainty) { - return Primality.isProbablePrime(abs(), certainty); - } - - /** - * Returns this {@code BigInteger} as an long value. If {@code this} is too - * big to be represented as an long, then {@code this} % 2^64 is returned. - * - * @return this {@code BigInteger} as a long value. - */ - @Override - public long longValue() { - long value = - numberLength > 1 - ? LongUtils.fromBits(digits[0], digits[1]) - : LongUtils.fromBits(digits[0], 0); - return sign > 0 ? value : -value; - } - - /** - * Converts value of this {@code BigInteger} to a {@code long} if it fits it, - * otherwise {@code ArithmeticException} is thrown. - * - * @return this {@code BigInteger} converted to a {@code long}. - * @throws ArithmeticException if the value of this {@code BigInteger} - * does not fit in a {@code long}. - */ - public long longValueExact() { - if (numberLength <= 2 && bitLength() < Long.SIZE) { - return longValue(); - } - throw new ArithmeticException("out of long range"); - } - - /** - * Returns the maximum of this {@code BigInteger} and {@code val}. - * - * @param val value to be used to compute the maximum with {@code this} - * @return {@code max(this, val)} - * @throws NullPointerException if {@code val == null} - */ - public BigInteger max(BigInteger val) { - return ((this.compareTo(val) == GREATER) ? 
this : val); - } - - /** - * Returns the minimum of this {@code BigInteger} and {@code val}. - * - * @param val value to be used to compute the minimum with {@code this}. - * @return {@code min(this, val)}. - * @throws NullPointerException if {@code val == null}. - */ - public BigInteger min(BigInteger val) { - return ((this.compareTo(val) == LESS) ? this : val); - } - - /** - * Returns a new {@code BigInteger} whose value is {@code this mod m}. The - * modulus {@code m} must be positive. The result is guaranteed to be in the - * interval {@code [0, m)} (0 inclusive, m exclusive). The behavior of this - * function is not equivalent to the behavior of the % operator defined for - * the built-in {@code int}'s. - * - * @param m the modulus. - * @return {@code this mod m}. - * @throws NullPointerException if {@code m == null}. - * @throws ArithmeticException if {@code m < 0}. - */ - public BigInteger mod(BigInteger m) { - if (m.sign <= 0) { - // math.18=BigInteger: modulus not positive - throw new ArithmeticException("BigInteger: modulus not positive"); //$NON-NLS-1$ - } - BigInteger rem = remainder(m); - return ((rem.sign < 0) ? rem.add(m) : rem); - } - - // @Override - // public double doubleValue() { - // return Conversion.bigInteger2Double(this); - // } - - /** - * Returns a new {@code BigInteger} whose value is {@code 1/this mod m}. The - * modulus {@code m} must be positive. The result is guaranteed to be in the - * interval {@code [0, m)} (0 inclusive, m exclusive). If {@code this} is not - * relatively prime to m, then an exception is thrown. - * - * @param m the modulus. - * @return {@code 1/this mod m}. - * @throws NullPointerException if {@code m == null} - * @throws ArithmeticException if {@code m < 0 or} if {@code this} is not - * relatively prime to {@code m} - */ - public BigInteger modInverse(BigInteger m) { - if (m.sign <= 0) { - // math.18=BigInteger: modulus not positive - throw new ArithmeticException("BigInteger: modulus not positive"); //$NON-NLS-1$ - } - // If both are even, no inverse exists - if (!(testBit(0) || m.testBit(0))) { - // math.19=BigInteger not invertible. - throw new ArithmeticException("BigInteger not invertible."); //$NON-NLS-1$ - } - if (m.isOne()) { - return ZERO; - } - - // From now on: (m > 1) - BigInteger res = Division.modInverseMontgomery(abs().mod(m), m); - if (res.sign == 0) { - // math.19=BigInteger not invertible. - throw new ArithmeticException("BigInteger not invertible."); //$NON-NLS-1$ - } - - res = ((sign < 0) ? m.subtract(res) : res); - return res; - } - - /** - * Returns a new {@code BigInteger} whose value is {@code this^exponent mod m} - * . The modulus {@code m} must be positive. The result is guaranteed to be in - * the interval {@code [0, m)} (0 inclusive, m exclusive). If the exponent is - * negative, then {@code this.modInverse(m)^(-exponent) mod m)} is computed. - * The inverse of this only exists if {@code this} is relatively prime to m, - * otherwise an exception is thrown. - * - * @param exponent the exponent. - * @param m the modulus. - * @return {@code this^exponent mod val}. - * @throws NullPointerException if {@code m == null} or {@code exponent == - * null}. - * @throws ArithmeticException if {@code m < 0} or if {@code exponent<0} and - * this is not relatively prime to {@code m}. 
-   */
-  public BigInteger modPow(BigInteger exponent, BigInteger m) {
-    if (m.sign <= 0) {
-      // math.18=BigInteger: modulus not positive
-      throw new ArithmeticException("BigInteger: modulus not positive"); //$NON-NLS-1$
-    }
-    BigInteger base = this;
-
-    if (m.isOne() | (exponent.sign > 0 & base.sign == 0)) {
-      return BigInteger.ZERO;
-    }
-    if (base.sign == 0 && exponent.sign == 0) {
-      return BigInteger.ONE;
-    }
-    if (exponent.sign < 0) {
-      base = modInverse(m);
-      exponent = exponent.negate();
-    }
-    // From now on: (m > 0) and (exponent >= 0)
-    BigInteger res = (m.testBit(0)) ? Division.oddModPow(base.abs(), exponent,
-        m) : Division.evenModPow(base.abs(), exponent, m);
-    if ((base.sign < 0) && exponent.testBit(0)) {
-      // -b^e mod m == ((-1 mod m) * (b^e mod m)) mod m
-      res = m.subtract(BigInteger.ONE).multiply(res).mod(m);
-    }
-    // else exponent is even, so base^exp is positive
-    return res;
-  }
-
-  /**
-   * Returns a new {@code BigInteger} whose value is {@code this * val}.
-   *
-   * @param val value to be multiplied with {@code this}.
-   * @return {@code this * val}.
-   * @throws NullPointerException if {@code val == null}.
-   */
-  public BigInteger multiply(BigInteger val) {
-    // This let us to throw NullPointerException when val == null
-    if (val.sign == 0 || sign == 0) {
-      return ZERO;
-    }
-    return Multiplication.multiply(this, val);
-  }
-
-  /**
-   * Returns a new {@code BigInteger} whose value is the {@code -this}.
-   *
-   * @return {@code -this}.
-   */
-  public BigInteger negate() {
-    return sign == 0 ? this : new BigInteger(-sign, numberLength, digits);
-  }
-
-  /**
-   * Returns the smallest integer x > {@code this} which is probably prime as a
-   * {@code BigInteger} instance. The probability that the returned {@code
-   * BigInteger} is prime is beyond (1-1/2^80).
-   *
-   * @return smallest integer > {@code this} which is robably prime.
-   * @throws ArithmeticException if {@code this < 0}.
-   */
-  public BigInteger nextProbablePrime() {
-    if (sign < 0) {
-      // math.1A=start < 0: {0}
-      throw new ArithmeticException("start < 0: " + this); //$NON-NLS-1$
-    }
-    return Primality.nextProbablePrime(this);
-  }
-
-  /**
-   * Returns a new {@code BigInteger} whose value is {@code ~this}. The result
-   * of this operation is {@code -this-1}.
-   * <p>
-   * Implementation Note: Usage of this method is not recommended as the
-   * current implementation is not efficient.
-   *
-   * @return {@code ~this}.
-   */
-  public BigInteger not() {
-    return Logical.not(this);
-  }
-
-  /**
-   * Returns a new {@code BigInteger} whose value is {@code this | val}.
-   * <p>
-   * Implementation Note: Usage of this method is not recommended as the
-   * current implementation is not efficient.
-   *
-   * @param val value to be or'ed with {@code this}.
-   * @return {@code this | val}.
-   * @throws NullPointerException if {@code val == null}.
-   */
-  public BigInteger or(BigInteger val) {
-    return Logical.or(this, val);
-  }
-
-  /**
-   * Returns a new {@code BigInteger} whose value is {@code this ^ exp}.
-   *
-   * @param exp exponent to which {@code this} is raised.
-   * @return {@code this ^ exp}.
-   * @throws ArithmeticException if {@code exp < 0}.
-   */
-  public BigInteger pow(int exp) {
-    if (exp < 0) {
-      // math.16=Negative exponent
-      throw new ArithmeticException("Negative exponent"); //$NON-NLS-1$
-    }
-    if (exp == 0) {
-      return ONE;
-    } else if (exp == 1 || equals(ONE) || equals(ZERO)) {
-      return this;
-    }
-
-    // if even take out 2^x factor which we can
-    // calculate by shifting.
-    if (!testBit(0)) {
-      int x = 1;
-      while (!testBit(x)) {
-        x++;
-      }
-      return getPowerOfTwo(x * exp).multiply(this.shiftRight(x).pow(exp));
-    }
-    return Multiplication.pow(this, exp);
-  }
-
-  /**
-   * Returns a new {@code BigInteger} whose value is {@code this % divisor}.
-   * Regarding signs this methods has the same behavior as the % operator on
-   * int's, i.e. the sign of the remainder is the same as the sign of this.
-   *
-   * @param divisor value by which {@code this} is divided.
-   * @return {@code this % divisor}.
-   * @throws NullPointerException if {@code divisor == null}.
-   * @throws ArithmeticException if {@code divisor == 0}.
-   */
-  public BigInteger remainder(BigInteger divisor) {
-    if (divisor.sign == 0) {
-      // math.17=BigInteger divide by zero
-      throw new ArithmeticException("BigInteger divide by zero"); //$NON-NLS-1$
-    }
-    int thisLen = numberLength;
-    int divisorLen = divisor.numberLength;
-    if (((thisLen != divisorLen) ? ((thisLen > divisorLen) ? 1 : -1)
-        : Elementary.compareArrays(digits, divisor.digits, thisLen)) == LESS) {
-      return this;
-    }
-    int resLength = divisorLen;
-    int resDigits[] = new int[resLength];
-    if (resLength == 1) {
-      resDigits[0] = Division.remainderArrayByInt(digits, thisLen,
-          divisor.digits[0]);
-    } else {
-      int qLen = thisLen - divisorLen + 1;
-      resDigits = Division.divide(null, qLen, digits, thisLen, divisor.digits,
-          divisorLen);
-    }
-    BigInteger result = new BigInteger(sign, resLength, resDigits);
-    result.cutOffLeadingZeroes();
-    return result;
-  }
-
-  /**
-   * Returns a new {@code BigInteger} which has the same binary representation
-   * as {@code this} but with the bit at position n set. The result is
-   * equivalent to {@code this | 2^n}.
-   * <p>
-   * Implementation Note: Usage of this method is not recommended as the
-   * current implementation is not efficient.
-   *
-   * @param n position where the bit in {@code this} has to be set.
-   * @return {@code this | 2^n}.
-   * @throws ArithmeticException if {@code n < 0}.
-   */
-  public BigInteger setBit(int n) {
-    if (!testBit(n)) {
-      return BitLevel.flipBit(this, n);
-    }
-    return this;
-  }
-
-  /**
-   * Returns a new {@code BigInteger} whose value is {@code this << n}. The
-   * result is equivalent to {@code this * 2^n} if n >= 0. The shift distance
-   * may be negative which means that {@code this} is shifted right. The result
-   * then corresponds to {@code floor(this / 2^(-n))}.
-   * <p>
-   * Implementation Note: Usage of this method on negative values is not
-   * recommended as the current implementation is not efficient.
-   *
-   * @param n shift distance.
-   * @return {@code this << n} if {@code n >= 0}; {@code this >> (-n)}.
-   *         otherwise
-   */
-  public BigInteger shiftLeft(int n) {
-    if ((n == 0) || (sign == 0)) {
-      return this;
-    }
-    return ((n > 0) ? BitLevel.shiftLeft(this, n) : BitLevel.shiftRight(this,
-        -n));
-  }
-
-  /**
-   * Returns a new {@code BigInteger} whose value is {@code this >> n}. For
-   * negative arguments, the result is also negative. The shift distance may be
-   * negative which means that {@code this} is shifted left.
-   * <p>
-   * Implementation Note: Usage of this method on negative values is not
-   * recommended as the current implementation is not efficient.
-   *
-   * @param n shift distance
-   * @return {@code this >> n} if {@code n >= 0}; {@code this << (-n)} otherwise
-   */
-  public BigInteger shiftRight(int n) {
-    if ((n == 0) || (sign == 0)) {
-      return this;
-    }
-    return ((n > 0) ? BitLevel.shiftRight(this, n) : BitLevel.shiftLeft(this,
-        -n));
-  }
-
-  /**
-   * Converts value of this {@code BigInteger} to a {@code short} if it fits it,
-   * otherwise {@code ArithmeticException} is thrown.
-   *
-   * @return this {@code BigInteger} converted to a {@code short}.
-   * @throws ArithmeticException if the value of this {@code BigInteger}
-   *         does not fit in a {@code short}.
-   */
-  public short shortValueExact() {
-    if (numberLength <= 1 && bitLength() < Short.SIZE) {
-      return shortValue();
-    }
-    throw new ArithmeticException("out of short range");
-  }
-
-  /**
-   * Returns the sign of this {@code BigInteger}.
-   *
-   * @return {@code -1} if {@code this < 0}, {@code 0} if {@code this == 0},
-   *         {@code 1} if {@code this > 0}.
-   */
-  public int signum() {
-    return sign;
-  }
-
-  /**
-   * Returns a new {@code BigInteger} whose value is {@code this - val}.
-   *
-   * @param val value to be subtracted from {@code this}.
-   * @return {@code this - val}.
-   * @throws NullPointerException if {@code val == null}.
-   */
-  public BigInteger subtract(BigInteger val) {
-    return Elementary.subtract(this, val);
-  }
-
-  /**
-   * Tests whether the bit at position n in {@code this} is set. The result is
-   * equivalent to {@code this & (2^n) != 0}.
-   * <p>
-   * Implementation Note: Usage of this method is not recommended as the
-   * current implementation is not efficient.
-   *
-   * @param n position where the bit in {@code this} has to be inspected.
-   * @return {@code this & (2^n) != 0}.
-   * @throws ArithmeticException if {@code n < 0}.
-   */
-  public boolean testBit(int n) {
-    if (n == 0) {
-      return ((digits[0] & 1) != 0);
-    }
-    if (n < 0) {
-      // math.15=Negative bit address
-      throw new ArithmeticException("Negative bit address"); //$NON-NLS-1$
-    }
-    int intCount = n >> 5;
-    if (intCount >= numberLength) {
-      return (sign < 0);
-    }
-    int digit = digits[intCount];
-    n = (1 << (n & 31)); // int with 1 set to the needed position
-    if (sign < 0) {
-      int firstNonZeroDigit = getFirstNonzeroDigit();
-      if (intCount < firstNonZeroDigit) {
-        return false;
-      } else if (firstNonZeroDigit == intCount) {
-        digit = -digit;
-      } else {
-        digit = ~digit;
-      }
-    }
-    return ((digit & n) != 0);
-  }
-
-  /**
-   * Returns the two's complement representation of this BigInteger in a byte
-   * array.
-   *
-   * @return two's complement representation of {@code this}.
-   */
-  public byte[] toByteArray() {
-    if (this.sign == 0) {
-      return new byte[] {0};
-    }
-    BigInteger temp = this;
-    int bitLen = bitLength();
-    int iThis = getFirstNonzeroDigit();
-    int bytesLen = (bitLen >> 3) + 1;
-    /*
-     * Puts the little-endian int array representing the magnitude of this
-     * BigInteger into the big-endian byte array.
-     */
-    byte[] bytes = new byte[bytesLen];
-    int firstByteNumber = 0;
-    int highBytes;
-    int digitIndex = 0;
-    int bytesInInteger = 4;
-    int digit;
-    int hB;
-
-    if (bytesLen - (numberLength << 2) == 1) {
-      bytes[0] = (byte) ((sign < 0) ? -1 : 0);
-      highBytes = 4;
-      firstByteNumber++;
-    } else {
-      hB = bytesLen & 3;
-      highBytes = (hB == 0) ? 4 : hB;
-    }
-
-    digitIndex = iThis;
-    bytesLen -= iThis << 2;
-
-    if (sign < 0) {
-      digit = -temp.digits[digitIndex];
-      digitIndex++;
-      if (digitIndex == numberLength) {
-        bytesInInteger = highBytes;
-      }
-      for (int i = 0; i < bytesInInteger; i++, digit >>= 8) {
-        bytes[--bytesLen] = (byte) digit;
-      }
-      while (bytesLen > firstByteNumber) {
-        digit = ~temp.digits[digitIndex];
-        digitIndex++;
-        if (digitIndex == numberLength) {
-          bytesInInteger = highBytes;
-        }
-        for (int i = 0; i < bytesInInteger; i++, digit >>= 8) {
-          bytes[--bytesLen] = (byte) digit;
-        }
-      }
-    } else {
-      while (bytesLen > firstByteNumber) {
-        digit = temp.digits[digitIndex];
-        digitIndex++;
-        if (digitIndex == numberLength) {
-          bytesInInteger = highBytes;
-        }
-        for (int i = 0; i < bytesInInteger; i++, digit >>= 8) {
-          bytes[--bytesLen] = (byte) digit;
-        }
-      }
-    }
-    return bytes;
-  }
-
-  /**
-   * Returns a string representation of this {@code BigInteger} in decimal form.
-   *
-   * @return a string representation of {@code this} in decimal form.
-   */
-  @Override
-  public String toString() {
-    return Conversion.toDecimalScaledString(this, 0);
-  }
-
-  /**
-   * Returns a string containing a string representation of this {@code
-   * BigInteger} with base radix. If {@code radix} is less than
-   * {@link Character#MIN_RADIX} or greater than {@link Character#MAX_RADIX}
-   * then a decimal representation is returned. The characters of the string
-   * representation are generated with method {@link Character#forDigit}.
-   *
-   * @param radix base to be used for the string representation.
-   * @return a string representation of this with radix 10.
-   */
-  public String toString(int radix) {
-    return Conversion.bigInteger2String(this, radix);
-  }
-
-  /**
-   * Returns a new {@code BigInteger} whose value is {@code this ^ val}.
-   * <p>
-   * Implementation Note: Usage of this method is not recommended as the
-   * current implementation is not efficient.
-   *
-   * @param val value to be xor'ed with {@code this}
-   * @return {@code this ^ val}
-   * @throws NullPointerException if {@code val == null}
-   */
-  public BigInteger xor(BigInteger val) {
-    return Logical.xor(this, val);
-  }
-
-  /*
-   * Returns a copy of the current instance to achieve immutability
-   */
-  BigInteger copy() {
-    int[] copyDigits = new int[numberLength];
-    System.arraycopy(digits, 0, copyDigits, 0, numberLength);
-    return new BigInteger(sign, numberLength, copyDigits);
-  }
-
-  /* Private Methods */
-
-  /**
-   * Decreases {@code numberLength} if there are zero high elements.
-   */
-  final void cutOffLeadingZeroes() {
-    while ((numberLength > 0) && (digits[--numberLength] == 0)) {
-      // Empty
-    }
-    if (digits[numberLength++] == 0) {
-      sign = 0;
-    }
-  }
-
-  boolean equalsArrays(final int[] b) {
-    int i;
-    for (i = numberLength - 1; (i >= 0) && (digits[i] == b[i]); i--) {
-      // Empty
-    }
-    return i < 0;
-  }
-
-  int getFirstNonzeroDigit() {
-    if (firstNonzeroDigit == -2) {
-      int i;
-      if (this.sign == 0) {
-        i = -1;
-      } else {
-        for (i = 0; digits[i] == 0; i++) {
-          // Empty
-        }
-      }
-      firstNonzeroDigit = i;
-    }
-    return firstNonzeroDigit;
-  }
-
-  /**
-   * Tests if {@code this.abs()} is equals to {@code ONE}.
-   */
-  boolean isOne() {
-    return ((numberLength == 1) && (digits[0] == 1));
-  }
-
-  BigInteger shiftLeftOneBit() {
-    return (sign == 0) ? this : BitLevel.shiftLeftOneBit(this);
-  }
-
-  void unCache() {
-    firstNonzeroDigit = -2;
-  }
-
-  /**
-   * Puts a big-endian byte array into a little-endian applying two complement.
-   */
-  private void putBytesNegativeToIntegers(byte[] byteValues, int offset, int length) {
-    int bytesLen = length;
-    int highBytes = bytesLen & 3;
-    numberLength = (bytesLen >> 2) + ((highBytes == 0) ? 0 : 1);
-    digits = new int[numberLength];
-    int i = 0;
-    // Setting the sign
-    digits[numberLength - 1] = -1;
-    // Put bytes to the int array starting from the end of the byte array
-    while (bytesLen > highBytes) {
-      digits[i] = (byteValues[--bytesLen + offset] & 0xFF)
-          | (byteValues[--bytesLen + offset] & 0xFF) << 8
-          | (byteValues[--bytesLen + offset] & 0xFF) << 16
-          | (byteValues[--bytesLen + offset] & 0xFF) << 24;
-      if (digits[i] != 0) {
-        digits[i] = -digits[i];
-        firstNonzeroDigit = i;
-        i++;
-        while (bytesLen > highBytes) {
-          digits[i] = (byteValues[--bytesLen + offset] & 0xFF)
-              | (byteValues[--bytesLen + offset] & 0xFF) << 8
-              | (byteValues[--bytesLen + offset] & 0xFF) << 16
-              | (byteValues[--bytesLen + offset] & 0xFF) << 24;
-          digits[i] = ~digits[i];
-          i++;
-        }
-        break;
-      }
-      i++;
-    }
-    if (highBytes != 0) {
-      // Put the first bytes in the highest element of the int array
-      if (firstNonzeroDigit != -2) {
-        for (int j = offset; j < bytesLen + offset; j++) {
-          digits[i] = (digits[i] << 8) | (byteValues[j] & 0xFF);
-        }
-        digits[i] = ~digits[i];
-      } else {
-        for (int j = offset; j < bytesLen + offset; j++) {
-          digits[i] = (digits[i] << 8) | (byteValues[j] & 0xFF);
-        }
-        digits[i] = -digits[i];
-      }
-    }
-  }
-
-  /**
-   * Puts a big-endian byte array into a little-endian int array.
-   */
-  private void putBytesPositiveToIntegers(byte[] byteValues, int offset, int length) {
-    int bytesLen = length;
-    int highBytes = bytesLen & 3;
-    numberLength = (bytesLen >> 2) + ((highBytes == 0) ? 0 : 1);
-    digits = new int[numberLength];
-    int i = 0;
-    // Put bytes to the int array starting from the end of the byte array
-    while (bytesLen > highBytes) {
-      digits[i++] = (byteValues[--bytesLen + offset] & 0xFF)
-          | (byteValues[--bytesLen + offset] & 0xFF) << 8
-          | (byteValues[--bytesLen + offset] & 0xFF) << 16
-          | (byteValues[--bytesLen + offset] & 0xFF) << 24;
-    }
-    // Put the first bytes in the highest element of the int array
-    for (int j = offset; j < bytesLen + offset; j++) {
-      digits[i] = (digits[i] << 8) | (byteValues[j] & 0xFF);
-    }
-  }
-}
diff --git a/web/client-ui/Dockerfile b/web/client-ui/Dockerfile
index c735bb03594..11d58acfb38 100644
--- a/web/client-ui/Dockerfile
+++ b/web/client-ui/Dockerfile
@@ -2,10 +2,10 @@ FROM deephaven/node:local-build
 WORKDIR /usr/src/app
 
 # Most of the time, these versions are the same, except in cases where a patch only affects one of the packages
-ARG WEB_VERSION=0.99.0
+ARG WEB_VERSION=0.99.1
 ARG GRID_VERSION=0.99.0
 ARG CHART_VERSION=0.99.0
-ARG WIDGET_VERSION=0.99.0
+ARG WIDGET_VERSION=0.99.1
 
 # Pull in the published code-studio package from npmjs and extract is
 RUN set -eux; \