From 68b63b215539dbc40b6d9657bf8286519dd30a6c Mon Sep 17 00:00:00 2001 From: Stan Brubaker <120737309+stanbrub@users.noreply.github.com> Date: Tue, 7 Nov 2023 18:06:58 -0700 Subject: [PATCH 01/41] Post release bump to 0.31.0 (#4793) --- R/rdeephaven/DESCRIPTION | 2 +- authorization-codegen/protoc-gen-contextual-auth-wiring | 2 +- authorization-codegen/protoc-gen-service-auth-wiring | 2 +- buildSrc/src/main/groovy/io.deephaven.common-conventions.gradle | 2 +- py/client-ticking/README.md | 2 +- py/client-ticking/setup.py | 2 +- py/client/README.md | 2 +- py/client/pydeephaven/__init__.py | 2 +- py/client/setup.py | 2 +- py/embedded-server/deephaven_server/__init__.py | 2 +- py/server/deephaven/__init__.py | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) diff --git a/R/rdeephaven/DESCRIPTION b/R/rdeephaven/DESCRIPTION index 4900554b228..b3bdc2cb3b4 100644 --- a/R/rdeephaven/DESCRIPTION +++ b/R/rdeephaven/DESCRIPTION @@ -1,7 +1,7 @@ Package: rdeephaven Type: Package Title: R Client for Deephaven Core -Version: 0.30.0 +Version: 0.31.0 Date: 2023-05-12 Author: Deephaven Data Labs Maintainer: Alex Peters diff --git a/authorization-codegen/protoc-gen-contextual-auth-wiring b/authorization-codegen/protoc-gen-contextual-auth-wiring index 34edd31631d..3da564626b2 100755 --- a/authorization-codegen/protoc-gen-contextual-auth-wiring +++ b/authorization-codegen/protoc-gen-contextual-auth-wiring @@ -1,2 +1,2 @@ # protoc-gen-contextual-auth-wiring -java -cp authorization-codegen/build/libs/deephaven-authorization-codegen-0.30.0-all.jar io.deephaven.auth.codegen.GenerateContextualAuthWiring +java -cp authorization-codegen/build/libs/deephaven-authorization-codegen-0.31.0-all.jar io.deephaven.auth.codegen.GenerateContextualAuthWiring diff --git a/authorization-codegen/protoc-gen-service-auth-wiring b/authorization-codegen/protoc-gen-service-auth-wiring index 9b088c2a4ef..2d552991072 100755 --- a/authorization-codegen/protoc-gen-service-auth-wiring +++ b/authorization-codegen/protoc-gen-service-auth-wiring @@ -1,2 +1,2 @@ # protoc-gen-service-auth-wiring -java -cp authorization-codegen/build/libs/deephaven-authorization-codegen-0.30.0-all.jar io.deephaven.auth.codegen.GenerateServiceAuthWiring +java -cp authorization-codegen/build/libs/deephaven-authorization-codegen-0.31.0-all.jar io.deephaven.auth.codegen.GenerateServiceAuthWiring diff --git a/buildSrc/src/main/groovy/io.deephaven.common-conventions.gradle b/buildSrc/src/main/groovy/io.deephaven.common-conventions.gradle index eb115d35cc7..5bb440b1221 100644 --- a/buildSrc/src/main/groovy/io.deephaven.common-conventions.gradle +++ b/buildSrc/src/main/groovy/io.deephaven.common-conventions.gradle @@ -5,7 +5,7 @@ plugins { } group = 'io.deephaven' -version = '0.30.0' +version = '0.31.0' if (!name.startsWith('deephaven-')) { archivesBaseName = "deephaven-${name}" diff --git a/py/client-ticking/README.md b/py/client-ticking/README.md index c1391934bb6..0892f2d2a21 100644 --- a/py/client-ticking/README.md +++ b/py/client-ticking/README.md @@ -66,7 +66,7 @@ Then install the package. Note the actual name of the `.whl` file may be different depending on system details. 
``` -pip3 install --force --no-deps dist/pydeephaven_ticking-0.30.0-cp310-cp310-linux_x86_64.whl +pip3 install --force --no-deps dist/pydeephaven_ticking-0.31.0-cp310-cp310-linux_x86_64.whl ``` The reason for the "--force" flag is to overwrite any previously-built version of the package that diff --git a/py/client-ticking/setup.py b/py/client-ticking/setup.py index fa34a64311b..62ae8767bd8 100644 --- a/py/client-ticking/setup.py +++ b/py/client-ticking/setup.py @@ -44,5 +44,5 @@ libraries=["dhcore_static"] )]), python_requires='>=3.8', - install_requires=['pydeephaven==0.30.0'] + install_requires=['pydeephaven==0.31.0'] ) diff --git a/py/client/README.md b/py/client/README.md index 76588f249b4..c52af7bceef 100644 --- a/py/client/README.md +++ b/py/client/README.md @@ -38,7 +38,7 @@ $ python3 -m examples.demo_asof_join Note the actual name of the `.whl` file may be different depending on system details. ``` shell -$ pip3 install dist/pydeephaven-0.30.0-py3-none-any.whl +$ pip3 install dist/pydeephaven-0.31.0-py3-none-any.whl ``` ## Quick start diff --git a/py/client/pydeephaven/__init__.py b/py/client/pydeephaven/__init__.py index ef669e429ee..737f752bf21 100644 --- a/py/client/pydeephaven/__init__.py +++ b/py/client/pydeephaven/__init__.py @@ -35,4 +35,4 @@ pass __all__ = ["Session", "DHError", "SortDirection"] -__version__ = "0.30.0" +__version__ = "0.31.0" diff --git a/py/client/setup.py b/py/client/setup.py index fed9b59bb25..0d0826a9973 100644 --- a/py/client/setup.py +++ b/py/client/setup.py @@ -12,7 +12,7 @@ setup( name='pydeephaven', - version='0.30.0', + version='0.31.0', description='The Deephaven Python Client', long_description=README, long_description_content_type="text/markdown", diff --git a/py/embedded-server/deephaven_server/__init__.py b/py/embedded-server/deephaven_server/__init__.py index 6b863cbd8cd..f259fa65e23 100644 --- a/py/embedded-server/deephaven_server/__init__.py +++ b/py/embedded-server/deephaven_server/__init__.py @@ -1,7 +1,7 @@ # # Copyright (c) 2016-2022 Deephaven Data Labs and Patent Pending # -__version__ = "0.30.0" +__version__ = "0.31.0" from .start_jvm import DEFAULT_JVM_PROPERTIES, DEFAULT_JVM_ARGS, start_jvm from .server import Server diff --git a/py/server/deephaven/__init__.py b/py/server/deephaven/__init__.py index 8ac3624b708..d705c58049a 100644 --- a/py/server/deephaven/__init__.py +++ b/py/server/deephaven/__init__.py @@ -7,7 +7,7 @@ """ -__version__ = "0.30.0" +__version__ = "0.31.0" from deephaven_internal import jvm From 0d31629228d15779bccc7e575bd01add24157b02 Mon Sep 17 00:00:00 2001 From: Corey Kosak Date: Wed, 8 Nov 2023 16:21:20 -0500 Subject: [PATCH 02/41] C++ Client: improve proto build script test for missing envars (#4796) --- proto/proto-backplane-grpc/src/main/proto/build-cpp-protos.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/proto/proto-backplane-grpc/src/main/proto/build-cpp-protos.sh b/proto/proto-backplane-grpc/src/main/proto/build-cpp-protos.sh index 9b36346a61d..c24b0109a50 100755 --- a/proto/proto-backplane-grpc/src/main/proto/build-cpp-protos.sh +++ b/proto/proto-backplane-grpc/src/main/proto/build-cpp-protos.sh @@ -2,7 +2,7 @@ set -euxo pipefail -if [ -z "$PROTOC_BIN" ] && [ -z "$DHCPP" ]; then +if [ -z ${PROTOC_BIN:+x} ] && [ -z ${DHCPP:+x} ]; then echo "$0: At least one of the environment variables 'PROTOC_BIN' and 'DHCPP' must be defined, aborting." 
1>&2 exit 1 fi From 8c154ff235f055ed802e40b2a48e3373b7ed93bd Mon Sep 17 00:00:00 2001 From: Corey Kosak Date: Wed, 8 Nov 2023 16:21:32 -0500 Subject: [PATCH 03/41] C++ Client: some cleanups found while working on Windows port (#4797) --- cpp-client/deephaven/dhclient/CMakeLists.txt | 2 - .../deephaven/client/impl/escape_utils.h | 16 ---- .../dhclient/src/impl/escape_utils.cc | 61 ------------- .../dhcore/immerutil/immer_column_source.h | 18 ++-- .../include/public/deephaven/dhcore/types.h | 10 +-- .../public/deephaven/dhcore/utility/utility.h | 8 +- cpp-client/deephaven/dhcore/src/types.cc | 87 ++++++++++--------- 7 files changed, 58 insertions(+), 144 deletions(-) delete mode 100644 cpp-client/deephaven/dhclient/include/private/deephaven/client/impl/escape_utils.h delete mode 100644 cpp-client/deephaven/dhclient/src/impl/escape_utils.cc diff --git a/cpp-client/deephaven/dhclient/CMakeLists.txt b/cpp-client/deephaven/dhclient/CMakeLists.txt index d69e0867650..84ee59f9a6b 100644 --- a/cpp-client/deephaven/dhclient/CMakeLists.txt +++ b/cpp-client/deephaven/dhclient/CMakeLists.txt @@ -19,14 +19,12 @@ set(ALL_FILES src/impl/aggregate_impl.cc src/impl/client_impl.cc - src/impl/escape_utils.cc src/impl/table_handle_impl.cc src/impl/table_handle_manager_impl.cc src/impl/update_by_operation_impl.cc include/private/deephaven/client/impl/aggregate_impl.h include/private/deephaven/client/impl/client_impl.h - include/private/deephaven/client/impl/escape_utils.h include/private/deephaven/client/impl/table_handle_impl.h include/private/deephaven/client/impl/table_handle_manager_impl.h include/private/deephaven/client/impl/update_by_operation_impl.h diff --git a/cpp-client/deephaven/dhclient/include/private/deephaven/client/impl/escape_utils.h b/cpp-client/deephaven/dhclient/include/private/deephaven/client/impl/escape_utils.h deleted file mode 100644 index 62a3782de2a..00000000000 --- a/cpp-client/deephaven/dhclient/include/private/deephaven/client/impl/escape_utils.h +++ /dev/null @@ -1,16 +0,0 @@ -/** - * Copyright (c) 2016-2022 Deephaven Data Labs and Patent Pending - */ -#pragma once - -#include -#include - -namespace deephaven::client::impl { -class EscapeUtils { -public: - [[nodiscard]] - static std::string EscapeJava(std::string_view s); - static void AppendEscapedJava(std::string_view s, std::string *dest); -}; -} // namespace deephaven::client::impl diff --git a/cpp-client/deephaven/dhclient/src/impl/escape_utils.cc b/cpp-client/deephaven/dhclient/src/impl/escape_utils.cc deleted file mode 100644 index 66f8cf47be7..00000000000 --- a/cpp-client/deephaven/dhclient/src/impl/escape_utils.cc +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2016-2022 Deephaven Data Labs and Patent Pending - */ -#include "deephaven/client/impl/escape_utils.h" - -#include -#include -#include - -namespace deephaven::client { -namespace impl { -std::string EscapeUtils::EscapeJava(std::string_view s) { - std::string result; - AppendEscapedJava(s, &result); - return result; -} - -void EscapeUtils::AppendEscapedJava(std::string_view s, std::string *dest) { - typedef std::wstring_convert, char16_t> converter_t; - std::u16string u16s = converter_t().from_bytes(s.begin(), s.end()); - - for (auto u16ch: u16s) { - switch (u16ch) { - case '\b': - dest->append("\\b"); - continue; - case '\f': - dest->append("\\f"); - continue; - case '\n': - dest->append("\\n"); - continue; - case '\r': - dest->append("\\r"); - continue; - case '\t': - dest->append("\\t"); - continue; - case '"': - case '\'': - case '\\': - 
dest->push_back('\\'); - // The cast is to silence Clang-Tidy. - dest->push_back(static_cast(u16ch)); - continue; - default: - break; - } - - if (u16ch < 32 || u16ch > 0x7f) { - char buffer[16]; // plenty - snprintf(buffer, sizeof(buffer), "\\u%04x", u16ch); - dest->append(buffer); - continue; - } - // The cast is to silence Clang-Tidy. - dest->push_back(static_cast(u16ch)); - } -} -} // namespace impl -} // namespace deephaven::client diff --git a/cpp-client/deephaven/dhcore/include/private/deephaven/dhcore/immerutil/immer_column_source.h b/cpp-client/deephaven/dhcore/include/private/deephaven/dhcore/immerutil/immer_column_source.h index 415cb25ff74..04d6a944dba 100644 --- a/cpp-client/deephaven/dhcore/include/private/deephaven/dhcore/immerutil/immer_column_source.h +++ b/cpp-client/deephaven/dhcore/include/private/deephaven/dhcore/immerutil/immer_column_source.h @@ -57,7 +57,7 @@ struct ImmerColumnSourceImpls { for (const T *current = data_begin; current != data_end; ++current) { auto value = *current; *dest_datap++ = value; - if constexpr(kTypeIsNumeric) { + if constexpr(deephaven::dhcore::DeephavenTraits::kIsNumeric) { if (dest_nullp != nullptr) { *dest_nullp++ = value == deephaven::dhcore::DeephavenTraits::kNullValue; } @@ -74,16 +74,16 @@ struct ImmerColumnSourceImpls { } }; - auto copyOuter = [&src_data, src_null_flags, dest_nullp, ©_data_inner, - ©_nulls_inner](uint64_t srcBegin, uint64_t srcEnd) { - auto src_beginp = src_data.begin() + srcBegin; - auto src_endp = src_data.begin() + srcEnd; + auto copy_outer = [&src_data, src_null_flags, dest_nullp, ©_data_inner, + ©_nulls_inner](uint64_t src_begin, uint64_t src_end) { + auto src_beginp = src_data.begin() + src_begin; + auto src_endp = src_data.begin() + src_end; immer::for_each_chunk(src_beginp, src_endp, copy_data_inner); - if constexpr(!kTypeIsNumeric) { + if constexpr(!deephaven::dhcore::DeephavenTraits::kIsNumeric) { if (dest_nullp != nullptr) { - auto nulls_begin = src_null_flags->begin() + srcBegin; - auto nulls_end = src_null_flags->begin() + srcEnd; + auto nulls_begin = src_null_flags->begin() + src_begin; + auto nulls_end = src_null_flags->begin() + src_end; immer::for_each_chunk(nulls_begin, nulls_end, copy_nulls_inner); } } else { @@ -93,7 +93,7 @@ struct ImmerColumnSourceImpls { (void)copy_nulls_inner; } }; - rows.ForEachInterval(copyOuter); + rows.ForEachInterval(copy_outer); } template diff --git a/cpp-client/deephaven/dhcore/include/public/deephaven/dhcore/types.h b/cpp-client/deephaven/dhcore/include/public/deephaven/dhcore/types.h index 5ae3d50cef0..8dd8d837724 100644 --- a/cpp-client/deephaven/dhcore/include/public/deephaven/dhcore/types.h +++ b/cpp-client/deephaven/dhcore/include/public/deephaven/dhcore/types.h @@ -386,17 +386,9 @@ class DateTime { [[nodiscard]] int64_t Nanos() const { return nanos_; } - /** - * Used internally to serialize this object to Deephaven. 
- */ - void StreamIrisRepresentation(std::ostream &result) const; - private: int64_t nanos_ = 0; - friend std::ostream &operator<<(std::ostream &s, const DateTime &o) { - o.StreamIrisRepresentation(s); - return s; - } + friend std::ostream &operator<<(std::ostream &s, const DateTime &o); }; } // namespace deephaven::dhcore diff --git a/cpp-client/deephaven/dhcore/include/public/deephaven/dhcore/utility/utility.h b/cpp-client/deephaven/dhcore/include/public/deephaven/dhcore/utility/utility.h index 7829c75c99a..962de59d28a 100644 --- a/cpp-client/deephaven/dhcore/include/public/deephaven/dhcore/utility/utility.h +++ b/cpp-client/deephaven/dhcore/include/public/deephaven/dhcore/utility/utility.h @@ -159,10 +159,10 @@ internal::SeparatedListAdaptor separatedList(Iterator begin, #define DEEPHAVEN_PRETTY_FUNCTION __PRETTY_FUNCTION__ #elif defined(__GNUC__) #define DEEPHAVEN_PRETTY_FUNCTION __PRETTY_FUNCTION__ -#elif defined(__MSC_VER) +#elif defined(_MSC_VER) #define DEEPHAVEN_PRETTY_FUNCTION __FUNCSIG__ #else -# error Unsupported compiler +#error "Don't have a specialization of DEEPHAVEN_PRETTY_FUNCTION for your compiler" #endif class DebugInfo { @@ -186,8 +186,8 @@ std::string FormatDebugString(const char *func, const char *file, size_t line, * containing with __PRETTY_FUNCTION__, __FILE__, __LINE__ and the stringified arguments. This is * useful for functions who want to throw an exception with caller information. */ -#define DEEPHAVEN_LOCATION_EXPR(ARGS...) \ - ::deephaven::dhcore::utility::DebugInfo(DEEPHAVEN_PRETTY_FUNCTION, __FILE__, __LINE__, #ARGS),ARGS +#define DEEPHAVEN_LOCATION_EXPR(...) \ + ::deephaven::dhcore::utility::DebugInfo(DEEPHAVEN_PRETTY_FUNCTION, __FILE__, __LINE__, #__VA_ARGS__),__VA_ARGS__ #define DEEPHAVEN_LOCATION_STR(MESSAGE) \ ::deephaven::dhcore::utility::FormatDebugString( \ diff --git a/cpp-client/deephaven/dhcore/src/types.cc b/cpp-client/deephaven/dhcore/src/types.cc index 7933a17bcdd..eae79a51f98 100644 --- a/cpp-client/deephaven/dhcore/src/types.cc +++ b/cpp-client/deephaven/dhcore/src/types.cc @@ -11,45 +11,45 @@ using deephaven::dhcore::utility::Stringf; namespace deephaven::dhcore { -const char16_t DeephavenConstants::kNullChar; - -const float DeephavenConstants::kNullFloat; -const float DeephavenConstants::kNanFloat; -const float DeephavenConstants::kNegInfinityFloat; -const float DeephavenConstants::kPosInfinityFloat; -const float DeephavenConstants::kMinFloat; -const float DeephavenConstants::kMaxFloat; -const float DeephavenConstants::kMinFiniteFloat = +constexpr const char16_t DeephavenConstants::kNullChar; + +constexpr const float DeephavenConstants::kNullFloat; +constexpr const float DeephavenConstants::kNanFloat; +constexpr const float DeephavenConstants::kNegInfinityFloat; +constexpr const float DeephavenConstants::kPosInfinityFloat; +constexpr const float DeephavenConstants::kMinFloat; +constexpr const float DeephavenConstants::kMaxFloat; +/* constexpr clang dislikes */ const float DeephavenConstants::kMinFiniteFloat = std::nextafter(-std::numeric_limits::max(), 0.0F); -const float DeephavenConstants::kMaxFiniteFloat; -const float DeephavenConstants::kMinPosFloat; - -const double DeephavenConstants::kNullDouble; -const double DeephavenConstants::kNanDouble; -const double DeephavenConstants::kNegInfinityDouble; -const double DeephavenConstants::kPosInfinityDouble; -const double DeephavenConstants::kMinDouble; -const double DeephavenConstants::kMaxDouble; -const double DeephavenConstants::kMinFiniteDouble = +constexpr const float 
DeephavenConstants::kMaxFiniteFloat; +constexpr const float DeephavenConstants::kMinPosFloat; + +constexpr const double DeephavenConstants::kNullDouble; +constexpr const double DeephavenConstants::kNanDouble; +constexpr const double DeephavenConstants::kNegInfinityDouble; +constexpr const double DeephavenConstants::kPosInfinityDouble; +constexpr const double DeephavenConstants::kMinDouble; +constexpr const double DeephavenConstants::kMaxDouble; +/* constexpr clang dislikes */ const double DeephavenConstants::kMinFiniteDouble = std::nextafter(-std::numeric_limits::max(), 0.0); -const double DeephavenConstants::kMaxFiniteDouble; -const double DeephavenConstants::kMinPosDouble; +constexpr const double DeephavenConstants::kMaxFiniteDouble; +constexpr const double DeephavenConstants::kMinPosDouble; -const int8_t DeephavenConstants::kNullByte; -const int8_t DeephavenConstants::kMinByte; -const int8_t DeephavenConstants::kMaxByte; +constexpr const int8_t DeephavenConstants::kNullByte; +constexpr const int8_t DeephavenConstants::kMinByte; +constexpr const int8_t DeephavenConstants::kMaxByte; -const int16_t DeephavenConstants::kNullShort; -const int16_t DeephavenConstants::kMinShort; -const int16_t DeephavenConstants::kMaxShort; +constexpr const int16_t DeephavenConstants::kNullShort; +constexpr const int16_t DeephavenConstants::kMinShort; +constexpr const int16_t DeephavenConstants::kMaxShort; -const int32_t DeephavenConstants::kNullInt; -const int32_t DeephavenConstants::kMinInt; -const int32_t DeephavenConstants::kMaxInt; +constexpr const int32_t DeephavenConstants::kNullInt; +constexpr const int32_t DeephavenConstants::kMinInt; +constexpr const int32_t DeephavenConstants::kMaxInt; -const int64_t DeephavenConstants::kNullLong; -const int64_t DeephavenConstants::kMinLong; -const int64_t DeephavenConstants::kMaxLong; +constexpr const int64_t DeephavenConstants::kNullLong; +constexpr const int64_t DeephavenConstants::kMinLong; +constexpr const int64_t DeephavenConstants::kMaxLong; DateTime DateTime::Parse(std::string_view iso_8601_timestamp) { constexpr const char *kFormatToUse = "%Y-%m-%dT%H:%M:%S%z"; @@ -93,16 +93,17 @@ DateTime::DateTime(int year, int month, int day, int hour, int minute, int secon nanos_ = static_cast(time) + nanos; } -void DateTime::StreamIrisRepresentation(std::ostream &s) const { - size_t oneBillion = 1000000000; - time_t timeSecs = nanos_ / oneBillion; - auto nanos = nanos_ % oneBillion; +std::ostream &operator<<(std::ostream &s, const DateTime &o) { + size_t one_billions = 1'000'000'000; + time_t time_secs = o.nanos_ / one_billions; + auto nanos = o.nanos_ % one_billions; struct tm tm = {}; - gmtime_r(&timeSecs, &tm); - char dateBuffer[32]; // ample - char nanosBuffer[32]; // ample - strftime(dateBuffer, sizeof(dateBuffer), "%FT%T", &tm); - snprintf(nanosBuffer, sizeof(nanosBuffer), "%09zd", nanos); - s << dateBuffer << '.' << nanosBuffer << " UTC"; + gmtime_r(&time_secs, &tm); + char date_buffer[32]; // ample + char nanos_buffer[32]; // ample + strftime(date_buffer, sizeof(date_buffer), "%FT%T", &tm); + snprintf(nanos_buffer, sizeof(nanos_buffer), "%09zd", nanos); + s << date_buffer << '.' << nanos_buffer << " UTC"; + return s; } } // namespace deephaven::client From 475b383161e1cb21040403a869b9c18ef86f053f Mon Sep 17 00:00:00 2001 From: Mike Bender Date: Thu, 9 Nov 2023 16:46:44 -0500 Subject: [PATCH 04/41] Add support for MultiXYErrorBarSeries, MultiOHLCSeries figures (#4763) - Add support for MultiXYErrorBarSeries, MultiOHLCSeries. 
They weren't wired up at all - Report an error properly for MultiSeries types that are not supported - Change the JsFigure.getErrors() to be a `@JsProperty` instead - Currently not listed in the JS API and not used by the Web UI at all, so I think it's a reasonable change to make - not breaking an existing documented API. - This should be ported to Enterprise as well. I have branch `bender_figure-errors` pushed for this, ready to open up a PR for. - Fixes #4709 - Tested using the code snippets in the ticket --- .../datasets/multiseries/MultiOHLCSeries.java | 20 +++ .../multiseries/MultiXYErrorBarSeries.java | 24 ++++ .../figure/FigureWidgetTranslator.java | 116 +++++++++++++++++- .../web/client/api/widget/plot/JsFigure.java | 9 +- 4 files changed, 166 insertions(+), 3 deletions(-) diff --git a/Plot/src/main/java/io/deephaven/plot/datasets/multiseries/MultiOHLCSeries.java b/Plot/src/main/java/io/deephaven/plot/datasets/multiseries/MultiOHLCSeries.java index cc3085413e9..44b04d1b8c1 100644 --- a/Plot/src/main/java/io/deephaven/plot/datasets/multiseries/MultiOHLCSeries.java +++ b/Plot/src/main/java/io/deephaven/plot/datasets/multiseries/MultiOHLCSeries.java @@ -83,6 +83,26 @@ public OHLCDataSeriesInternal createSeries(String seriesName, final BaseTable t, timeCol, openCol, highCol, lowCol, closeCol); } + public String getTimeCol() { + return timeCol; + } + + public String getOpenCol() { + return openCol; + } + + public String getHighCol() { + return highCol; + } + + public String getLowCol() { + return lowCol; + } + + public String getCloseCol() { + return closeCol; + } + ////////////////////////////// CODE BELOW HERE IS GENERATED -- DO NOT EDIT BY HAND ////////////////////////////// ////////////////////////////// TO REGENERATE RUN GenerateMultiSeries ////////////////////////////// ////////////////////////////// AND THEN RUN GenerateFigureImmutable ////////////////////////////// diff --git a/Plot/src/main/java/io/deephaven/plot/datasets/multiseries/MultiXYErrorBarSeries.java b/Plot/src/main/java/io/deephaven/plot/datasets/multiseries/MultiXYErrorBarSeries.java index 83b0f296930..13cf5911b5c 100644 --- a/Plot/src/main/java/io/deephaven/plot/datasets/multiseries/MultiXYErrorBarSeries.java +++ b/Plot/src/main/java/io/deephaven/plot/datasets/multiseries/MultiXYErrorBarSeries.java @@ -107,6 +107,30 @@ public XYErrorBarDataSeriesInternal createSeries(String seriesName, final BaseTa drawXError, drawYError); } + public boolean getDrawXError() { + return drawXError; + } + + public boolean getDrawYError() { + return drawYError; + } + + public String getXLow() { + return xLow; + } + + public String getXHigh() { + return xHigh; + } + + public String getYLow() { + return yLow; + } + + public String getYHigh() { + return yHigh; + } + ////////////////////////////// CODE BELOW HERE IS GENERATED -- DO NOT EDIT BY HAND ////////////////////////////// ////////////////////////////// TO REGENERATE RUN GenerateMultiSeries ////////////////////////////// ////////////////////////////// AND THEN RUN GenerateFigureImmutable ////////////////////////////// diff --git a/plugin/figure/src/main/java/io/deephaven/figure/FigureWidgetTranslator.java b/plugin/figure/src/main/java/io/deephaven/figure/FigureWidgetTranslator.java index 09c2a92f874..3888321f23d 100644 --- a/plugin/figure/src/main/java/io/deephaven/figure/FigureWidgetTranslator.java +++ b/plugin/figure/src/main/java/io/deephaven/figure/FigureWidgetTranslator.java @@ -32,6 +32,8 @@ import io.deephaven.plot.datasets.multiseries.AbstractMultiSeries; import 
io.deephaven.plot.datasets.multiseries.AbstractPartitionedTableHandleMultiSeries; import io.deephaven.plot.datasets.multiseries.MultiCatSeries; +import io.deephaven.plot.datasets.multiseries.MultiOHLCSeries; +import io.deephaven.plot.datasets.multiseries.MultiXYErrorBarSeries; import io.deephaven.plot.datasets.multiseries.MultiXYSeries; import io.deephaven.plot.datasets.ohlc.OHLCDataSeriesArray; import io.deephaven.plot.datasets.xy.AbstractXYDataSeries; @@ -518,6 +520,119 @@ private FigureDescriptor.ChartDescriptor translate(ChartImpl chart) { clientSeries.setPointShape(stringMapWithDefault(mergeShapes( multiCatSeries.pointShapeSeriesNameToStringMap(), multiCatSeries.pointShapeSeriesNameToShapeMap()))); + } else if (partitionedTableMultiSeries instanceof MultiXYErrorBarSeries) { + MultiXYErrorBarSeries multiXYErrorBarSeries = + (MultiXYErrorBarSeries) partitionedTableMultiSeries; + + clientAxes.add(makePartitionedTableSourceDescriptor( + plotHandle, multiXYErrorBarSeries.getX(), SourceType.X, xAxis)); + if (multiXYErrorBarSeries.getDrawXError()) { + clientAxes.add(makePartitionedTableSourceDescriptor( + plotHandle, multiXYErrorBarSeries.getXLow(), SourceType.X_LOW, xAxis)); + clientAxes.add(makePartitionedTableSourceDescriptor( + plotHandle, multiXYErrorBarSeries.getXHigh(), SourceType.X_HIGH, + xAxis)); + } + + clientAxes.add(makePartitionedTableSourceDescriptor( + plotHandle, multiXYErrorBarSeries.getY(), SourceType.Y, yAxis)); + if (multiXYErrorBarSeries.getDrawYError()) { + clientAxes.add(makePartitionedTableSourceDescriptor( + plotHandle, multiXYErrorBarSeries.getYLow(), SourceType.Y_LOW, yAxis)); + clientAxes.add(makePartitionedTableSourceDescriptor( + plotHandle, multiXYErrorBarSeries.getYHigh(), SourceType.Y_HIGH, + yAxis)); + } + + clientSeries.setLineColor(stringMapWithDefault(mergeColors( + multiXYErrorBarSeries.lineColorSeriesNameTointMap(), + multiXYErrorBarSeries.lineColorSeriesNameToStringMap(), + multiXYErrorBarSeries.lineColorSeriesNameToPaintMap()))); + clientSeries.setPointColor(stringMapWithDefault(mergeColors( + multiXYErrorBarSeries.pointColorSeriesNameTointMap(), + multiXYErrorBarSeries.pointColorSeriesNameToStringMap(), + multiXYErrorBarSeries.pointColorSeriesNameToPaintMap()))); + clientSeries.setLinesVisible( + boolMapWithDefault( + multiXYErrorBarSeries.linesVisibleSeriesNameToBooleanMap())); + clientSeries.setPointsVisible( + boolMapWithDefault( + multiXYErrorBarSeries.pointsVisibleSeriesNameToBooleanMap())); + clientSeries.setGradientVisible( + boolMapWithDefault( + multiXYErrorBarSeries.gradientVisibleSeriesNameTobooleanMap())); + clientSeries.setPointLabelFormat(stringMapWithDefault( + multiXYErrorBarSeries.pointLabelFormatSeriesNameToStringMap())); + clientSeries.setXToolTipPattern( + stringMapWithDefault( + multiXYErrorBarSeries.xToolTipPatternSeriesNameToStringMap())); + clientSeries.setYToolTipPattern( + stringMapWithDefault( + multiXYErrorBarSeries.yToolTipPatternSeriesNameToStringMap())); + clientSeries.setPointLabel(stringMapWithDefault( + multiXYErrorBarSeries.pointLabelSeriesNameToObjectMap(), + Objects::toString)); + clientSeries.setPointSize(doubleMapWithDefault( + multiXYErrorBarSeries.pointSizeSeriesNameToNumberMap(), + number -> number == null ? 
null : number.doubleValue())); + + clientSeries.setPointShape(stringMapWithDefault(mergeShapes( + multiXYErrorBarSeries.pointShapeSeriesNameToStringMap(), + multiXYErrorBarSeries.pointShapeSeriesNameToShapeMap()))); + } else if (partitionedTableMultiSeries instanceof MultiOHLCSeries) { + MultiOHLCSeries multiOHLCSeries = + (MultiOHLCSeries) partitionedTableMultiSeries; + + clientAxes.add(makePartitionedTableSourceDescriptor( + plotHandle, multiOHLCSeries.getTimeCol(), SourceType.TIME, xAxis)); + clientAxes.add(makePartitionedTableSourceDescriptor( + plotHandle, multiOHLCSeries.getOpenCol(), SourceType.OPEN, yAxis)); + clientAxes.add(makePartitionedTableSourceDescriptor( + plotHandle, multiOHLCSeries.getCloseCol(), SourceType.CLOSE, yAxis)); + clientAxes.add(makePartitionedTableSourceDescriptor( + plotHandle, multiOHLCSeries.getHighCol(), SourceType.HIGH, yAxis)); + clientAxes.add(makePartitionedTableSourceDescriptor( + plotHandle, multiOHLCSeries.getLowCol(), SourceType.LOW, yAxis)); + + clientSeries.setLineColor(stringMapWithDefault(mergeColors( + multiOHLCSeries.lineColorSeriesNameTointMap(), + multiOHLCSeries.lineColorSeriesNameToStringMap(), + multiOHLCSeries.lineColorSeriesNameToPaintMap()))); + clientSeries.setPointColor(stringMapWithDefault(mergeColors( + multiOHLCSeries.pointColorSeriesNameTointMap(), + multiOHLCSeries.pointColorSeriesNameToStringMap(), + multiOHLCSeries.pointColorSeriesNameToPaintMap()))); + clientSeries.setLinesVisible( + boolMapWithDefault( + multiOHLCSeries.linesVisibleSeriesNameToBooleanMap())); + clientSeries.setPointsVisible( + boolMapWithDefault( + multiOHLCSeries.pointsVisibleSeriesNameToBooleanMap())); + clientSeries.setGradientVisible( + boolMapWithDefault( + multiOHLCSeries.gradientVisibleSeriesNameTobooleanMap())); + clientSeries.setPointLabelFormat(stringMapWithDefault( + multiOHLCSeries.pointLabelFormatSeriesNameToStringMap())); + clientSeries.setXToolTipPattern( + stringMapWithDefault( + multiOHLCSeries.xToolTipPatternSeriesNameToStringMap())); + clientSeries.setYToolTipPattern( + stringMapWithDefault( + multiOHLCSeries.yToolTipPatternSeriesNameToStringMap())); + clientSeries.setPointLabel(stringMapWithDefault( + multiOHLCSeries.pointLabelSeriesNameToObjectMap(), + Objects::toString)); + clientSeries.setPointSize(doubleMapWithDefault( + multiOHLCSeries.pointSizeSeriesNameToNumberMap(), + number -> number == null ? null : number.doubleValue())); + + clientSeries.setPointShape(stringMapWithDefault(mergeShapes( + multiOHLCSeries.pointShapeSeriesNameToStringMap(), + multiOHLCSeries.pointShapeSeriesNameToShapeMap()))); + } else { + errorList.add( + "OpenAPI presently does not support series of type " + + partitionedTableMultiSeries.getClass()); } } else { errorList.add( @@ -530,7 +645,6 @@ private FigureDescriptor.ChartDescriptor translate(ChartImpl chart) { } else { errorList.add( "OpenAPI presently does not support series of type " + seriesInternal.getClass()); - // TODO handle multi-series, possibly transformed case? 
} }); }); diff --git a/web/client-api/src/main/java/io/deephaven/web/client/api/widget/plot/JsFigure.java b/web/client-api/src/main/java/io/deephaven/web/client/api/widget/plot/JsFigure.java index f373494f1bc..04f2a7989a7 100644 --- a/web/client-api/src/main/java/io/deephaven/web/client/api/widget/plot/JsFigure.java +++ b/web/client-api/src/main/java/io/deephaven/web/client/api/widget/plot/JsFigure.java @@ -166,6 +166,8 @@ public String toString() { private JsChart[] charts; + private JsArray errors; + private JsTable[] tables; private Map plotHandlesToTables; @@ -201,6 +203,8 @@ public Promise refetch() { .map(chartDescriptor -> new JsChart(chartDescriptor, this)).toArray(JsChart[]::new); JsObject.freeze(charts); + errors = JsObject.freeze(descriptor.getErrorsList().slice()); + return this.tableFetch.fetch(this, response); }).then(tableFetchData -> { // all tables are wired up, need to map them to the series instances @@ -324,8 +328,9 @@ public JsChart[] getCharts() { return charts; } - public String[] getErrors() { - return Js.uncheckedCast(descriptor.getErrorsList().slice()); + @JsProperty + public JsArray getErrors() { + return errors; } /** From c2dd5f37b6d55dd1d7ca7014b9dac98e9059274d Mon Sep 17 00:00:00 2001 From: Devin Smith Date: Thu, 9 Nov 2023 14:43:16 -0800 Subject: [PATCH 05/41] Add better asynchronous impl for TableHandleFuture (#4802) This is in support of #4798. This adds some additional logic to BatchHandler to improve logging in exceptional cases. This also fixes some exceptional cases in TableServiceAsyncImpl. I _expected_ this fixes the intermittent test failures. --- .../deephaven/client/impl/ExportStates.java | 29 +++++- .../client/impl/TableServiceAsyncImpl.java | 93 +++++++++++-------- 2 files changed, 77 insertions(+), 45 deletions(-) diff --git a/java-client/session/src/main/java/io/deephaven/client/impl/ExportStates.java b/java-client/session/src/main/java/io/deephaven/client/impl/ExportStates.java index d9d11f1fc4e..18456c55424 100644 --- a/java-client/session/src/main/java/io/deephaven/client/impl/ExportStates.java +++ b/java-client/session/src/main/java/io/deephaven/client/impl/ExportStates.java @@ -378,10 +378,14 @@ public void onCompleted() { private static final class BatchHandler implements StreamObserver { + private static final Logger log = LoggerFactory.getLogger(BatchHandler.class); + private final Map newStates; + private final Set handled; private BatchHandler(Map newStates) { this.newStates = Objects.requireNonNull(newStates); + this.handled = new HashSet<>(newStates.size()); } @Override @@ -398,24 +402,41 @@ public void onNext(ExportedTableCreationResponse value) { "Not expecting export creation responses for empty tickets"); } final int exportId = ExportTicketHelper.ticketToExportId(value.getResultId().getTicket(), "export"); - final State state = newStates.remove(exportId); + final State state = newStates.get(exportId); if (state == null) { throw new IllegalStateException("Unable to find state for creation response"); } - state.onCreationResponse(value); + if (!handled.add(state)) { + throw new IllegalStateException( + String.format("Server misbehaving, already received response for export id %d", exportId)); + } + try { + state.onCreationResponse(value); + } catch (RuntimeException e) { + log.error("state.onCreationResponse had unexpected exception", e); + state.onCreationError(e); + } } @Override public void onError(Throwable t) { for (State state : newStates.values()) { - state.onCreationError(t); + try { + state.onCreationError(t); + } catch 
(RuntimeException e) { + log.error("state.onCreationError had unexpected exception, ignoring", e); + } } } @Override public void onCompleted() { for (State state : newStates.values()) { - state.onCreationCompleted(); + try { + state.onCreationCompleted(); + } catch (RuntimeException e) { + log.error("state.onCreationCompleted had unexpected exception, ignoring", e); + } } } } diff --git a/java-client/session/src/main/java/io/deephaven/client/impl/TableServiceAsyncImpl.java b/java-client/session/src/main/java/io/deephaven/client/impl/TableServiceAsyncImpl.java index 230f76b9068..524750d3b26 100644 --- a/java-client/session/src/main/java/io/deephaven/client/impl/TableServiceAsyncImpl.java +++ b/java-client/session/src/main/java/io/deephaven/client/impl/TableServiceAsyncImpl.java @@ -13,10 +13,13 @@ import java.util.ArrayList; import java.util.List; import java.util.Objects; +import java.util.concurrent.CancellationException; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionStage; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import java.util.function.Function; final class TableServiceAsyncImpl { @@ -59,72 +62,80 @@ static List executeAsync(ExportService exportServic private static class TableHandleAsyncImpl implements TableHandleFuture, Listener { private final TableSpec tableSpec; + private final CompletableFuture exportFuture; + private final CompletableFuture etcrFuture; private final CompletableFuture future; - private TableHandle handle; - private Export export; TableHandleAsyncImpl(TableSpec tableSpec) { this.tableSpec = Objects.requireNonNull(tableSpec); - this.future = new CompletableFuture<>(); - } - - synchronized void init(Export export) { - this.export = Objects.requireNonNull(export); - // TODO(deephaven-core#4781): Immediately notify server of release when user cancels TableHandleFuture - // this.future.whenComplete((tableHandle, throwable) -> { - // if (isCancelled()) { - // export.release(); - // } - // }); - maybeComplete(); + exportFuture = new CompletableFuture<>(); + etcrFuture = new CompletableFuture<>(); + final CompletableFuture internalFuture = CompletableFuture + .allOf(exportFuture, etcrFuture) + .thenCompose(this::complete); + // thenApply(Function.identity()) _may_ seem extraneous, but we need to ensure separation between the user's + // future and our internal state + future = internalFuture.thenApply(Function.identity()); + future.whenComplete((tableHandle, throwable) -> { + // TODO(deephaven-core#4781): Immediately notify server of release when user cancels TableHandleFuture + if (throwable instanceof CancellationException) { + // Would be better if we could immediately tell server of release, but currently we need to wait for + // etcr/export object. + internalFuture.thenAccept(TableHandle::close); + } + }); } - private void maybeComplete() { - if (handle == null || export == null) { - return; - } - handle.init(export); - if (!future.complete(handle)) { - // If we are unable to complete the future, it means the user cancelled it. It's only at this point in - // time we are able to let the server know that we don't need it anymore. 
- // TODO(deephaven-core#4781): Immediately notify server of release when user cancels TableHandleFuture - handle.close(); - } - handle = null; - export = null; + void init(Export export) { + // Note: we aren't expecting exceptional completions of exportFuture; we're using a future to make it easy + // to compose with our etcrFuture (which may or may not be completed before exportFuture). + // In exceptional cases where we _don't_ complete exportFuture (for example, the calling code has a runtime + // exception), we know we _haven't_ called io.deephaven.client.impl.ExportServiceRequest#send, so there + // isn't any possibility that we have left open a server-side export. And in those cases, this object isn't + // returned to the user and becomes garbage. The client-side cleanup will be handled in + // io.deephaven.client.impl.ExportServiceRequest#cleanupUnsent. + exportFuture.complete(Objects.requireNonNull(export)); } // -------------------------- - @Override - public void onNext(ExportedTableCreationResponse etcr) { + private CompletionStage complete(Void ignore) { + final Export export = Objects.requireNonNull(exportFuture.getNow(null)); + final ExportedTableCreationResponse etcr = Objects.requireNonNull(etcrFuture.getNow(null)); final TableHandle tableHandle = new TableHandle(tableSpec, null); + tableHandle.init(export); final ResponseAdapter responseAdapter = tableHandle.responseAdapter(); responseAdapter.onNext(etcr); responseAdapter.onCompleted(); final TableHandleException error = tableHandle.error().orElse(null); if (error != null) { - future.completeExceptionally(error); - } else { - // It's possible that onNext comes before #init; either in the case where it was already cached from - // io.deephaven.client.impl.ExportService.export, or where the RPC comes in asynchronously. In either - // case, we need to store handle so it can potentially be completed here, or in init. - synchronized (this) { - handle = tableHandle; - maybeComplete(); - } + // Only available in Java 9+ + // return CompletableFuture.failedStage(error); + final CompletableFuture f = new CompletableFuture<>(); + f.completeExceptionally(error); + return f; } + // Only available in Java 9+ + // return CompletableFuture.completedStage(tableHandle); + return CompletableFuture.completedFuture(tableHandle); + } + + // -------------------------- + + @Override + public void onNext(ExportedTableCreationResponse etcr) { + etcrFuture.complete(etcr); } @Override public void onError(Throwable t) { - future.completeExceptionally(t); + etcrFuture.completeExceptionally(t); } @Override public void onCompleted() { - if (!future.isDone()) { - future.completeExceptionally(new IllegalStateException("onCompleted without future.isDone()")); + if (!etcrFuture.isDone()) { + etcrFuture.completeExceptionally(new IllegalStateException("onCompleted without etcrFuture.isDone()")); } } From 08db1f3bfb478ea92d7c0da4cb677fda647bb052 Mon Sep 17 00:00:00 2001 From: Cristian Ferretti <37232625+jcferretti@users.noreply.github.com> Date: Fri, 10 Nov 2023 11:33:16 -0500 Subject: [PATCH 06/41] Update protoc and cpp client base images. 
(#4805) --- cpp-client/README.md | 4 ++-- docker/registry/cpp-clients-multi-base/gradle.properties | 2 +- docker/registry/protoc-base/gradle.properties | 4 ++-- proto/proto-backplane-grpc/Dockerfile | 8 ++++---- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/cpp-client/README.md b/cpp-client/README.md index 95e91afdfb3..0130532bfdd 100644 --- a/cpp-client/README.md +++ b/cpp-client/README.md @@ -37,7 +37,7 @@ on them anymore so we do notguarantee they are current for those platforms. Get the `build-dependencies.sh` script from Deephaven's base images repository at the correct version. You can download it directly from the link - https://github.com/deephaven/deephaven-base-images/raw/23c18c77e4a2431ef7403dd3c96336bd3ecf77d3/cpp-client/build-dependencies.sh + https://github.com/deephaven/deephaven-base-images/raw/47f51e769612785c6f320302a3f4f52bc0cff187/cpp-client/build-dependencies.sh (this script is also used from our automated tools, to generate a docker image to support tests runs; that's why it lives in a separate repo). The script downloads, builds and installs the dependent libraries @@ -64,7 +64,7 @@ on them anymore so we do notguarantee they are current for those platforms. # If the directory already exists from a previous attempt, ensure is clean/empty mkdir -p $DHCPP cd $DHCPP - wget https://github.com/deephaven/deephaven-base-images/raw/23c18c77e4a2431ef7403dd3c96336bd3ecf77d3/cpp-client/build-dependencies.sh + wget https://github.com/deephaven/deephaven-base-images/raw/47f51e769612785c6f320302a3f4f52bc0cff187/cpp-client/build-dependencies.sh chmod +x ./build-dependencies.sh # Maybe edit build-dependencies.sh to reflect choices of build tools and build target, if you # want anything different than defaults; defaults are tested to work, diff --git a/docker/registry/cpp-clients-multi-base/gradle.properties b/docker/registry/cpp-clients-multi-base/gradle.properties index 117188918b5..11c131793a6 100644 --- a/docker/registry/cpp-clients-multi-base/gradle.properties +++ b/docker/registry/cpp-clients-multi-base/gradle.properties @@ -1,4 +1,4 @@ io.deephaven.project.ProjectType=DOCKER_REGISTRY deephaven.registry.imageName=ghcr.io/deephaven/cpp-clients-multi-base:latest -deephaven.registry.imageId=ghcr.io/deephaven/cpp-clients-multi-base@sha256:873298d3be934383a84fdc9c2300c23de31db3ea58fbd4648cc70337c107a50d +deephaven.registry.imageId=ghcr.io/deephaven/cpp-clients-multi-base@sha256:e9d3cbd9cf5f95162e559a8e4b78fec20ddbc559048d8b0167db161f8b748d55 deephaven.registry.platform=linux/amd64 diff --git a/docker/registry/protoc-base/gradle.properties b/docker/registry/protoc-base/gradle.properties index 0bfdb321c32..bb208dbf17e 100644 --- a/docker/registry/protoc-base/gradle.properties +++ b/docker/registry/protoc-base/gradle.properties @@ -1,5 +1,5 @@ io.deephaven.project.ProjectType=DOCKER_REGISTRY deephaven.registry.imageName=ghcr.io/deephaven/protoc-base:latest -deephaven.registry.imageId=ghcr.io/deephaven/protoc-base@sha256:7e6e2ad289e71824b8c72907e38cffe7458c2888aa937e9c8828ab865513e96c +deephaven.registry.imageId=ghcr.io/deephaven/protoc-base@sha256:9e72f4456f4d950c5cb111eab8d601c9653b159221ea3fb6979dbacc3097f06c # TODO(deephaven-base-images#54): arm64 native image for cpp-client-base -deephaven.registry.platform=linux/amd64 \ No newline at end of file +deephaven.registry.platform=linux/amd64 diff --git a/proto/proto-backplane-grpc/Dockerfile b/proto/proto-backplane-grpc/Dockerfile index 505fdec504b..f17353a5f30 100644 --- a/proto/proto-backplane-grpc/Dockerfile +++ 
b/proto/proto-backplane-grpc/Dockerfile @@ -12,7 +12,7 @@ RUN set -eux; \ mkdir -p /generated/python; \ mkdir -p /generated/cpp; \ /opt/protoc/bin/protoc \ - --plugin=protoc-gen-grpc=/opt/protoc-gen-grpc-java \ + --plugin=protoc-gen-grpc=/opt/java/bin/protoc-gen-grpc-java \ --java_out=/generated/java \ --grpc_out=/generated/grpc \ -I/dependencies \ @@ -64,9 +64,9 @@ RUN set -eux; \ /includes/deephaven/proto/hierarchicaltable.proto \ /includes/deephaven/proto/storage.proto; \ /opt/protoc/bin/protoc \ - --plugin=protoc-gen-go=/opt/protoc-gen-go \ - --plugin=protoc-gen-go-grpc=/opt/protoc-gen-go-grpc \ - --plugin=protoc-gen-cpp_grpc=/opt/cpp/grpc_cpp_plugin \ + --plugin=protoc-gen-go=/opt/go/bin/protoc-gen-go \ + --plugin=protoc-gen-go-grpc=/opt/go/bin/protoc-gen-go-grpc \ + --plugin=protoc-gen-cpp_grpc=/opt/deephaven/bin/grpc_cpp_plugin \ --go_out=/generated/go \ --go-grpc_out=/generated/go \ --go_opt=module=github.com/deephaven/deephaven-core/go \ From 318c2e2f35e9eb211f64a8f2d5e8166994e1ea44 Mon Sep 17 00:00:00 2001 From: Jianfeng Mao <4297243+jmao-denver@users.noreply.github.com> Date: Fri, 10 Nov 2023 14:19:05 -0700 Subject: [PATCH 07/41] Apply typing.Literal whereever appropriate (#4803) * Apply typing.Literal whereever appropriate * Apply Literal to to_pandas() --- py/server/deephaven/dbc/__init__.py | 4 ++-- py/server/deephaven/pandas.py | 6 +++--- py/server/deephaven/table_factory.py | 2 -- py/server/deephaven/table_listener.py | 9 +++++---- py/server/deephaven/time.py | 9 ++++----- 5 files changed, 14 insertions(+), 16 deletions(-) diff --git a/py/server/deephaven/dbc/__init__.py b/py/server/deephaven/dbc/__init__.py index 06d7fd1cda8..8ec73080a79 100644 --- a/py/server/deephaven/dbc/__init__.py +++ b/py/server/deephaven/dbc/__init__.py @@ -2,14 +2,14 @@ # Copyright (c) 2016-2022 Deephaven Data Labs and Patent Pending # """The dbc package includes the modules and functions for using external databases with Deephaven.""" -from typing import Any +from typing import Any, Literal import deephaven.arrow as dharrow from deephaven import DHError from deephaven.table import Table -def read_sql(conn: Any, query: str, driver: str = "connectorx") -> Table: +def read_sql(conn: Any, query: str, driver: Literal["odbc", "adbc", "connectorx"] = "connectorx") -> Table: """Executes the provided SQL query via a supported driver and returns a Deephaven table. Args: diff --git a/py/server/deephaven/pandas.py b/py/server/deephaven/pandas.py index 5181428fa9c..14e74ece14c 100644 --- a/py/server/deephaven/pandas.py +++ b/py/server/deephaven/pandas.py @@ -3,7 +3,7 @@ # """ This module supports the conversion between Deephaven tables and pandas DataFrames. """ -from typing import List, Dict, Tuple +from typing import List, Dict, Tuple, Literal import jpy import numpy as np @@ -112,8 +112,8 @@ def _column_to_series(table: Table, col_def: Column, conv_null: bool) -> pd.Seri } -def to_pandas(table: Table, cols: List[str] = None, dtype_backend: str = None, conv_null: bool = True) -> \ - pd.DataFrame: +def to_pandas(table: Table, cols: List[str] = None, dtype_backend: Literal[None, "pyarrow", "numpy_nullable"] = None, + conv_null: bool = True) -> pd.DataFrame: """Produces a pandas DataFrame from a table. 
Note that the **entire table** is going to be cloned into memory, so the total number of entries in the table diff --git a/py/server/deephaven/table_factory.py b/py/server/deephaven/table_factory.py index 5a06cb6cfba..9e01cda8147 100644 --- a/py/server/deephaven/table_factory.py +++ b/py/server/deephaven/table_factory.py @@ -36,7 +36,6 @@ _JFunctionGeneratedTableFactory = jpy.get_type("io.deephaven.engine.table.impl.util.FunctionGeneratedTableFactory") - def empty_table(size: int) -> Table: """Creates a table with rows but no columns. @@ -395,7 +394,6 @@ def function_generated_table(table_generator: Callable[..., Table], if exec_ctx is None: raise ValueError("No execution context is available and exec_ctx was not provided! ") - def table_generator_function(): with exec_ctx: result = table_generator(*args, **kwargs) diff --git a/py/server/deephaven/table_listener.py b/py/server/deephaven/table_listener.py index ec66b833436..f0959cb20f4 100644 --- a/py/server/deephaven/table_listener.py +++ b/py/server/deephaven/table_listener.py @@ -8,7 +8,7 @@ from abc import ABC, abstractmethod from functools import wraps from inspect import signature -from typing import Callable, Union, List, Generator, Dict, Optional +from typing import Callable, Union, List, Generator, Dict, Optional, Literal import jpy import numpy @@ -237,7 +237,8 @@ def modified_columns(self) -> List[str]: return list(cols) if cols else [] -def _do_locked(ug: Union[UpdateGraph, Table], f: Callable, lock_type="shared") -> None: +def _do_locked(ug: Union[UpdateGraph, Table], f: Callable, lock_type: Literal["shared","exclusive"] = "shared") -> \ + None: """Executes a function while holding the UpdateGraph (UG) lock. Holding the UG lock ensures that the contents of a table will not change during a computation, but holding the lock also prevents table updates from happening. The lock should be held for as little @@ -308,7 +309,7 @@ def _wrap_listener_obj(t: Table, listener: TableListener): def listen(t: Table, listener: Union[Callable, TableListener], description: str = None, do_replay: bool = False, - replay_lock: str = "shared"): + replay_lock: Literal["shared", "exclusive"] = "shared"): """This is a convenience function that creates a TableListenerHandle object and immediately starts it to listen for table updates. @@ -372,7 +373,7 @@ def __init__(self, t: Table, listener: Union[Callable, TableListener], descripti self.started = False - def start(self, do_replay: bool = False, replay_lock: str = "shared") -> None: + def start(self, do_replay: bool = False, replay_lock: Literal["shared", "exclusive"] = "shared") -> None: """Start the listener by registering it with the table and listening for updates. Args: diff --git a/py/server/deephaven/time.py b/py/server/deephaven/time.py index baf3a421384..11c8897dfd9 100644 --- a/py/server/deephaven/time.py +++ b/py/server/deephaven/time.py @@ -7,7 +7,7 @@ from __future__ import annotations import datetime -from typing import Union, Optional +from typing import Union, Optional, Literal import jpy import numpy @@ -32,7 +32,7 @@ # region Clock -def dh_now(system: bool = False, resolution: str = 'ns') -> Instant: +def dh_now(system: bool = False, resolution: Literal["ns", "ms"] = "ns") -> Instant: """ Provides the current datetime according to the current Deephaven clock. Query strings should use the built-in "now" function instead of this function. 
@@ -43,9 +43,8 @@ def dh_now(system: bool = False, resolution: str = 'ns') -> Instant: system (bool): True to use the system clock; False to use the default clock. Under most circumstances, the default clock will return the current system time, but during replay simulations, the default clock can return the replay time. - - resolution (str): The resolution of the returned time. The default 'ns' will return nanosecond resolution times - if possible. 'ms' will return millisecond resolution times. + resolution (str): The resolution of the returned time. The default "ns" will return nanosecond resolution times + if possible. "ms" will return millisecond resolution times. Returns: Instant From d162c89e55b8d3f65cf293a1bc2ea5b9dee6571a Mon Sep 17 00:00:00 2001 From: Shivam Malhotra Date: Fri, 10 Nov 2023 15:19:46 -0600 Subject: [PATCH 08/41] Bug fixes for Parquet DATE and TIME, and improved support for TIMESTAMP (#4801) --- .../engine/table/impl/CodecLookup.java | 2 + .../java/io/deephaven/time/DateTimeUtils.java | 53 +++++++++ .../io/deephaven/time/TestDateTimeUtils.java | 16 +++ .../parquet/base/ParquetFileReader.java | 63 +++++------ .../parquet/table/ParquetSchemaReader.java | 17 ++- .../io/deephaven/parquet/table/TypeInfos.java | 101 ++++++++---------- .../table/location/ParquetColumnLocation.java | 7 +- .../pagestore/topage/ToLocalDateTimePage.java | 100 +++++++++++++++++ .../table/pagestore/topage/ToTimePage.java | 12 +-- .../parquet/table/transfer/DateTransfer.java | 4 +- .../transfer/LocalDateTimeArrayTransfer.java | 44 ++++++++ .../table/transfer/LocalDateTimeTransfer.java | 37 +++++++ .../transfer/LocalDateTimeVectorTransfer.java | 40 +++++++ .../parquet/table/transfer/TimeTransfer.java | 8 +- .../table/transfer/TransferObject.java | 10 ++ extensions/parquet/table/src/test/e0.py | 3 +- extensions/parquet/table/src/test/e1.py | 3 +- extensions/parquet/table/src/test/e2.py | 3 +- .../table/ParquetTableReadWriteTest.java | 32 ++++-- .../src/test/resources/e0/brotli.parquet | 4 +- .../table/src/test/resources/e0/gzip.parquet | 4 +- .../table/src/test/resources/e0/lz4.parquet | 4 +- .../src/test/resources/e0/snappy.parquet | 4 +- .../test/resources/e0/uncompressed.parquet | 4 +- .../table/src/test/resources/e0/zstd.parquet | 4 +- .../src/test/resources/e1/brotli.parquet | 4 +- .../table/src/test/resources/e1/gzip.parquet | 4 +- .../table/src/test/resources/e1/lz4.parquet | 4 +- .../src/test/resources/e1/snappy.parquet | 4 +- .../test/resources/e1/uncompressed.parquet | 4 +- .../table/src/test/resources/e1/zstd.parquet | 4 +- .../src/test/resources/e2/brotli.parquet | 4 +- .../table/src/test/resources/e2/gzip.parquet | 4 +- .../table/src/test/resources/e2/lz4.parquet | 4 +- .../src/test/resources/e2/snappy.parquet | 4 +- .../test/resources/e2/uncompressed.parquet | 4 +- .../table/src/test/resources/e2/zstd.parquet | 4 +- py/server/tests/test_parquet.py | 56 ++++++---- .../ReplicateParquetTransferObjects.java | 27 +++++ 39 files changed, 527 insertions(+), 183 deletions(-) create mode 100644 extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalDateTimePage.java create mode 100644 extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/LocalDateTimeArrayTransfer.java create mode 100644 extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/LocalDateTimeTransfer.java create mode 100644 extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/LocalDateTimeVectorTransfer.java diff --git 
a/engine/table/src/main/java/io/deephaven/engine/table/impl/CodecLookup.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/CodecLookup.java index f60cad3f99e..f73d118cc17 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/CodecLookup.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/CodecLookup.java @@ -20,6 +20,7 @@ import java.time.Instant; import java.time.LocalDate; import java.time.LocalTime; +import java.time.LocalDateTime; /** * Utility class to concentrate {@link ObjectCodec} lookups. @@ -76,6 +77,7 @@ private static boolean noCodecRequired(@NotNull final Class dataType) { dataType == Instant.class || dataType == LocalDate.class || dataType == LocalTime.class || + dataType == LocalDateTime.class || dataType == String.class || // A BigDecimal column maps to a logical type of decimal, with // appropriate precision and scale calculated from column data, diff --git a/engine/time/src/main/java/io/deephaven/time/DateTimeUtils.java b/engine/time/src/main/java/io/deephaven/time/DateTimeUtils.java index 4c1a3846e68..7f86e80403e 100644 --- a/engine/time/src/main/java/io/deephaven/time/DateTimeUtils.java +++ b/engine/time/src/main/java/io/deephaven/time/DateTimeUtils.java @@ -21,6 +21,7 @@ import java.time.zone.ZoneRulesException; import java.util.Date; import java.util.Objects; +import java.util.concurrent.TimeUnit; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -981,6 +982,21 @@ public static long epochNanos(@Nullable final ZonedDateTime dateTime) { return safeComputeNanos(dateTime.toEpochSecond(), dateTime.getNano()); } + /** + * Returns nanoseconds from the Epoch for a {@link LocalDateTime} value in UTC timezone. + * + * @param localDateTime the local date time to compute the Epoch offset for + * @return nanoseconds since Epoch, or a NULL_LONG value if the local date time is null + */ + @ScriptApi + public static long epochNanosUTC(@Nullable final LocalDateTime localDateTime) { + if (localDateTime == null) { + return NULL_LONG; + } + return TimeUnit.SECONDS.toNanos(localDateTime.toEpochSecond(ZoneOffset.UTC)) + + localDateTime.toLocalTime().getNano(); + } + /** * Returns microseconds from the Epoch for an {@link Instant} value. * @@ -1399,6 +1415,43 @@ public static ZonedDateTime excelToZonedDateTime(final double excel, @Nullable f return epochMillisToZonedDateTime(excelTimeToEpochMillis(excel, timeZone), timeZone); } + /** + * Converts nanoseconds from the Epoch to a {@link LocalDateTime} in UTC timezone. + * + * @param nanos nanoseconds since Epoch + * @return {@code null} if the input is {@link QueryConstants#NULL_LONG}; otherwise the input nanoseconds from the + * Epoch converted to a {@link LocalDateTime} in UTC timezone + */ + public static @Nullable LocalDateTime epochNanosToLocalDateTimeUTC(final long nanos) { + return nanos == NULL_LONG ? null + : LocalDateTime.ofEpochSecond(nanos / 1_000_000_000L, (int) (nanos % 1_000_000_000L), ZoneOffset.UTC); + } + + /** + * Converts microseconds from the Epoch to a {@link LocalDateTime} in UTC timezone. + * + * @param micros microseconds since Epoch + * @return {@code null} if the input is {@link QueryConstants#NULL_LONG}; otherwise the input microseconds from the + * Epoch converted to a {@link LocalDateTime} in UTC timezone + */ + public static @Nullable LocalDateTime epochMicrosToLocalDateTimeUTC(final long micros) { + return micros == NULL_LONG ? 
null + : LocalDateTime.ofEpochSecond(micros / 1_000_000L, (int) ((micros % 1_000_000L) * MICRO), + ZoneOffset.UTC); + } + + /** + * Converts milliseconds from the Epoch to a {@link LocalDateTime} in UTC timezone. + * + * @param millis milliseconds since Epoch + * @return {@code null} if the input is {@link QueryConstants#NULL_LONG}; otherwise the input milliseconds from the + * Epoch converted to a {@link LocalDateTime} in UTC timezone + */ + public static @Nullable LocalDateTime epochMillisToLocalDateTimeUTC(final long millis) { + return millis == NULL_LONG ? null + : LocalDateTime.ofEpochSecond(millis / 1_000L, (int) ((millis % 1_000L) * MILLI), ZoneOffset.UTC); + } + // endregion // region Arithmetic diff --git a/engine/time/src/test/java/io/deephaven/time/TestDateTimeUtils.java b/engine/time/src/test/java/io/deephaven/time/TestDateTimeUtils.java index c8fbfc2d78d..4e3e6b3d501 100644 --- a/engine/time/src/test/java/io/deephaven/time/TestDateTimeUtils.java +++ b/engine/time/src/test/java/io/deephaven/time/TestDateTimeUtils.java @@ -1391,6 +1391,10 @@ public void testEpochNanos() { TestCase.assertEquals(nanos, DateTimeUtils.epochNanos(dt3)); TestCase.assertEquals(NULL_LONG, DateTimeUtils.epochNanos((ZonedDateTime) null)); + + final LocalDateTime ldt = LocalDateTime.ofInstant(dt2, ZoneId.of("UTC")); + TestCase.assertEquals(nanos, DateTimeUtils.epochNanosUTC(ldt)); + TestCase.assertEquals(NULL_LONG, DateTimeUtils.epochNanosUTC(null)); } public void testEpochMicros() { @@ -1456,6 +1460,10 @@ public void testEpochNanosTo() { TestCase.assertEquals(dt3, DateTimeUtils.epochNanosToZonedDateTime(nanos, TZ_JP)); TestCase.assertNull(DateTimeUtils.epochNanosToZonedDateTime(NULL_LONG, TZ_JP)); TestCase.assertNull(DateTimeUtils.epochNanosToZonedDateTime(nanos, null)); + + final LocalDateTime ldt = LocalDateTime.ofInstant(dt2, ZoneId.of("UTC")); + TestCase.assertEquals(ldt, DateTimeUtils.epochNanosToLocalDateTimeUTC(nanos)); + TestCase.assertNull(DateTimeUtils.epochNanosToLocalDateTimeUTC(NULL_LONG)); } public void testEpochMicrosTo() { @@ -1471,6 +1479,10 @@ public void testEpochMicrosTo() { TestCase.assertEquals(dt3, DateTimeUtils.epochMicrosToZonedDateTime(micros, TZ_JP)); TestCase.assertNull(DateTimeUtils.epochMicrosToZonedDateTime(NULL_LONG, TZ_JP)); TestCase.assertNull(DateTimeUtils.epochMicrosToZonedDateTime(micros, null)); + + final LocalDateTime ldt = LocalDateTime.ofInstant(dt2, ZoneId.of("UTC")); + TestCase.assertEquals(ldt, DateTimeUtils.epochMicrosToLocalDateTimeUTC(micros)); + TestCase.assertNull(DateTimeUtils.epochMicrosToLocalDateTimeUTC(NULL_LONG)); } public void testEpochMillisTo() { @@ -1486,6 +1498,10 @@ public void testEpochMillisTo() { TestCase.assertEquals(dt3, DateTimeUtils.epochMillisToZonedDateTime(millis, TZ_JP)); TestCase.assertNull(DateTimeUtils.epochMillisToZonedDateTime(NULL_LONG, TZ_JP)); TestCase.assertNull(DateTimeUtils.epochMillisToZonedDateTime(millis, null)); + + final LocalDateTime ldt = LocalDateTime.ofInstant(dt2, ZoneId.of("UTC")); + TestCase.assertEquals(ldt, DateTimeUtils.epochMillisToLocalDateTimeUTC(millis)); + TestCase.assertNull(DateTimeUtils.epochMillisToLocalDateTimeUTC(NULL_LONG)); } public void testEpochSecondsTo() { diff --git a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetFileReader.java b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetFileReader.java index 3621d69b5a0..1db38879652 100644 --- a/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetFileReader.java +++ 
b/extensions/parquet/base/src/main/java/io/deephaven/parquet/base/ParquetFileReader.java @@ -236,23 +236,13 @@ private static void buildChildren(Types.GroupBuilder builder, Iterator> visit(final LogicalTypeAnnotation.TimeLogicalTypeAnnot @Override public Optional> visit( final LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampLogicalType) { - // TODO(deephaven-core#976): Unable to read parquet TimestampLogicalTypeAnnotation that is not adjusted - // to UTC - if (timestampLogicalType.isAdjustedToUTC()) { - switch (timestampLogicalType.getUnit()) { - case MILLIS: - case MICROS: - case NANOS: - return Optional.of(Instant.class); - } + switch (timestampLogicalType.getUnit()) { + case MILLIS: + case MICROS: + case NANOS: + // TIMESTAMP fields if adjusted to UTC are read as Instants, else as LocalDatetimes. + return timestampLogicalType.isAdjustedToUTC() ? Optional.of(Instant.class) + : Optional.of(LocalDateTime.class); } errorString.setValue("TimestampLogicalType, isAdjustedToUTC=" + timestampLogicalType.isAdjustedToUTC() + ", unit=" + timestampLogicalType.getUnit()); diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/TypeInfos.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/TypeInfos.java index 37a78a7a360..48d43f44dc4 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/TypeInfos.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/TypeInfos.java @@ -26,6 +26,7 @@ import java.time.Instant; import java.time.LocalDate; import java.time.LocalTime; +import java.time.LocalDateTime; import java.util.*; import java.util.function.Supplier; @@ -51,6 +52,7 @@ public class TypeInfos { BigIntegerType.INSTANCE, LocalDateType.INSTANCE, LocalTimeType.INSTANCE, + LocalDateTimeType.INSTANCE, }; private static final Map, TypeInfo> BY_CLASS; @@ -150,10 +152,7 @@ public Set> getTypes() { } @Override - public PrimitiveBuilder getBuilder(boolean required, boolean repeating, Class dataType) { - if (!isValidFor(dataType)) { - throw new IllegalArgumentException("Invalid data type " + dataType); - } + public PrimitiveBuilder getBuilderImpl(boolean required, boolean repeating, Class dataType) { return type(PrimitiveTypeName.BINARY, required, repeating) .as(LogicalTypeAnnotation.decimalType(precisionAndScale.scale, precisionAndScale.precision)); } @@ -193,10 +192,7 @@ public Set> getTypes() { } @Override - public PrimitiveBuilder getBuilder(boolean required, boolean repeating, Class dataType) { - if (!isValidFor(dataType)) { - throw new IllegalArgumentException("Invalid data type " + dataType); - } + public PrimitiveBuilder getBuilderImpl(boolean required, boolean repeating, Class dataType) { return type(PrimitiveTypeName.INT32, required, repeating).as(LogicalTypeAnnotation.intType(32, true)); } } @@ -213,10 +209,7 @@ public Set> getTypes() { } @Override - public PrimitiveBuilder getBuilder(boolean required, boolean repeating, Class dataType) { - if (!isValidFor(dataType)) { - throw new IllegalArgumentException("Invalid data type " + dataType); - } + public PrimitiveBuilder getBuilderImpl(boolean required, boolean repeating, Class dataType) { return type(PrimitiveTypeName.INT64, required, repeating); } } @@ -233,10 +226,7 @@ public Set> getTypes() { } @Override - public PrimitiveBuilder getBuilder(boolean required, boolean repeating, Class dataType) { - if (!isValidFor(dataType)) { - throw new IllegalArgumentException("Invalid data type " + dataType); - } + public PrimitiveBuilder getBuilderImpl(boolean 
required, boolean repeating, Class dataType) { return type(PrimitiveTypeName.INT32, required, repeating).as(LogicalTypeAnnotation.intType(16, true)); } } @@ -253,10 +243,7 @@ public Set> getTypes() { } @Override - public PrimitiveBuilder getBuilder(boolean required, boolean repeating, Class dataType) { - if (!isValidFor(dataType)) { - throw new IllegalArgumentException("Invalid data type " + dataType); - } + public PrimitiveBuilder getBuilderImpl(boolean required, boolean repeating, Class dataType) { return type(PrimitiveTypeName.BOOLEAN, required, repeating); } } @@ -273,10 +260,7 @@ public Set> getTypes() { } @Override - public PrimitiveBuilder getBuilder(boolean required, boolean repeating, Class dataType) { - if (!isValidFor(dataType)) { - throw new IllegalArgumentException("Invalid data type " + dataType); - } + public PrimitiveBuilder getBuilderImpl(boolean required, boolean repeating, Class dataType) { return type(PrimitiveTypeName.FLOAT, required, repeating); } } @@ -293,10 +277,7 @@ public Set> getTypes() { } @Override - public PrimitiveBuilder getBuilder(boolean required, boolean repeating, Class dataType) { - if (!isValidFor(dataType)) { - throw new IllegalArgumentException("Invalid data type " + dataType); - } + public PrimitiveBuilder getBuilderImpl(boolean required, boolean repeating, Class dataType) { return type(PrimitiveTypeName.DOUBLE, required, repeating); } } @@ -313,10 +294,7 @@ public Set> getTypes() { } @Override - public PrimitiveBuilder getBuilder(boolean required, boolean repeating, Class dataType) { - if (!isValidFor(dataType)) { - throw new IllegalArgumentException("Invalid data type " + dataType); - } + public PrimitiveBuilder getBuilderImpl(boolean required, boolean repeating, Class dataType) { return type(PrimitiveTypeName.INT32, required, repeating).as(LogicalTypeAnnotation.intType(16, false)); } } @@ -333,10 +311,7 @@ public Set> getTypes() { } @Override - public PrimitiveBuilder getBuilder(boolean required, boolean repeating, Class dataType) { - if (!isValidFor(dataType)) { - throw new IllegalArgumentException("Invalid data type " + dataType); - } + public PrimitiveBuilder getBuilderImpl(boolean required, boolean repeating, Class dataType) { return type(PrimitiveTypeName.INT32, required, repeating).as(LogicalTypeAnnotation.intType(8, true)); } } @@ -352,10 +327,7 @@ public Set> getTypes() { } @Override - public PrimitiveBuilder getBuilder(boolean required, boolean repeating, Class dataType) { - if (!isValidFor(dataType)) { - throw new IllegalArgumentException("Invalid data type " + dataType); - } + public PrimitiveBuilder getBuilderImpl(boolean required, boolean repeating, Class dataType) { return type(PrimitiveTypeName.BINARY, required, repeating) .as(LogicalTypeAnnotation.stringType()); } @@ -372,15 +344,31 @@ public Set> getTypes() { } @Override - public PrimitiveBuilder getBuilder(boolean required, boolean repeating, Class dataType) { - if (!isValidFor(dataType)) { - throw new IllegalArgumentException("Invalid data type " + dataType); - } + public PrimitiveBuilder getBuilderImpl(boolean required, boolean repeating, Class dataType) { + // Write instants as Parquet TIMESTAMP(isAdjustedToUTC = true, unit = NANOS) return type(PrimitiveTypeName.INT64, required, repeating) .as(LogicalTypeAnnotation.timestampType(true, LogicalTypeAnnotation.TimeUnit.NANOS)); } } + private enum LocalDateTimeType implements TypeInfo { + INSTANCE; + + private static final Set> clazzes = Collections.singleton(LocalDateTime.class); + + @Override + public Set> getTypes() { + return 
clazzes; + } + + @Override + public PrimitiveBuilder getBuilderImpl(boolean required, boolean repeating, Class dataType) { + // Write LocalDateTime as Parquet TIMESTAMP(isAdjustedToUTC = false, unit = NANOS) + return type(PrimitiveTypeName.INT64, required, repeating) + .as(LogicalTypeAnnotation.timestampType(false, LogicalTypeAnnotation.TimeUnit.NANOS)); + } + } + private enum LocalDateType implements TypeInfo { INSTANCE; @@ -392,10 +380,7 @@ public Set> getTypes() { } @Override - public PrimitiveBuilder getBuilder(boolean required, boolean repeating, Class dataType) { - if (!isValidFor(dataType)) { - throw new IllegalArgumentException("Invalid data type " + dataType); - } + public PrimitiveBuilder getBuilderImpl(boolean required, boolean repeating, Class dataType) { return type(PrimitiveTypeName.INT32, required, repeating) .as(LogicalTypeAnnotation.dateType()); } @@ -412,10 +397,7 @@ public Set> getTypes() { } @Override - public PrimitiveBuilder getBuilder(boolean required, boolean repeating, Class dataType) { - if (!isValidFor(dataType)) { - throw new IllegalArgumentException("Invalid data type " + dataType); - } + public PrimitiveBuilder getBuilderImpl(boolean required, boolean repeating, Class dataType) { // Always write in (isAdjustedToUTC = true, unit = NANOS) format return type(PrimitiveTypeName.INT64, required, repeating) .as(LogicalTypeAnnotation.timeType(true, LogicalTypeAnnotation.TimeUnit.NANOS)); @@ -439,7 +421,7 @@ public Set> getTypes() { } @Override - public PrimitiveBuilder getBuilder(boolean required, boolean repeating, Class dataType) { + public PrimitiveBuilder getBuilderImpl(boolean required, boolean repeating, Class dataType) { return type(PrimitiveTypeName.BINARY, required, repeating) .as(LogicalTypeAnnotation.decimalType(0, 1)); } @@ -454,6 +436,17 @@ default boolean isValidFor(Class clazz) { return getTypes().contains(clazz); } + default PrimitiveBuilder getBuilderImpl(boolean required, boolean repeating, Class dataType) { + throw new UnsupportedOperationException("Implement this method if using the default getBuilder()"); + } + + default PrimitiveBuilder getBuilder(boolean required, boolean repeating, Class dataType) { + if (!isValidFor(dataType)) { + throw new IllegalArgumentException("Invalid data type " + dataType); + } + return getBuilderImpl(required, repeating, dataType); + } + default Type createSchemaType( @NotNull final ColumnDefinition columnDefinition, @NotNull final ParquetInstructions instructions) { @@ -486,8 +479,6 @@ default Type createSchemaType( builder.named("item")).named(parquetColumnName)) .as(LogicalTypeAnnotation.listType()).named(parquetColumnName); } - - PrimitiveBuilder getBuilder(boolean required, boolean repeating, Class dataType); } private static class CodecType implements TypeInfo { diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/location/ParquetColumnLocation.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/location/ParquetColumnLocation.java index 1adf7044ce7..40a4aaf6fdd 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/location/ParquetColumnLocation.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/location/ParquetColumnLocation.java @@ -723,13 +723,10 @@ private static class LogicalTypeVisitor @Override public Optional> visit( final LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampLogicalType) { - // TODO(deephaven-core#976): Unable to read parquet TimestampLogicalTypeAnnotation that is not adjusted - // 
to UTC if (timestampLogicalType.isAdjustedToUTC()) { - return Optional - .of(ToInstantPage.create(componentType, timestampLogicalType.getUnit())); + return Optional.of(ToInstantPage.create(componentType, timestampLogicalType.getUnit())); } - return Optional.empty(); + return Optional.of(ToLocalDateTimePage.create(componentType, timestampLogicalType.getUnit())); } @Override diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalDateTimePage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalDateTimePage.java new file mode 100644 index 00000000000..8f906915d56 --- /dev/null +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalDateTimePage.java @@ -0,0 +1,100 @@ +/** + * Copyright (c) 2016-2022 Deephaven Data Labs and Patent Pending + */ +package io.deephaven.parquet.table.pagestore.topage; + +import io.deephaven.chunk.ChunkType; +import io.deephaven.chunk.attributes.Any; +import io.deephaven.time.DateTimeUtils; +import io.deephaven.util.QueryConstants; +import org.apache.parquet.schema.LogicalTypeAnnotation; +import org.jetbrains.annotations.NotNull; + +import java.time.LocalDateTime; +import java.util.function.LongFunction; + +/** + * Used to convert Parquet TIMESTAMP values with {@code isAdjustedToUTC=false} to {@link LocalDateTime}. Ref: ... + */ +public class ToLocalDateTimePage implements ToPage { + + @SuppressWarnings("rawtypes") + private static final ToPage MILLIS_INSTANCE = new ToLocalDateTimePageFromMillis(); + @SuppressWarnings("rawtypes") + private static final ToPage MICROS_INSTANCE = new ToLocalDateTimePageFromMicros(); + @SuppressWarnings("rawtypes") + private static final ToPage NANOS_INSTANCE = new ToLocalDateTimePageFromNanos(); + + @SuppressWarnings("unchecked") + public static ToPage create(@NotNull final Class nativeType, + @NotNull final LogicalTypeAnnotation.TimeUnit unit) { + if (LocalDateTime.class.equals(nativeType)) { + switch (unit) { + case MILLIS: + return MILLIS_INSTANCE; + case MICROS: + return MICROS_INSTANCE; + case NANOS: + return NANOS_INSTANCE; + default: + throw new IllegalArgumentException("Unsupported unit=" + unit); + } + } + throw new IllegalArgumentException( + "The native type for a LocalDateTime column is " + nativeType.getCanonicalName()); + } + + ToLocalDateTimePage() {} + + @Override + @NotNull + public final Class getNativeType() { + return LocalDateTime.class; + } + + @Override + @NotNull + public final ChunkType getChunkType() { + return ChunkType.Object; + } + + @Override + @NotNull + public final Object nullValue() { + return QueryConstants.NULL_LONG_BOXED; + } + + private static LocalDateTime[] convertResultHelper(@NotNull final Object result, + @NotNull final LongFunction unitToLocalDateTime) { + final long[] from = (long[]) result; + final LocalDateTime[] to = new LocalDateTime[from.length]; + + for (int i = 0; i < from.length; ++i) { + to[i] = unitToLocalDateTime.apply(from[i]); + } + return to; + } + + private static final class ToLocalDateTimePageFromMillis extends ToLocalDateTimePage { + @Override + public LocalDateTime[] convertResult(@NotNull final Object result) { + return convertResultHelper(result, DateTimeUtils::epochMillisToLocalDateTimeUTC); + } + } + + private static final class ToLocalDateTimePageFromMicros extends ToLocalDateTimePage { + @Override + public LocalDateTime[] convertResult(@NotNull final Object result) { + return convertResultHelper(result, 
DateTimeUtils::epochMicrosToLocalDateTimeUTC); + } + } + + private static final class ToLocalDateTimePageFromNanos extends ToLocalDateTimePage { + @Override + public LocalDateTime[] convertResult(@NotNull final Object result) { + return convertResultHelper(result, DateTimeUtils::epochNanosToLocalDateTimeUTC); + } + } + +} diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToTimePage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToTimePage.java index 604f09a6794..fd64896b4c8 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToTimePage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToTimePage.java @@ -11,6 +11,7 @@ import org.jetbrains.annotations.NotNull; import java.time.LocalTime; +import java.util.function.LongFunction; public class ToTimePage implements ToPage { @@ -82,20 +83,13 @@ public final Object nullValue() { return QueryConstants.NULL_LONG_BOXED; } - /** - * Convert a {@code long} value in the units of this page (can be micros or nanos) to a {@link LocalTime} - */ - interface ToLocalTimeFromUnits { - LocalTime apply(final long value); - } - static LocalTime[] convertResultHelper(@NotNull final Object result, - final ToLocalTimeFromUnits toLocalTimeFromUnits) { + @NotNull final LongFunction unitToLocalTime) { final long[] from = (long[]) result; final LocalTime[] to = new LocalTime[from.length]; for (int i = 0; i < from.length; ++i) { - to[i] = toLocalTimeFromUnits.apply(from[i]); + to[i] = unitToLocalTime.apply(from[i]); } return to; } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/DateTransfer.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/DateTransfer.java index ad0fb4b8e55..ea124380b33 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/DateTransfer.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/DateTransfer.java @@ -3,7 +3,7 @@ */ package io.deephaven.parquet.table.transfer; -import io.deephaven.chunk.WritableObjectChunk; +import io.deephaven.chunk.ObjectChunk; import io.deephaven.chunk.attributes.Values; import io.deephaven.engine.rowset.RowSet; import io.deephaven.engine.table.ColumnSource; @@ -12,7 +12,7 @@ import java.time.LocalDate; -final class DateTransfer extends IntCastablePrimitiveTransfer> { +final class DateTransfer extends IntCastablePrimitiveTransfer> { DateTransfer(@NotNull final ColumnSource columnSource, @NotNull final RowSet tableRowSet, final int targetSize) { super(columnSource, tableRowSet, targetSize); } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/LocalDateTimeArrayTransfer.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/LocalDateTimeArrayTransfer.java new file mode 100644 index 00000000000..16ed5fc6d0f --- /dev/null +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/LocalDateTimeArrayTransfer.java @@ -0,0 +1,44 @@ +/** + * Copyright (c) 2016-2023 Deephaven Data Labs and Patent Pending + */ +/* + * --------------------------------------------------------------------------------------------------------------------- + * AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY - for any changes edit InstantArrayTransfer and regenerate + * 
--------------------------------------------------------------------------------------------------------------------- + */ +package io.deephaven.parquet.table.transfer; + +import io.deephaven.engine.rowset.RowSequence; +import io.deephaven.engine.table.ColumnSource; +import io.deephaven.time.DateTimeUtils; +import org.jetbrains.annotations.NotNull; + +import java.nio.LongBuffer; +import java.time.LocalDateTime; + +final class LocalDateTimeArrayTransfer + extends PrimitiveArrayAndVectorTransfer { + // We encode LocalDateTime as primitive longs + LocalDateTimeArrayTransfer(@NotNull final ColumnSource columnSource, @NotNull final RowSequence tableRowSet, + final int targetPageSizeInBytes) { + super(columnSource, tableRowSet, targetPageSizeInBytes / Long.BYTES, targetPageSizeInBytes, + LongBuffer.allocate(targetPageSizeInBytes / Long.BYTES), Long.BYTES); + } + + @Override + int getSize(final LocalDateTime @NotNull [] data) { + return data.length; + } + + @Override + void resizeBuffer(final int length) { + buffer = LongBuffer.allocate(length); + } + + @Override + void copyToBuffer(@NotNull final EncodedData data) { + for (final LocalDateTime t : data.encodedValues) { + buffer.put(DateTimeUtils.epochNanosUTC(t)); + } + } +} diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/LocalDateTimeTransfer.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/LocalDateTimeTransfer.java new file mode 100644 index 00000000000..38ca4a338ea --- /dev/null +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/LocalDateTimeTransfer.java @@ -0,0 +1,37 @@ +/** + * Copyright (c) 2016-2023 Deephaven Data Labs and Patent Pending + */ +/* + * --------------------------------------------------------------------------------------------------------------------- + * AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY - for any changes edit TimeTransfer and regenerate + * --------------------------------------------------------------------------------------------------------------------- + */ +package io.deephaven.parquet.table.transfer; + +import io.deephaven.chunk.ObjectChunk; +import io.deephaven.chunk.attributes.Values; +import io.deephaven.engine.rowset.RowSequence; +import io.deephaven.engine.table.ColumnSource; +import io.deephaven.time.DateTimeUtils; +import org.jetbrains.annotations.NotNull; + +import java.nio.LongBuffer; +import java.time.LocalDateTime; + +final class LocalDateTimeTransfer extends GettingPrimitiveTransfer, LongBuffer> { + + LocalDateTimeTransfer(@NotNull final ColumnSource columnSource, @NotNull final RowSequence tableRowSet, + final int targetPageSizeInBytes) { + super(columnSource, tableRowSet, + LongBuffer.allocate(Math.toIntExact(Math.min(tableRowSet.size(), targetPageSizeInBytes / Long.BYTES))), + Math.toIntExact(Math.min(tableRowSet.size(), targetPageSizeInBytes / Long.BYTES))); + } + + @Override + void copyAllFromChunkToBuffer() { + final int chunkSize = chunk.size(); + for (int chunkIdx = 0; chunkIdx < chunkSize; ++chunkIdx) { + buffer.put(DateTimeUtils.epochNanosUTC(chunk.get(chunkIdx))); + } + } +} diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/LocalDateTimeVectorTransfer.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/LocalDateTimeVectorTransfer.java new file mode 100644 index 00000000000..0fdb16f59c3 --- /dev/null +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/LocalDateTimeVectorTransfer.java @@ 
-0,0 +1,40 @@ +/** + * Copyright (c) 2016-2023 Deephaven Data Labs and Patent Pending + */ +/* + * --------------------------------------------------------------------------------------------------------------------- + * AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY - for any changes edit InstantVectorTransfer and regenerate + * --------------------------------------------------------------------------------------------------------------------- + */ +package io.deephaven.parquet.table.transfer; + +import io.deephaven.engine.primitive.iterator.CloseableIterator; +import io.deephaven.engine.rowset.RowSequence; +import io.deephaven.engine.table.ColumnSource; +import io.deephaven.time.DateTimeUtils; +import io.deephaven.vector.ObjectVector; +import org.jetbrains.annotations.NotNull; + +import java.nio.LongBuffer; +import java.time.LocalDateTime; + +final class LocalDateTimeVectorTransfer extends PrimitiveVectorTransfer, LongBuffer> { + // We encode LocalDateTime as primitive longs + LocalDateTimeVectorTransfer(@NotNull final ColumnSource columnSource, @NotNull final RowSequence tableRowSet, + final int targetPageSizeInBytes) { + super(columnSource, tableRowSet, targetPageSizeInBytes / Long.BYTES, targetPageSizeInBytes, + LongBuffer.allocate(targetPageSizeInBytes / Long.BYTES), Long.BYTES); + } + + @Override + void resizeBuffer(final int length) { + buffer = LongBuffer.allocate(length); + } + + @Override + void copyToBuffer(@NotNull final EncodedData> data) { + try (final CloseableIterator dataIterator = data.encodedValues.iterator()) { + dataIterator.forEachRemaining((LocalDateTime t) -> buffer.put(DateTimeUtils.epochNanosUTC(t))); + } + } +} diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/TimeTransfer.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/TimeTransfer.java index 9021af1511f..48c396a99a4 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/TimeTransfer.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/TimeTransfer.java @@ -1,6 +1,9 @@ +/** + * Copyright (c) 2016-2023 Deephaven Data Labs and Patent Pending + */ package io.deephaven.parquet.table.transfer; -import io.deephaven.chunk.WritableObjectChunk; +import io.deephaven.chunk.ObjectChunk; import io.deephaven.chunk.attributes.Values; import io.deephaven.engine.rowset.RowSequence; import io.deephaven.engine.table.ColumnSource; @@ -10,7 +13,7 @@ import java.nio.LongBuffer; import java.time.LocalTime; -final class TimeTransfer extends GettingPrimitiveTransfer, LongBuffer> { +final class TimeTransfer extends GettingPrimitiveTransfer, LongBuffer> { TimeTransfer(@NotNull final ColumnSource columnSource, @NotNull final RowSequence tableRowSet, final int targetPageSizeInBytes) { @@ -23,7 +26,6 @@ final class TimeTransfer extends GettingPrimitiveTransfer TransferObject create( if (columnType == LocalTime.class) { return new TimeTransfer(columnSource, tableRowSet, instructions.getTargetPageSize()); } + if (columnType == LocalDateTime.class) { + return new LocalDateTimeTransfer(columnSource, tableRowSet, instructions.getTargetPageSize()); + } @Nullable final Class componentType = columnSource.getComponentType(); if (columnType.isArray()) { @@ -140,6 +144,9 @@ static TransferObject create( if (componentType == LocalTime.class) { return new TimeArrayTransfer(columnSource, tableRowSet, instructions.getTargetPageSize()); } + if (componentType == LocalDateTime.class) { + return new 
LocalDateTimeArrayTransfer(columnSource, tableRowSet, instructions.getTargetPageSize()); + } // TODO(deephaven-core#4612): Handle arrays of BigDecimal and if explicit codec provided } if (Vector.class.isAssignableFrom(columnType)) { @@ -183,6 +190,9 @@ static TransferObject create( if (componentType == LocalTime.class) { return new TimeVectorTransfer(columnSource, tableRowSet, instructions.getTargetPageSize()); } + if (componentType == LocalDateTime.class) { + return new LocalDateTimeVectorTransfer(columnSource, tableRowSet, instructions.getTargetPageSize()); + } // TODO(deephaven-core#4612): Handle vectors of BigDecimal and if explicit codec provided } diff --git a/extensions/parquet/table/src/test/e0.py b/extensions/parquet/table/src/test/e0.py index f6cc32c323d..09416337baa 100644 --- a/extensions/parquet/table/src/test/e0.py +++ b/extensions/parquet/table/src/test/e0.py @@ -8,8 +8,7 @@ "c": np.arange(3, 6).astype("u1"), "d": np.arange(4.0, 7.0, dtype="float64"), "e": [True, False, True], - # TODO(deephaven-core#976): Unable to read parquet TimestampLogicalTypeAnnotation that is not adjusted to UTC - # "f": pd.date_range("20130101", periods=3), + "f": pd.date_range("20130101", periods=3), "g": pd.date_range("20130101", periods=3, tz="US/Eastern"), "h": pd.Categorical(list("abc")), "i": pd.Categorical(list("abc"), ordered=True), diff --git a/extensions/parquet/table/src/test/e1.py b/extensions/parquet/table/src/test/e1.py index 450179b49e1..408c327f3a8 100644 --- a/extensions/parquet/table/src/test/e1.py +++ b/extensions/parquet/table/src/test/e1.py @@ -8,8 +8,7 @@ "c": np.arange(3, 6).astype("u1"), "d": np.arange(4.0, 7.0, dtype="float64"), "e": [True, False, True], - # TODO(deephaven-core#976): Unable to read parquet TimestampLogicalTypeAnnotation that is not adjusted to UTC - # "f": pd.date_range("20130101", periods=3), + "f": pd.date_range("20130101", periods=3), "g": pd.date_range("20130101", periods=3, tz="US/Eastern"), "h": pd.Categorical(list("abc")), "i": pd.Categorical(list("abc"), ordered=True), diff --git a/extensions/parquet/table/src/test/e2.py b/extensions/parquet/table/src/test/e2.py index 9fa3560a1e0..446fb28519a 100644 --- a/extensions/parquet/table/src/test/e2.py +++ b/extensions/parquet/table/src/test/e2.py @@ -8,8 +8,7 @@ "c": np.arange(3, 6).astype("u1"), "d": np.arange(4.0, 7.0, dtype="float64"), "e": [True, False, True], - # TODO(deephaven-core#976): Unable to read parquet TimestampLogicalTypeAnnotation that is not adjusted to UTC - # "f": pd.date_range("20130101", periods=3), + "f": pd.date_range("20130101", periods=3), "g": pd.date_range("20130101", periods=3, tz="US/Eastern"), "h": pd.Categorical(list("abc")), "i": pd.Categorical(list("abc"), ordered=True), diff --git a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java index c578137e336..026617ed081 100644 --- a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java +++ b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java @@ -62,7 +62,6 @@ import java.math.BigDecimal; import java.math.BigInteger; import java.time.Instant; -import java.time.LocalTime; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -129,6 +128,7 @@ private static Table getTableFlat(int size, boolean includeSerializable, boolean "someBiColumn = java.math.BigInteger.valueOf(ii)", 
"someDateColumn = i % 10 == 0 ? null : java.time.LocalDate.ofEpochDay(i)", "someTimeColumn = i % 10 == 0 ? null : java.time.LocalTime.of(i%24, i%60, (i+10)%60)", + "someDateTimeColumn = i % 10 == 0 ? null : java.time.LocalDateTime.of(2000+i%10, i%12+1, i%30+1, (i+4)%24, (i+5)%60, (i+6)%60, i)", "nullKey = i < -1?`123`:null", "nullIntColumn = (int)null", "nullLongColumn = (long)null", @@ -507,9 +507,10 @@ public void testArrayColumns() { "someByteArrayColumn = new byte[] {i % 10 == 0 ? null : (byte)i}", "someCharArrayColumn = new char[] {i % 10 == 0 ? null : (char)i}", "someTimeArrayColumn = new Instant[] {i % 10 == 0 ? null : (Instant)DateTimeUtils.now() + i}", - "someBiColumn = new java.math.BigInteger[] {i % 10 == 0 ? null : java.math.BigInteger.valueOf(i)}", - "someDateColumn = new java.time.LocalDate[] {i % 10 == 0 ? null : java.time.LocalDate.ofEpochDay(i)}", - "someTimeColumn = new java.time.LocalTime[] {i % 10 == 0 ? null : java.time.LocalTime.of(i%24, i%60, (i+10)%60)}", + "someBiArrayColumn = new java.math.BigInteger[] {i % 10 == 0 ? null : java.math.BigInteger.valueOf(i)}", + "someDateArrayColumn = new java.time.LocalDate[] {i % 10 == 0 ? null : java.time.LocalDate.ofEpochDay(i)}", + "someTimeArrayColumn = new java.time.LocalTime[] {i % 10 == 0 ? null : java.time.LocalTime.of(i%24, i%60, (i+10)%60)}", + "someDateTimeArrayColumn = new java.time.LocalDateTime[] {i % 10 == 0 ? null : java.time.LocalDateTime.of(2000+i%10, i%12+1, i%30+1, (i+4)%24, (i+5)%60, (i+6)%60, i)}", "nullStringArrayColumn = new String[] {(String)null}", "nullIntArrayColumn = new int[] {(int)null}", "nullLongArrayColumn = new long[] {(long)null}", @@ -1271,8 +1272,10 @@ public void readWriteStatisticsTest() { public void readWriteDateTimeTest() { final int NUM_ROWS = 1000; final Table table = TableTools.emptyTable(NUM_ROWS).view( - "someDateColumn = i % 10 == 0 ? null : java.time.LocalDate.ofEpochDay(i)", - "someTimeColumn = i % 10 == 0 ? 
null : java.time.LocalTime.of(i%24, i%60, (i+10)%60)"); + "someDateColumn = java.time.LocalDate.ofEpochDay(i)", + "someTimeColumn = java.time.LocalTime.of(i%24, i%60, (i+10)%60)", + "someLocalDateTimeColumn = java.time.LocalDateTime.of(2000+i%10, i%12+1, i%30+1, (i+4)%24, (i+5)%60, (i+6)%60, i)", + "someInstantColumn = DateTimeUtils.now() + i").select(); final File dest = new File(rootFile, "readWriteDateTimeTest.parquet"); writeReadTableTest(table, dest); @@ -1286,9 +1289,22 @@ public void readWriteDateTimeTest() { final ColumnChunkMetaData timeColMetadata = metadata.getBlocks().get(0).getColumns().get(1); assertTrue(timeColMetadata.toString().contains("someTimeColumn")); assertEquals(PrimitiveType.PrimitiveTypeName.INT64, timeColMetadata.getPrimitiveType().getPrimitiveTypeName()); - final boolean isAdjustedToUTC = true; - assertEquals(LogicalTypeAnnotation.timeType(isAdjustedToUTC, LogicalTypeAnnotation.TimeUnit.NANOS), + assertEquals(LogicalTypeAnnotation.timeType(true, LogicalTypeAnnotation.TimeUnit.NANOS), timeColMetadata.getPrimitiveType().getLogicalTypeAnnotation()); + + final ColumnChunkMetaData localDateTimeColMetadata = metadata.getBlocks().get(0).getColumns().get(2); + assertTrue(localDateTimeColMetadata.toString().contains("someLocalDateTimeColumn")); + assertEquals(PrimitiveType.PrimitiveTypeName.INT64, + localDateTimeColMetadata.getPrimitiveType().getPrimitiveTypeName()); + assertEquals(LogicalTypeAnnotation.timestampType(false, LogicalTypeAnnotation.TimeUnit.NANOS), + localDateTimeColMetadata.getPrimitiveType().getLogicalTypeAnnotation()); + + final ColumnChunkMetaData instantColMetadata = metadata.getBlocks().get(0).getColumns().get(3); + assertTrue(instantColMetadata.toString().contains("someInstantColumn")); + assertEquals(PrimitiveType.PrimitiveTypeName.INT64, + instantColMetadata.getPrimitiveType().getPrimitiveTypeName()); + assertEquals(LogicalTypeAnnotation.timestampType(true, LogicalTypeAnnotation.TimeUnit.NANOS), + instantColMetadata.getPrimitiveType().getLogicalTypeAnnotation()); } /** diff --git a/extensions/parquet/table/src/test/resources/e0/brotli.parquet b/extensions/parquet/table/src/test/resources/e0/brotli.parquet index e26a64f8c61..0fcda0a3ec7 100644 --- a/extensions/parquet/table/src/test/resources/e0/brotli.parquet +++ b/extensions/parquet/table/src/test/resources/e0/brotli.parquet @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c7b823aa5e020d7d17cf60d9d34dcdbf87578a56eb3ba20bf8a9e540a2a6d6a9 -size 5751 +oid sha256:9ffb97b406a6b35340d8d7c7d59702b85a0eda9055fb11e3b96b83bab4172e37 +size 6399 diff --git a/extensions/parquet/table/src/test/resources/e0/gzip.parquet b/extensions/parquet/table/src/test/resources/e0/gzip.parquet index 39cd91a4ee3..0c3e6ffb24a 100644 --- a/extensions/parquet/table/src/test/resources/e0/gzip.parquet +++ b/extensions/parquet/table/src/test/resources/e0/gzip.parquet @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:91e447006fe6655210561391e0abb9eb72d5984b81b9eaee587739f450c52b8d -size 5962 +oid sha256:81ef4ceeabaf58d72201d2a6a3273f8874646f7101b7a3fc5ae6d01479ae9b3a +size 6639 diff --git a/extensions/parquet/table/src/test/resources/e0/lz4.parquet b/extensions/parquet/table/src/test/resources/e0/lz4.parquet index 3dd0120a98c..18f29cea78d 100644 --- a/extensions/parquet/table/src/test/resources/e0/lz4.parquet +++ b/extensions/parquet/table/src/test/resources/e0/lz4.parquet @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:9ccbcc31974798fa363cb7ef8d4a6630d14e7ebdc44716a857c38d963c37077c -size 5718 +oid sha256:4019e7677d826e82069439a4a78513713c95d4c7130bb6af8a86220d11a59306 +size 6361 diff --git a/extensions/parquet/table/src/test/resources/e0/snappy.parquet b/extensions/parquet/table/src/test/resources/e0/snappy.parquet index dd0ef9114a8..6b2f0743603 100644 --- a/extensions/parquet/table/src/test/resources/e0/snappy.parquet +++ b/extensions/parquet/table/src/test/resources/e0/snappy.parquet @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d941980a47287b30d526f4d826b6e23cb7b67c2f1a69e6fc094fb763cfdcbb22 -size 5732 +oid sha256:b3d39d73e7cecc035b5574e8a84dfd1583ab3fdc9896cb1a02608728e9956392 +size 6376 diff --git a/extensions/parquet/table/src/test/resources/e0/uncompressed.parquet b/extensions/parquet/table/src/test/resources/e0/uncompressed.parquet index cc6d359aa9e..84734f0d18a 100644 --- a/extensions/parquet/table/src/test/resources/e0/uncompressed.parquet +++ b/extensions/parquet/table/src/test/resources/e0/uncompressed.parquet @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:72822289b3fadc5d19cafb317d176cd944b8aa2e43916988b20c93a327bf24c5 -size 5709 +oid sha256:0e27404e9e31261706142b18ed4968f4fd0237443002803f891c5a32f23f8998 +size 6349 diff --git a/extensions/parquet/table/src/test/resources/e0/zstd.parquet b/extensions/parquet/table/src/test/resources/e0/zstd.parquet index 20e8f23b9a2..cc65c3e146b 100644 --- a/extensions/parquet/table/src/test/resources/e0/zstd.parquet +++ b/extensions/parquet/table/src/test/resources/e0/zstd.parquet @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:86ceca58808e97b3454c689468ca987cdfe8651f264a6ee6cff7880b99a1fba5 -size 5836 +oid sha256:efd4cd7542780599d1e6ff55d1945745652e454167412482024a48b478245bec +size 6494 diff --git a/extensions/parquet/table/src/test/resources/e1/brotli.parquet b/extensions/parquet/table/src/test/resources/e1/brotli.parquet index a91acfd4788..818f4ac6d6a 100644 --- a/extensions/parquet/table/src/test/resources/e1/brotli.parquet +++ b/extensions/parquet/table/src/test/resources/e1/brotli.parquet @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:38834b6a8ca6ffb787732878bfe48c677bb78885610916e83a7186bae2191471 -size 5752 +oid sha256:a232f7c7689b3b87bb2722d939ee9cbfdec14159a31e14bda7e6889ba331ee54 +size 6400 diff --git a/extensions/parquet/table/src/test/resources/e1/gzip.parquet b/extensions/parquet/table/src/test/resources/e1/gzip.parquet index c38f1487f95..6204bf4ff27 100644 --- a/extensions/parquet/table/src/test/resources/e1/gzip.parquet +++ b/extensions/parquet/table/src/test/resources/e1/gzip.parquet @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5742a0708f44531110116dd970c5b9e0a934bd5737d295299ee2bf6d623c435f -size 5963 +oid sha256:ac3fe36a375694571e96deb2ec6532c3f6c2370a1e2d3e2e25d1364d722a863e +size 6640 diff --git a/extensions/parquet/table/src/test/resources/e1/lz4.parquet b/extensions/parquet/table/src/test/resources/e1/lz4.parquet index 6baaaf61cd0..68ff7618be6 100644 --- a/extensions/parquet/table/src/test/resources/e1/lz4.parquet +++ b/extensions/parquet/table/src/test/resources/e1/lz4.parquet @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1d157fa3942727b0fb0abbec859957e77c2b4371b2d5f5ed5a196eca062f21c8 -size 5719 +oid sha256:a2e43a73f87be35f31f255662daf9d6e0eb2cdd7ecf186ad9202f3bdd265af11 +size 6362 diff --git a/extensions/parquet/table/src/test/resources/e1/snappy.parquet 
b/extensions/parquet/table/src/test/resources/e1/snappy.parquet index e914e31503c..48aa69e58fd 100644 --- a/extensions/parquet/table/src/test/resources/e1/snappy.parquet +++ b/extensions/parquet/table/src/test/resources/e1/snappy.parquet @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6f065dfbde45b490c53b99f7af77aea472f0b93b8c6627319ab73494ba9b6bd4 -size 5733 +oid sha256:4ee17abd8fa80d117e29aac6165bcaaa7d206c94c43d7f8bfec1fd91620244f8 +size 6377 diff --git a/extensions/parquet/table/src/test/resources/e1/uncompressed.parquet b/extensions/parquet/table/src/test/resources/e1/uncompressed.parquet index 94ee35905cd..d86cd939673 100644 --- a/extensions/parquet/table/src/test/resources/e1/uncompressed.parquet +++ b/extensions/parquet/table/src/test/resources/e1/uncompressed.parquet @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cc55e3df0771ca17eaaf2d39f11759c203d8d96746beca3055090449bb67bf00 -size 5710 +oid sha256:c5cc2852ee31f844057ec8c5e14ce6dc94a42165ce94dc0a71ebcdb88eb39c17 +size 6350 diff --git a/extensions/parquet/table/src/test/resources/e1/zstd.parquet b/extensions/parquet/table/src/test/resources/e1/zstd.parquet index b654640e0f6..fd64a4776ff 100644 --- a/extensions/parquet/table/src/test/resources/e1/zstd.parquet +++ b/extensions/parquet/table/src/test/resources/e1/zstd.parquet @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2d13ef75d368abe191c89dce35a1266afa38116b171c406903648e01fa079a71 -size 5841 +oid sha256:94d3ecb405d8c1e13f3f636ccdef0963cde18aaf7cbf70183d9848ede8ec6241 +size 6499 diff --git a/extensions/parquet/table/src/test/resources/e2/brotli.parquet b/extensions/parquet/table/src/test/resources/e2/brotli.parquet index 2fde3adac4d..56a7c8f5975 100644 --- a/extensions/parquet/table/src/test/resources/e2/brotli.parquet +++ b/extensions/parquet/table/src/test/resources/e2/brotli.parquet @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:94ee47283962486ad03d8c5edb195dc4bd14316258e1daf579de0bdac2b02bf3 -size 2375 +oid sha256:5d4cfc16fe3ab70ba5dff2e7cbc4e69f2245e7729240f10519c6e51f6aeab375 +size 2623 diff --git a/extensions/parquet/table/src/test/resources/e2/gzip.parquet b/extensions/parquet/table/src/test/resources/e2/gzip.parquet index 16c359740dd..8f31017a389 100644 --- a/extensions/parquet/table/src/test/resources/e2/gzip.parquet +++ b/extensions/parquet/table/src/test/resources/e2/gzip.parquet @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:725e4d9e60f33ae2da352124e68c195753c929cebe3b87c27d8814a11bfef3d4 -size 2519 +oid sha256:b20a2c66b37d8cb8d9775fb022e419dffee76e80519169e7d47b8bdee65b2fc1 +size 2775 diff --git a/extensions/parquet/table/src/test/resources/e2/lz4.parquet b/extensions/parquet/table/src/test/resources/e2/lz4.parquet index ac80cd17b63..047397beb8d 100644 --- a/extensions/parquet/table/src/test/resources/e2/lz4.parquet +++ b/extensions/parquet/table/src/test/resources/e2/lz4.parquet @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c1b0e3853ae0479aae8dd9f2b4007dbf9106b2e5bcc88147d1174e7dec2bfcd4 -size 2419 +oid sha256:9e79e0aab241bbead0f106464c5059cdea9c9e6a21bf69be3796418e4af615fa +size 2664 diff --git a/extensions/parquet/table/src/test/resources/e2/snappy.parquet b/extensions/parquet/table/src/test/resources/e2/snappy.parquet index 2b0b2a54217..d689d97bd96 100644 --- a/extensions/parquet/table/src/test/resources/e2/snappy.parquet +++ b/extensions/parquet/table/src/test/resources/e2/snappy.parquet @@ -1,3 +1,3 @@ version 
https://git-lfs.github.com/spec/v1 -oid sha256:729c42915bbe71ae43c2d1cec6e1755043b5793f5b4aaf25e21fbb24674b9406 -size 2422 +oid sha256:9524c4cabf64762de3be70e6926e978c3466f4404d48cf14654e095b96b5e170 +size 2667 diff --git a/extensions/parquet/table/src/test/resources/e2/uncompressed.parquet b/extensions/parquet/table/src/test/resources/e2/uncompressed.parquet index 1b18f5ec570..865a2c16188 100644 --- a/extensions/parquet/table/src/test/resources/e2/uncompressed.parquet +++ b/extensions/parquet/table/src/test/resources/e2/uncompressed.parquet @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b93a2235fc98f3a05365a13c180bd51237eceef43d717c4b772911a3782cb335 -size 2419 +oid sha256:a523c7ca94006d72739cb489c4e527a593617295d492c8b824852f3e1c786c77 +size 2663 diff --git a/extensions/parquet/table/src/test/resources/e2/zstd.parquet b/extensions/parquet/table/src/test/resources/e2/zstd.parquet index aa9a74dc97f..449da5ebe7f 100644 --- a/extensions/parquet/table/src/test/resources/e2/zstd.parquet +++ b/extensions/parquet/table/src/test/resources/e2/zstd.parquet @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:326594350ab774ea7d42106c57181c7b831ee932ba56543a0ebfea6079d7d8ef -size 2476 +oid sha256:3e18d0a97ba9e39b74abcac070984db144eba3879d2d7cde43cd22471aa5feaa +size 2730 diff --git a/py/server/tests/test_parquet.py b/py/server/tests/test_parquet.py index c921a7e02a4..b2cd933740b 100644 --- a/py/server/tests/test_parquet.py +++ b/py/server/tests/test_parquet.py @@ -316,25 +316,6 @@ def test_writing_lists_via_pyarrow(self): pa_table_from_disk = dharrow.to_arrow(from_disk) self.assertTrue(pa_table.equals(pa_table_from_disk)) - def test_writing_time_via_pyarrow(self): - def _test_writing_time_helper(filename): - metadata = pyarrow.parquet.read_metadata(filename) - if "isAdjustedToUTC=false" in str(metadata.row_group(0).column(0)): - # TODO(deephaven-core#976): Unable to read non UTC adjusted timestamps - with self.assertRaises(DHError) as e: - read(filename) - self.assertIn("ParquetFileReaderException", e.exception.root_cause) - - df = pandas.DataFrame({ - "f": pandas.date_range("20130101", periods=3), - }) - df.to_parquet("pyarrow_26.parquet", engine='pyarrow', compression=None, version='2.6') - _test_writing_time_helper("pyarrow_26.parquet") - df.to_parquet("pyarrow_24.parquet", engine='pyarrow', compression=None, version='2.4') - _test_writing_time_helper("pyarrow_24.parquet") - df.to_parquet("pyarrow_10.parquet", engine='pyarrow', compression=None, version='1.0') - _test_writing_time_helper("pyarrow_10.parquet") - def test_dictionary_encoding(self): dh_table = empty_table(10).update(formulas=[ "shortStringColumn = `Row ` + i", @@ -406,7 +387,7 @@ def time_test_helper(pa_table, new_schema, dest): df_from_disk = to_pandas(from_disk) original_df = pa_table.to_pandas() # Compare the dataframes as strings - print((df_from_disk.astype(str) == original_df.astype(str)).all().values.all()) + self.assertTrue((df_from_disk.astype(str) == original_df.astype(str)).all().values.all()) # Test for nanoseconds, microseconds, and milliseconds schema_nsec = table.schema.set(0, pyarrow.field('someTimeColumn', pyarrow.time64('ns'))) @@ -418,5 +399,40 @@ def time_test_helper(pa_table, new_schema, dest): schema_msec = table.schema.set(0, pyarrow.field('someTimeColumn', pyarrow.time32('ms'))) time_test_helper(table, schema_msec, "data_from_pq_msec.parquet") + def test_non_utc_adjusted_timestamps(self): + """ Test that we can read and write timestamp columns with isAdjustedToUTC set
as false and different units """ + df = pandas.DataFrame({ + "f": pandas.date_range("11:00:00", "11:00:01", freq="1ms") + }) + # Sprinkle some nulls + df["f"][0] = df["f"][5] = None + table = pyarrow.Table.from_pandas(df) + + def timestamp_test_helper(pa_table, new_schema, dest): + # Cast the table to new schema and write it using pyarrow + pa_table = pa_table.cast(new_schema) + pyarrow.parquet.write_table(pa_table, dest) + # Verify that isAdjustedToUTC set as false in the metadata + metadata = pyarrow.parquet.read_metadata(dest) + if "isAdjustedToUTC=false" not in str(metadata.row_group(0).column(0)): + self.fail("isAdjustedToUTC is not set to false") + # Read the parquet file back using deephaven and write it back + dh_table_from_disk = read(dest) + dh_dest = "dh_" + dest + write(dh_table_from_disk, dh_dest) + # Read the new parquet file using pyarrow and compare against original table + pa_table_from_disk = pyarrow.parquet.read_table(dh_dest) + self.assertTrue(pa_table == pa_table_from_disk.cast(new_schema)) + + schema_nsec = table.schema.set(0, pyarrow.field('f', pyarrow.timestamp('ns'))) + timestamp_test_helper(table, schema_nsec, 'timestamp_test_nsec.parquet') + + schema_usec = table.schema.set(0, pyarrow.field('f', pyarrow.timestamp('us'))) + timestamp_test_helper(table, schema_usec, 'timestamp_test_usec.parquet') + + schema_msec = table.schema.set(0, pyarrow.field('f', pyarrow.timestamp('ms'))) + timestamp_test_helper(table, schema_msec, 'timestamp_test_msec.parquet') + + if __name__ == '__main__': unittest.main() diff --git a/replication/static/src/main/java/io/deephaven/replicators/ReplicateParquetTransferObjects.java b/replication/static/src/main/java/io/deephaven/replicators/ReplicateParquetTransferObjects.java index 1a90d784880..eb8333ebfeb 100644 --- a/replication/static/src/main/java/io/deephaven/replicators/ReplicateParquetTransferObjects.java +++ b/replication/static/src/main/java/io/deephaven/replicators/ReplicateParquetTransferObjects.java @@ -20,9 +20,17 @@ public class ReplicateParquetTransferObjects { private static final String PARQUET_INSTANT_VECTOR_TRANSFER_PATH = PARQUET_TRANSFER_DIR + "InstantVectorTransfer.java"; + private static final String PARQUET_LOCAL_DATE_TIME_TRANSFER_PATH = + PARQUET_TRANSFER_DIR + "LocalDateTimeTransfer.java"; + private static final String PARQUET_LOCAL_DATE_TIME_ARRAY_TRANSFER_PATH = + PARQUET_TRANSFER_DIR + "LocalDateTimeArrayTransfer.java"; + private static final String PARQUET_LOCAL_DATE_TIME_VECTOR_TRANSFER_PATH = + PARQUET_TRANSFER_DIR + "LocalDateTimeVectorTransfer.java"; + private static final String PARQUET_DATE_ARRAY_TRANSFER_PATH = PARQUET_TRANSFER_DIR + "DateArrayTransfer.java"; private static final String PARQUET_DATE_VECTOR_TRANSFER_PATH = PARQUET_TRANSFER_DIR + "DateVectorTransfer.java"; + private static final String PARQUET_TIME_TRANSFER_PATH = PARQUET_TRANSFER_DIR + "TimeTransfer.java"; private static final String PARQUET_TIME_ARRAY_TRANSFER_PATH = PARQUET_TRANSFER_DIR + "TimeArrayTransfer.java"; private static final String PARQUET_TIME_VECTOR_TRANSFER_PATH = PARQUET_TRANSFER_DIR + "TimeVectorTransfer.java"; @@ -59,6 +67,25 @@ public static void main(String[] args) throws IOException { }; replaceAll(PARQUET_INSTANT_ARRAY_TRANSFER_PATH, PARQUET_TIME_ARRAY_TRANSFER_PATH, null, NO_EXCEPTIONS, pairs); replaceAll(PARQUET_INSTANT_VECTOR_TRANSFER_PATH, PARQUET_TIME_VECTOR_TRANSFER_PATH, null, NO_EXCEPTIONS, pairs); + + pairs = new String[][] { + {"InstantArrayTransfer", "LocalDateTimeArrayTransfer"}, + 
{"InstantVectorTransfer", "LocalDateTimeVectorTransfer"}, + {"DateTimeUtils.epochNanos", "DateTimeUtils.epochNanosUTC"}, + {"Instant", "LocalDateTime"} + }; + replaceAll(PARQUET_INSTANT_ARRAY_TRANSFER_PATH, PARQUET_LOCAL_DATE_TIME_ARRAY_TRANSFER_PATH, null, + NO_EXCEPTIONS, pairs); + replaceAll(PARQUET_INSTANT_VECTOR_TRANSFER_PATH, PARQUET_LOCAL_DATE_TIME_VECTOR_TRANSFER_PATH, null, + NO_EXCEPTIONS, pairs); + + pairs = new String[][] { + {"TimeTransfer", "LocalDateTimeTransfer"}, + {"LocalTime", "LocalDateTime"}, + {"DateTimeUtils.nanosOfDay", "DateTimeUtils.epochNanosUTC"} + }; + replaceAll(PARQUET_TIME_TRANSFER_PATH, PARQUET_LOCAL_DATE_TIME_TRANSFER_PATH, null, NO_EXCEPTIONS, pairs); + // Additional differences can be generated by Spotless } } From c9652a19f2f537472d1134261339767d00c23c1f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 10 Nov 2023 16:47:25 -0500 Subject: [PATCH 09/41] Update web version 0.54.0 (#4812) Release notes https://github.com/deephaven/web-client-ui/releases/tag/v0.54.0 # [0.54.0](https://github.com/deephaven/web-client-ui/compare/v0.53.0...v0.54.0) (2023-11-10) ### Bug Fixes * Date argument non-optional for the onChange prop ([#1622](https://github.com/deephaven/web-client-ui/issues/1622)) ([9a960b3](https://github.com/deephaven/web-client-ui/commit/9a960b3a50eed904fce61d3e97307261582a1de7)), closes [#1601](https://github.com/deephaven/web-client-ui/issues/1601) * Fixing grid colors and grays ([#1621](https://github.com/deephaven/web-client-ui/issues/1621)) ([9ab2b1e](https://github.com/deephaven/web-client-ui/commit/9ab2b1e3204c7f854b8526e510b1e5a5fc59b8f6)), closes [#1572](https://github.com/deephaven/web-client-ui/issues/1572) * Infinite loop with grid rendering ([#1631](https://github.com/deephaven/web-client-ui/issues/1631)) ([4875d2e](https://github.com/deephaven/web-client-ui/commit/4875d2e1e895478720950ad73f28d1b895114a58)), closes [#1626](https://github.com/deephaven/web-client-ui/issues/1626) * Log figure errors, don't show infinite spinner ([#1614](https://github.com/deephaven/web-client-ui/issues/1614)) ([75783d0](https://github.com/deephaven/web-client-ui/commit/75783d0ed96e9e28214ca8681a73f23b1dc78085)) * non-contiguous table row selection background colour ([#1623](https://github.com/deephaven/web-client-ui/issues/1623)) ([61d1a53](https://github.com/deephaven/web-client-ui/commit/61d1a537ac9df31e3fe3dad95107b065a12ebd3b)), closes [#1619](https://github.com/deephaven/web-client-ui/issues/1619) * Panels not reinitializing if makeModel changes ([#1633](https://github.com/deephaven/web-client-ui/issues/1633)) ([5ee98cd](https://github.com/deephaven/web-client-ui/commit/5ee98cd8121a90535536ac6c429bbd0ba2c1a2f3)) * remove unecessary dom re-calc in grid render ([#1632](https://github.com/deephaven/web-client-ui/issues/1632)) ([ce7cc3e](https://github.com/deephaven/web-client-ui/commit/ce7cc3e6104eb208b3b36e51f62d284dfd7f57bc)) ### Features * Add `LayoutManagerContext` and `useLayoutManager` ([#1625](https://github.com/deephaven/web-client-ui/issues/1625)) ([0a6965a](https://github.com/deephaven/web-client-ui/commit/0a6965a41953470cb032ef44d93497fa438783e4)) * Add ResizeObserver to Grid and Chart ([#1626](https://github.com/deephaven/web-client-ui/issues/1626)) ([35311c8](https://github.com/deephaven/web-client-ui/commit/35311c832040b29e362c28f80983b4664c9aa1d5)) * Added test:debug script ([#1628](https://github.com/deephaven/web-client-ui/issues/1628)) 
([80f29f5](https://github.com/deephaven/web-client-ui/commit/80f29f57ffae49c5161d4a2431b46fe5af2384af)), closes [#1627](https://github.com/deephaven/web-client-ui/issues/1627) * Read settings from props/server config when available ([#1558](https://github.com/deephaven/web-client-ui/issues/1558)) ([52ba2cd](https://github.com/deephaven/web-client-ui/commit/52ba2cd125ff68f71c479d2d7c82f4b08d5b2ab6)) * Theming - Charts ([#1608](https://github.com/deephaven/web-client-ui/issues/1608)) ([d5b3b48](https://github.com/deephaven/web-client-ui/commit/d5b3b485dfc95248bdd1d664152c6c1ab288720a)), closes [#1572](https://github.com/deephaven/web-client-ui/issues/1572) ### BREAKING CHANGES - ChartThemeProvider is now required to provide ChartTheme - ChartModelFactory and ChartUtils now require chartTheme args Co-authored-by: deephaven-internal --- web/client-ui/Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/web/client-ui/Dockerfile b/web/client-ui/Dockerfile index d299787065d..15eb40807d4 100644 --- a/web/client-ui/Dockerfile +++ b/web/client-ui/Dockerfile @@ -2,9 +2,9 @@ FROM deephaven/node:local-build WORKDIR /usr/src/app # Most of the time, these versions are the same, except in cases where a patch only affects one of the packages -ARG WEB_VERSION=0.53.0 -ARG GRID_VERSION=0.53.0 -ARG CHART_VERSION=0.53.0 +ARG WEB_VERSION=0.54.0 +ARG GRID_VERSION=0.54.0 +ARG CHART_VERSION=0.54.0 # Pull in the published code-studio package from npmjs and extract is RUN set -eux; \ From eb704d876440aca61e9af91c7fb3564955af2562 Mon Sep 17 00:00:00 2001 From: Devin Smith Date: Fri, 10 Nov 2023 13:48:30 -0800 Subject: [PATCH 10/41] Bump images post 0.30.0 release (#4807) Update some unit testing gradle wrapper versions to ensure inner-gradle works with OpenJDK 21 https://github.com/deephaven/deephaven-server-docker/pull/69 https://github.com/deephaven/deephaven-server-docker/pull/70 Not updating cpp or protoc base, see https://github.com/deephaven/deephaven-base-images/pull/102 https://github.com/deephaven/deephaven-core/pull/4805 --- docker/registry/go/gradle.properties | 2 +- docker/registry/python/gradle.properties | 2 +- docker/registry/server-base/gradle.properties | 2 +- docker/registry/slim-base/gradle.properties | 2 +- .../src/main/server-jetty/requirements.txt | 20 +++++++++---------- .../src/main/server-netty/requirements.txt | 20 +++++++++---------- py/jpy-integration/build.gradle | 2 +- python-engine-test/build.gradle | 2 +- 8 files changed, 26 insertions(+), 26 deletions(-) diff --git a/docker/registry/go/gradle.properties b/docker/registry/go/gradle.properties index b1464a90e5d..f41e6f97c59 100644 --- a/docker/registry/go/gradle.properties +++ b/docker/registry/go/gradle.properties @@ -1,3 +1,3 @@ io.deephaven.project.ProjectType=DOCKER_REGISTRY deephaven.registry.imageName=golang:1 -deephaven.registry.imageId=golang@sha256:a0e3e6859220ee48340c5926794ce87a891a1abb51530573c694317bf8f72543 +deephaven.registry.imageId=golang@sha256:81cd210ae58a6529d832af2892db822b30d84f817a671b8e1c15cff0b271a3db diff --git a/docker/registry/python/gradle.properties b/docker/registry/python/gradle.properties index acb569993f7..3a69cba5d88 100644 --- a/docker/registry/python/gradle.properties +++ b/docker/registry/python/gradle.properties @@ -1,3 +1,3 @@ io.deephaven.project.ProjectType=DOCKER_REGISTRY deephaven.registry.imageName=python:3.10 -deephaven.registry.imageId=python@sha256:74cdd039dc36f6476dd5dfdbc187830e0c0f760a1bdfc73d186060ef4c4bd78f 
+deephaven.registry.imageId=python@sha256:eac7369136625549bc3f7461fe072b1030f538ea20d6291e9b56896d6a40559c diff --git a/docker/registry/server-base/gradle.properties b/docker/registry/server-base/gradle.properties index 14b2e0ca5fe..f24e145c310 100644 --- a/docker/registry/server-base/gradle.properties +++ b/docker/registry/server-base/gradle.properties @@ -1,3 +1,3 @@ io.deephaven.project.ProjectType=DOCKER_REGISTRY deephaven.registry.imageName=ghcr.io/deephaven/server-base:edge -deephaven.registry.imageId=ghcr.io/deephaven/server-base@sha256:89b11b535ed67027d4370a95fd37d348ca472ee0ee987aea6804c6ee4620fbd2 +deephaven.registry.imageId=ghcr.io/deephaven/server-base@sha256:b02de3d96469d38a2ba5999f04a6d99e0c5f5e5e34482e57d47bd4bb64108a7c diff --git a/docker/registry/slim-base/gradle.properties b/docker/registry/slim-base/gradle.properties index f32d672ac22..10e5545ced0 100644 --- a/docker/registry/slim-base/gradle.properties +++ b/docker/registry/slim-base/gradle.properties @@ -1,3 +1,3 @@ io.deephaven.project.ProjectType=DOCKER_REGISTRY deephaven.registry.imageName=ghcr.io/deephaven/server-slim-base:edge -deephaven.registry.imageId=ghcr.io/deephaven/server-slim-base@sha256:9e8b61362bb60e9797c9939b4955c5a3b01a228e2d1b3b2d9f461c0729c805a5 +deephaven.registry.imageId=ghcr.io/deephaven/server-slim-base@sha256:5d9eb6e5c3507d1d463720bb61c4ea62b98cf48a482c8c8f3faeaaf01b3c5f1f diff --git a/docker/server-jetty/src/main/server-jetty/requirements.txt b/docker/server-jetty/src/main/server-jetty/requirements.txt index 045f63d7589..abd518c2e76 100644 --- a/docker/server-jetty/src/main/server-jetty/requirements.txt +++ b/docker/server-jetty/src/main/server-jetty/requirements.txt @@ -1,18 +1,18 @@ -adbc-driver-manager==0.5.1 -adbc-driver-postgresql==0.5.1 -connectorx==0.3.1; platform.machine == 'x86_64' +adbc-driver-manager==0.8.0 +adbc-driver-postgresql==0.8.0 +connectorx==0.3.2; platform.machine == 'x86_64' deephaven-plugin==0.5.0 java-utilities==0.2.0 jedi==0.18.2 jpy==0.14.0 -llvmlite==0.40.1 -numba==0.57.1 -numpy==1.24.4 -pandas==2.0.3 +llvmlite==0.41.1 +numba==0.58.1 +numpy==1.26.1 +pandas==2.1.2 parso==0.8.3 -pyarrow==12.0.1 +pyarrow==13.0.0 python-dateutil==2.8.2 -pytz==2023.3 +pytz==2023.3.post1 six==1.16.0 -turbodbc==4.6.0 +turbodbc==4.7.0 tzdata==2023.3 diff --git a/docker/server/src/main/server-netty/requirements.txt b/docker/server/src/main/server-netty/requirements.txt index 045f63d7589..abd518c2e76 100644 --- a/docker/server/src/main/server-netty/requirements.txt +++ b/docker/server/src/main/server-netty/requirements.txt @@ -1,18 +1,18 @@ -adbc-driver-manager==0.5.1 -adbc-driver-postgresql==0.5.1 -connectorx==0.3.1; platform.machine == 'x86_64' +adbc-driver-manager==0.8.0 +adbc-driver-postgresql==0.8.0 +connectorx==0.3.2; platform.machine == 'x86_64' deephaven-plugin==0.5.0 java-utilities==0.2.0 jedi==0.18.2 jpy==0.14.0 -llvmlite==0.40.1 -numba==0.57.1 -numpy==1.24.4 -pandas==2.0.3 +llvmlite==0.41.1 +numba==0.58.1 +numpy==1.26.1 +pandas==2.1.2 parso==0.8.3 -pyarrow==12.0.1 +pyarrow==13.0.0 python-dateutil==2.8.2 -pytz==2023.3 +pytz==2023.3.post1 six==1.16.0 -turbodbc==4.6.0 +turbodbc==4.7.0 tzdata==2023.3 diff --git a/py/jpy-integration/build.gradle b/py/jpy-integration/build.gradle index 64a3116d269..83535511219 100644 --- a/py/jpy-integration/build.gradle +++ b/py/jpy-integration/build.gradle @@ -117,7 +117,7 @@ Closure> gradleTestInDocker = { String taskName, SourceSet so def gradleWrapper = tasks.register("${taskName}GradleInit", Wrapper.class) { wrapper -> wrapper.scriptFile 
"${buildDir}/template-project/gradlew" wrapper.jarFile "${buildDir}/template-project/gradle/wrapper/gradle-wrapper.jar" - wrapper.gradleVersion '7.2' + wrapper.gradleVersion '8.4' } return Docker.registerDockerTask(project, taskName) { copyIn { diff --git a/python-engine-test/build.gradle b/python-engine-test/build.gradle index 64398e5d655..8c24a953e6a 100644 --- a/python-engine-test/build.gradle +++ b/python-engine-test/build.gradle @@ -41,7 +41,7 @@ dependencies { def gradleWrapper = tasks.register("dockerGradleInit", Wrapper.class) { wrapper -> wrapper.scriptFile "${buildDir}/template-project/gradlew" wrapper.jarFile "${buildDir}/template-project/gradle/wrapper/gradle-wrapper.jar" - wrapper.gradleVersion '7.2' + wrapper.gradleVersion '8.4' } tasks.getByName('check').dependsOn Docker.registerDockerTask(project, 'test-in-docker') { From 611ce0e2d91609036d15da7ac6341b7cf36584e9 Mon Sep 17 00:00:00 2001 From: Colin Alworth Date: Fri, 10 Nov 2023 16:51:00 -0600 Subject: [PATCH 11/41] Avoid extra copying of r docs (#4811) Also prevents a dependency cycle that Gradle might complain about. --- R/build.gradle | 20 ++++---------------- R/r-doc.sh | 11 ----------- 2 files changed, 4 insertions(+), 27 deletions(-) diff --git a/R/build.gradle b/R/build.gradle index 10fa09cfdbe..96c00db6d86 100644 --- a/R/build.gradle +++ b/R/build.gradle @@ -38,7 +38,6 @@ def buildRClient = Docker.registerDockerTask(project, 'rClient') { include 'rdeephaven/NAMESPACE' include 'rdeephaven/README.md' include 'rdeephaven/inst/**' - include 'rdeephaven/man/**' include 'rdeephaven/etc/**' include 'rdeephaven/R/**' include 'rdeephaven/src/*.cpp' @@ -54,14 +53,13 @@ def buildRClient = Docker.registerDockerTask(project, 'rClient') { /out \\ ${prefix}/log \\ ${prefix}/bin/rdeephaven \\ - ${prefix}/src/rdeephaven/{inst,man,etc,src,R,bin} + ${prefix}/src/rdeephaven/{inst,etc,src,R,bin} """) copyFile('rdeephaven/DESCRIPTION', "${prefix}/src/rdeephaven/") copyFile('rdeephaven/LICENSE', "${prefix}/src/rdeephaven/") copyFile('rdeephaven/NAMESPACE', "${prefix}/src/rdeephaven/") copyFile('rdeephaven/README.md', "${prefix}/src/rdeephaven/") copyFile('rdeephaven/inst/', "${prefix}/src/rdeephaven/inst/") - copyFile('rdeephaven/man/', "${prefix}/src/rdeephaven/man/") copyFile('rdeephaven/etc/', "${prefix}/src/rdeephaven/etc/") copyFile('rdeephaven/R/', "${prefix}/src/rdeephaven/R/") copyFile('rdeephaven/src/*.cpp', "${prefix}/src/rdeephaven/src/") @@ -115,7 +113,7 @@ def rClientDoc = Docker.registerDockerTask(project, 'rClientDoc') { } } copyOut { - into layout.buildDirectory.dir('man') + into layout.projectDirectory.dir('rdeephaven/man') } dockerfile { from('deephaven/r-client:local-build') @@ -153,18 +151,8 @@ def rClientDoc = Docker.registerDockerTask(project, 'rClientDoc') { copyFile('r-doc.sh', "${prefix}/bin/rdeephaven") } parentContainers = [ project.tasks.getByName('rClient') ] - entrypoint = ["${prefix}/bin/rdeephaven/r-doc.sh", '/out'] -} - -task updateRClientDoc { - dependsOn rClientDoc - doLast { - exec { - workingDir '.' 
- commandLine 'rm', '-f', 'man/*' - commandLine 'tar', '-C', 'rdeephaven', '-zxvf', 'build/man/man.tgz' - } - } + entrypoint = ["${prefix}/bin/rdeephaven/r-doc.sh"] + containerOutPath = "${prefix}/src/rdeephaven/man" } deephavenDocker.shouldLogIfTaskFails testRClient diff --git a/R/r-doc.sh b/R/r-doc.sh index 239ce1a425b..39c51fe754a 100755 --- a/R/r-doc.sh +++ b/R/r-doc.sh @@ -2,11 +2,6 @@ set -euo pipefail -if [ "$#" -ne 1 ]; then - echo "Usage: $0 output-dir" 1>&2 - exit 1 -fi - if [ -z "${DH_PREFIX}" ]; then echo "$0: Environment variable DH_PREFIX is not set, aborting." 1>&2 exit 1 @@ -16,8 +11,6 @@ source $DH_PREFIX/env.sh cd $DH_PREFIX/src/rdeephaven -OUT_DIR="$1" - R --no-save --no-restore < Date: Fri, 10 Nov 2023 17:45:52 -0700 Subject: [PATCH 12/41] `TableServiceAsyncTest`: Eliminate Compiler Overhead Variance; Close `QueryCompiler`'s `JavaFileManager` (#4808) --- .../engine/context/QueryCompiler.java | 54 ++++++++++++------- .../client/DeephavenSessionTestBase.java | 12 ++++- .../client/impl/TableServiceAsyncTest.java | 36 +++++++++---- .../deephaven/client/impl/BearerHandler.java | 7 +++ .../io/deephaven/client/impl/SessionImpl.java | 4 +- .../server/runner/DeephavenApiServer.java | 2 +- .../runner/DeephavenApiServerTestBase.java | 13 +++-- 7 files changed, 91 insertions(+), 37 deletions(-) diff --git a/engine/context/src/main/java/io/deephaven/engine/context/QueryCompiler.java b/engine/context/src/main/java/io/deephaven/engine/context/QueryCompiler.java index aa4ca2f4137..6b54b9bb9fe 100644 --- a/engine/context/src/main/java/io/deephaven/engine/context/QueryCompiler.java +++ b/engine/context/src/main/java/io/deephaven/engine/context/QueryCompiler.java @@ -113,7 +113,7 @@ private QueryCompiler( try { urls[0] = (classDestination.toURI().toURL()); } catch (MalformedURLException e) { - throw new RuntimeException("", e); + throw new UncheckedDeephavenException(e); } this.ucl = new WritableURLClassLoader(urls, parentClassLoaderToUse); @@ -183,7 +183,8 @@ public static void writeClass(final File destinationDirectory, final String clas ensureDirectories(parentDir, () -> "Unable to create missing destination directory " + parentDir.getAbsolutePath()); if (!destinationFile.createNewFile()) { - throw new RuntimeException("Unable to create destination file " + destinationFile.getAbsolutePath()); + throw new UncheckedDeephavenException( + "Unable to create destination file " + destinationFile.getAbsolutePath()); } final ByteArrayOutputStream byteOutStream = new ByteArrayOutputStream(data.length); byteOutStream.write(data, 0, data.length); @@ -274,7 +275,7 @@ private static void ensureDirectories(final File file, final Supplier ru // (and therefore mkdirs() would return false), but still get the directory we need (and therefore exists() // would return true) if (!file.mkdirs() && !file.isDirectory()) { - throw new RuntimeException(runtimeErrMsg.get()); + throw new UncheckedDeephavenException(runtimeErrMsg.get()); } } @@ -396,7 +397,7 @@ private void addClassSource(File classSourceDirectory) { try { ucl.addURL(classSourceDirectory.toURI().toURL()); } catch (MalformedURLException e) { - throw new RuntimeException("", e); + throw new UncheckedDeephavenException(e); } } @@ -425,7 +426,7 @@ private Class compileHelper(@NotNull final String className, try { digest = MessageDigest.getInstance("SHA-256"); } catch (NoSuchAlgorithmException e) { - throw new RuntimeException("Unable to create SHA-256 hashing digest", e); + throw new UncheckedDeephavenException("Unable to create SHA-256 hashing 
digest", e); } final String basicHashText = ByteUtils.byteArrToHex(digest.digest(classBody.getBytes(StandardCharsets.UTF_8))); @@ -645,7 +646,8 @@ private void maybeCreateClass(String className, String code, String packageName, final String[] splitPackageName = packageName.split("\\."); if (splitPackageName.length == 0) { - throw new RuntimeException(String.format("packageName %s expected to have at least one .", packageName)); + throw new UncheckedDeephavenException(String.format( + "packageName %s expected to have at least one .", packageName)); } final String[] truncatedSplitPackageName = Arrays.copyOf(splitPackageName, splitPackageName.length - 1); @@ -689,23 +691,39 @@ private void maybeCreateClassHelper(String fqClassName, String finalCode, String final JavaCompiler compiler = ToolProvider.getSystemJavaCompiler(); if (compiler == null) { - throw new RuntimeException("No Java compiler provided - are you using a JRE instead of a JDK?"); + throw new UncheckedDeephavenException("No Java compiler provided - are you using a JRE instead of a JDK?"); } final String classPathAsString = getClassPath() + File.pathSeparator + getJavaClassPath(); final List compilerOptions = Arrays.asList("-d", tempDirAsString, "-cp", classPathAsString); - final StandardJavaFileManager fileManager = compiler.getStandardFileManager(null, null, null); + final JavaFileManager fileManager = compiler.getStandardFileManager(null, null, null); - final boolean result = compiler.getTask(compilerOutput, - fileManager, - null, - compilerOptions, - null, - Collections.singletonList(new JavaSourceFromString(fqClassName, finalCode))) - .call(); + boolean result = false; + boolean exceptionThrown = false; + try { + result = compiler.getTask(compilerOutput, + fileManager, + null, + compilerOptions, + null, + Collections.singletonList(new JavaSourceFromString(fqClassName, finalCode))) + .call(); + } catch (final Throwable err) { + exceptionThrown = true; + throw err; + } finally { + try { + fileManager.close(); + } catch (final IOException ioe) { + if (!exceptionThrown) { + // noinspection ThrowFromFinallyBlock + throw new UncheckedIOException("Could not close JavaFileManager", ioe); + } + } + } if (!result) { - throw new RuntimeException("Error compiling class " + fqClassName + ":\n" + compilerOutput); + throw new UncheckedDeephavenException("Error compiling class " + fqClassName + ":\n" + compilerOutput); } // The above has compiled into e.g. 
// /tmp/workspace/cache/classes/temporaryCompilationDirectory12345/io/deephaven/test/cm12862183232603186v52_0/{various @@ -737,7 +755,7 @@ private void maybeCreateClassHelper(String fqClassName, String finalCode, String private Pair tryCompile(File basePath, Collection javaFiles) throws IOException { final JavaCompiler compiler = ToolProvider.getSystemJavaCompiler(); if (compiler == null) { - throw new RuntimeException("No Java compiler provided - are you using a JRE instead of a JDK?"); + throw new UncheckedDeephavenException("No Java compiler provided - are you using a JRE instead of a JDK?"); } final File outputDirectory = Files.createTempDirectory("temporaryCompilationDirectory").toFile(); @@ -828,7 +846,7 @@ private static String getJavaClassPath() { } } } catch (IOException e) { - throw new RuntimeException("Error extract manifest file from " + javaClasspath + ".\n", e); + throw new UncheckedIOException("Error extract manifest file from " + javaClasspath + ".\n", e); } } return javaClasspath; diff --git a/java-client/session-dagger/src/test/java/io/deephaven/client/DeephavenSessionTestBase.java b/java-client/session-dagger/src/test/java/io/deephaven/client/DeephavenSessionTestBase.java index b31e8d71eb7..d5ea32d9615 100644 --- a/java-client/session-dagger/src/test/java/io/deephaven/client/DeephavenSessionTestBase.java +++ b/java-client/session-dagger/src/test/java/io/deephaven/client/DeephavenSessionTestBase.java @@ -3,8 +3,11 @@ */ package io.deephaven.client; +import io.deephaven.base.verify.Require; import io.deephaven.client.impl.Session; +import io.deephaven.client.impl.SessionImpl; import io.deephaven.server.runner.DeephavenApiServerTestBase; +import io.deephaven.server.session.SessionState; import io.grpc.ManagedChannel; import org.junit.After; import org.junit.Before; @@ -17,6 +20,7 @@ public abstract class DeephavenSessionTestBase extends DeephavenApiServerTestBas private ScheduledExecutorService sessionScheduler; protected Session session; + protected SessionState serverSessionState; @Override @Before @@ -25,8 +29,12 @@ public void setUp() throws Exception { ManagedChannel channel = channelBuilder().build(); register(channel); sessionScheduler = Executors.newScheduledThreadPool(2); - session = DaggerDeephavenSessionRoot.create().factoryBuilder().managedChannel(channel) - .scheduler(sessionScheduler).build().newSession(); + final SessionImpl clientSessionImpl = + DaggerDeephavenSessionRoot.create().factoryBuilder().managedChannel(channel) + .scheduler(sessionScheduler).build().newSession(); + session = clientSessionImpl; + serverSessionState = Require.neqNull(server().sessionService().getSessionForToken( + clientSessionImpl._hackBearerHandler().getCurrentToken()), "SessionState"); } @Override diff --git a/java-client/session-dagger/src/test/java/io/deephaven/client/impl/TableServiceAsyncTest.java b/java-client/session-dagger/src/test/java/io/deephaven/client/impl/TableServiceAsyncTest.java index 39089fc3b5c..1e8b7f188d7 100644 --- a/java-client/session-dagger/src/test/java/io/deephaven/client/impl/TableServiceAsyncTest.java +++ b/java-client/session-dagger/src/test/java/io/deephaven/client/impl/TableServiceAsyncTest.java @@ -5,7 +5,12 @@ import io.deephaven.client.DeephavenSessionTestBase; import io.deephaven.client.impl.TableService.TableHandleFuture; +import io.deephaven.engine.context.ExecutionContext; +import io.deephaven.engine.table.Table; +import io.deephaven.engine.testutil.TstUtils; +import io.deephaven.engine.util.TableTools; import 
io.deephaven.qst.table.TableSpec; +import io.deephaven.util.SafeCloseable; import org.junit.Test; import java.time.Duration; @@ -19,7 +24,7 @@ public class TableServiceAsyncTest extends DeephavenSessionTestBase { private static final Duration GETTIME = Duration.ofSeconds(15); - private static final int CHAIN_OPS = 50; + private static final int CHAIN_OPS = 250; private static final int CHAIN_ROWS = 1000; @Test(timeout = 20000) @@ -27,7 +32,7 @@ public void longChainAsyncExportOnlyLast() throws ExecutionException, Interrupte final List longChain = createLongChain(); final TableSpec longChainLast = longChain.get(longChain.size() - 1); try (final TableHandle handle = get(session.executeAsync(longChainLast))) { - checkSucceeded(handle); + checkSucceeded(handle, CHAIN_OPS); } } @@ -36,9 +41,10 @@ public void longChainAsyncExportAll() throws ExecutionException, InterruptedExce final List longChain = createLongChain(); final List futures = session.executeAsync(longChain); try { + int chainLength = 0; for (final TableHandleFuture future : futures) { try (final TableHandle handle = get(future)) { - checkSucceeded(handle); + checkSucceeded(handle, ++chainLength); } } } catch (final Throwable t) { @@ -55,7 +61,7 @@ public void longChainAsyncExportAllCancelAllButLast() // Cancel or close all but the last one TableService.TableHandleFuture.cancelOrClose(futures.subList(0, futures.size() - 1), true); try (final TableHandle lastHandle = get(futures.get(futures.size() - 1))) { - checkSucceeded(lastHandle); + checkSucceeded(lastHandle, CHAIN_OPS); } } @@ -68,7 +74,7 @@ public void immediatelyCompletedFromCachedTableServices() try (final TableHandle ignored = get(tableService.executeAsync(longChainLast))) { for (int i = 0; i < 1000; ++i) { try (final TableHandle handle = get(tableService.executeAsync(longChainLast))) { - checkSucceeded(handle); + checkSucceeded(handle, CHAIN_OPS); } } } @@ -79,22 +85,30 @@ private static TableHandle get(TableHandleFuture future) return future.getOrCancel(GETTIME); } - private static void checkSucceeded(TableHandle x) { + private void checkSucceeded(TableHandle x, int chainLength) { assertThat(x.isSuccessful()).isTrue(); + try (final SafeCloseable ignored = getExecutionContext().open()) { + final Table result = serverSessionState.getExport(x.exportId().id()).get(); + ExecutionContext.getContext().getQueryScope().putParam("ChainLength", chainLength); + final Table expected = TableTools.emptyTable(CHAIN_ROWS).update("Current = ii - 1 + ChainLength"); + TstUtils.assertTableEquals(expected, result); + } } private static List createLongChain() { return createLongChain(CHAIN_OPS, CHAIN_ROWS); } - private static List createLongChain(int numColumns, int numRows) { - final List longChain = new ArrayList<>(numColumns); - for (int i = 0; i < numColumns; ++i) { + private static List createLongChain(int chainLength, int numRows) { + final List longChain = new ArrayList<>(chainLength); + for (int i = 0; i < chainLength; ++i) { if (i == 0) { - longChain.add(TableSpec.empty(numRows).view("I_0=ii")); + longChain.add(TableSpec.empty(numRows).view("Current = ii")); } else { final TableSpec prev = longChain.get(i - 1); - longChain.add(prev.updateView("I_" + i + " = 1 + I_" + (i - 1))); + // Note: it's important that this formula is constant with respect to "i", otherwise we'll spend a lot + // of time compiling formulas + longChain.add(prev.updateView("Current = 1 + Current")); } } return longChain; diff --git a/java-client/session/src/main/java/io/deephaven/client/impl/BearerHandler.java 
b/java-client/session/src/main/java/io/deephaven/client/impl/BearerHandler.java index ce3d9a80fd5..8c1d3e3ff46 100644 --- a/java-client/session/src/main/java/io/deephaven/client/impl/BearerHandler.java +++ b/java-client/session/src/main/java/io/deephaven/client/impl/BearerHandler.java @@ -1,5 +1,6 @@ package io.deephaven.client.impl; +import com.google.common.annotations.VisibleForTesting; import io.grpc.CallCredentials; import io.grpc.CallOptions; import io.grpc.Channel; @@ -14,6 +15,7 @@ import java.util.Objects; import java.util.Optional; +import java.util.UUID; import java.util.concurrent.Executor; import static io.deephaven.client.impl.Authentication.AUTHORIZATION_HEADER; @@ -56,6 +58,11 @@ public void setBearerToken(String bearerToken) { } } + @VisibleForTesting + public UUID getCurrentToken() { + return UUID.fromString(bearerToken); + } + private void handleMetadata(Metadata metadata) { parseBearerToken(metadata).ifPresent(BearerHandler.this::setBearerToken); } diff --git a/java-client/session/src/main/java/io/deephaven/client/impl/SessionImpl.java b/java-client/session/src/main/java/io/deephaven/client/impl/SessionImpl.java index 7ef023b7e12..683f78e472d 100644 --- a/java-client/session/src/main/java/io/deephaven/client/impl/SessionImpl.java +++ b/java-client/session/src/main/java/io/deephaven/client/impl/SessionImpl.java @@ -123,8 +123,8 @@ private SessionImpl(SessionImplConfig config, DeephavenChannel bearerChannel, Du pingFrequency.toNanos(), pingFrequency.toNanos(), TimeUnit.NANOSECONDS); } - // exposed for Flight - BearerHandler _hackBearerHandler() { + // exposed for Flight and testing + public BearerHandler _hackBearerHandler() { return bearerHandler; } diff --git a/server/src/main/java/io/deephaven/server/runner/DeephavenApiServer.java b/server/src/main/java/io/deephaven/server/runner/DeephavenApiServer.java index 1288790f503..fa3de22157b 100644 --- a/server/src/main/java/io/deephaven/server/runner/DeephavenApiServer.java +++ b/server/src/main/java/io/deephaven/server/runner/DeephavenApiServer.java @@ -98,7 +98,7 @@ public GrpcServer server() { } @VisibleForTesting - SessionService sessionService() { + public SessionService sessionService() { return sessionService; } diff --git a/server/src/test/java/io/deephaven/server/runner/DeephavenApiServerTestBase.java b/server/src/test/java/io/deephaven/server/runner/DeephavenApiServerTestBase.java index 37679b44fa5..a064a1ef415 100644 --- a/server/src/test/java/io/deephaven/server/runner/DeephavenApiServerTestBase.java +++ b/server/src/test/java/io/deephaven/server/runner/DeephavenApiServerTestBase.java @@ -6,6 +6,7 @@ import dagger.BindsInstance; import dagger.Component; import io.deephaven.client.ClientDefaultsModule; +import io.deephaven.engine.context.ExecutionContext; import io.deephaven.engine.context.TestExecutionContext; import io.deephaven.engine.liveness.LivenessScope; import io.deephaven.engine.liveness.LivenessScopeStack; @@ -85,7 +86,8 @@ interface Builder { @Rule public final GrpcCleanupRule grpcCleanup = new GrpcCleanupRule(); - private SafeCloseable executionContext; + private ExecutionContext executionContext; + private SafeCloseable executionContextCloseable; private LogBuffer logBuffer; private SafeCloseable scopeCloseable; @@ -127,7 +129,8 @@ public void setUp() throws Exception { .injectFields(this); final PeriodicUpdateGraph updateGraph = server.getUpdateGraph().cast(); - executionContext = TestExecutionContext.createForUnitTests().withUpdateGraph(updateGraph).open(); + executionContext = 
TestExecutionContext.createForUnitTests().withUpdateGraph(updateGraph); + executionContextCloseable = executionContext.open(); if (updateGraph.isUnitTestModeAllowed()) { updateGraph.enableUnitTestMode(); updateGraph.resetForUnitTests(false); @@ -153,7 +156,7 @@ public void tearDown() throws Exception { if (updateGraph.isUnitTestModeAllowed()) { updateGraph.resetForUnitTests(true); } - executionContext.close(); + executionContextCloseable.close(); scheduler.shutdown(); } @@ -170,6 +173,10 @@ public ScriptSession getScriptSession() { return scriptSessionProvider.get(); } + public ExecutionContext getExecutionContext() { + return executionContext; + } + /** * The session token expiration * From 89faae3833d78d29b2cf3002807c742a7aa88489 Mon Sep 17 00:00:00 2001 From: Colin Alworth Date: Mon, 13 Nov 2023 10:18:42 -0600 Subject: [PATCH 13/41] JS TreeTable must always track key column values (#4791) If the user doesn't subscribe to a key column, the TreeTable should still subscribe to it and track its values, but not confuse user code by populating them into TableData instances. Fixes #4774 --- .../deephaven/web/client/api/tree/JsTreeTable.java | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/web/client-api/src/main/java/io/deephaven/web/client/api/tree/JsTreeTable.java b/web/client-api/src/main/java/io/deephaven/web/client/api/tree/JsTreeTable.java index 18ae065378f..e102f79f7c9 100644 --- a/web/client-api/src/main/java/io/deephaven/web/client/api/tree/JsTreeTable.java +++ b/web/client-api/src/main/java/io/deephaven/web/client/api/tree/JsTreeTable.java @@ -176,13 +176,19 @@ private TreeViewportData(double offset, long viewportSize, double treeSize, Colu ViewportData.cleanData(dataColumns[rowDepthCol.getIndex()].getData(), rowDepthCol)); int constituentDepth = keyColumns.length + 2; + + // Without modifying this.columns (copied and frozen), make sure our key columns are present + // in the list of columns that we will copy data for the viewport + keyColumns.forEach((col, p1, p2) -> { + if (this.columns.indexOf(col) == -1) { + columns[columns.length] = col; + } + return null; + }); + for (int i = 0; i < columns.length; i++) { Column c = columns[i]; int index = c.getIndex(); - if (dataColumns[index] == null) { - // no data for this column, not requested in viewport - continue; - } // clean the data, since it will be exposed to the client data[index] = ViewportData.cleanData(dataColumns[index].getData(), c); From b362db342bf73c591a5de096ffaeb37be98d6f0b Mon Sep 17 00:00:00 2001 From: Colin Alworth Date: Mon, 13 Nov 2023 13:23:34 -0600 Subject: [PATCH 14/41] Ensure LongWrapper is formatted as a Java long (#4820) Fixes #4818 --- .../java/io/deephaven/web/client/api/i18n/JsNumberFormat.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/client-api/src/main/java/io/deephaven/web/client/api/i18n/JsNumberFormat.java b/web/client-api/src/main/java/io/deephaven/web/client/api/i18n/JsNumberFormat.java index 6bc69bc746d..1343053263d 100644 --- a/web/client-api/src/main/java/io/deephaven/web/client/api/i18n/JsNumberFormat.java +++ b/web/client-api/src/main/java/io/deephaven/web/client/api/i18n/JsNumberFormat.java @@ -162,7 +162,7 @@ public String format(NumberUnion number) { } else if (number.isBigInteger()) { return wrapped.format(number.asBigInteger().getWrapped()); } else if (number.isLongWrapper()) { - return wrapped.format(number.asLongWrapper().getWrapped()); + return wrapped.format((Long) number.asLongWrapper().getWrapped()); } throw new 
IllegalStateException("Can't format non-number object of type " + Js.typeof(number)); } From 1e2306fca00906e38b43ec2318db5799a447f638 Mon Sep 17 00:00:00 2001 From: Colin Alworth Date: Mon, 13 Nov 2023 13:38:11 -0600 Subject: [PATCH 15/41] Always export scope objects before performing other interactions (#4816) Fixes #4604 --- .../io/deephaven/web/client/api/JsTable.java | 4 +- .../web/client/api/WorkerConnection.java | 50 +++++++++---- .../web/client/api/impl/TicketAndPromise.java | 70 ++++++++++++++++++ .../web/client/api/tree/JsTreeTable.java | 72 ++++++------------- 4 files changed, 128 insertions(+), 68 deletions(-) create mode 100644 web/client-api/src/main/java/io/deephaven/web/client/api/impl/TicketAndPromise.java diff --git a/web/client-api/src/main/java/io/deephaven/web/client/api/JsTable.java b/web/client-api/src/main/java/io/deephaven/web/client/api/JsTable.java index a10f5cf7808..fcd9f450af7 100644 --- a/web/client-api/src/main/java/io/deephaven/web/client/api/JsTable.java +++ b/web/client-api/src/main/java/io/deephaven/web/client/api/JsTable.java @@ -1419,8 +1419,8 @@ public Promise partitionBy(Object keys, @JsOptional Boolean TypedTicket typedTicket = new TypedTicket(); typedTicket.setType(JsVariableType.PARTITIONEDTABLE); typedTicket.setTicket(partitionedTableTicket); - Promise fetchPromise = - new JsPartitionedTable(workerConnection, new JsWidget(workerConnection, typedTicket)).refetch(); + Promise fetchPromise = new JsWidget(workerConnection, typedTicket).refetch().then( + widget -> Promise.resolve(new JsPartitionedTable(workerConnection, widget))); // Ensure that the partition failure propagates first, but the result of the fetch will be returned - both // are running concurrently. diff --git a/web/client-api/src/main/java/io/deephaven/web/client/api/WorkerConnection.java b/web/client-api/src/main/java/io/deephaven/web/client/api/WorkerConnection.java index 7237e5b5a66..f9611a1bc57 100644 --- a/web/client-api/src/main/java/io/deephaven/web/client/api/WorkerConnection.java +++ b/web/client-api/src/main/java/io/deephaven/web/client/api/WorkerConnection.java @@ -49,6 +49,8 @@ import io.deephaven.javascript.proto.dhinternal.io.deephaven.proto.object_pb.FetchObjectResponse; import io.deephaven.javascript.proto.dhinternal.io.deephaven.proto.object_pb_service.ObjectServiceClient; import io.deephaven.javascript.proto.dhinternal.io.deephaven.proto.partitionedtable_pb_service.PartitionedTableServiceClient; +import io.deephaven.javascript.proto.dhinternal.io.deephaven.proto.session_pb.ExportRequest; +import io.deephaven.javascript.proto.dhinternal.io.deephaven.proto.session_pb.ExportResponse; import io.deephaven.javascript.proto.dhinternal.io.deephaven.proto.session_pb.ReleaseRequest; import io.deephaven.javascript.proto.dhinternal.io.deephaven.proto.session_pb.TerminationNotificationRequest; import io.deephaven.javascript.proto.dhinternal.io.deephaven.proto.session_pb_service.SessionServiceClient; @@ -78,6 +80,7 @@ import io.deephaven.web.client.api.console.JsVariableDefinition; import io.deephaven.web.client.api.console.JsVariableType; import io.deephaven.web.client.api.i18n.JsTimeZone; +import io.deephaven.web.client.api.impl.TicketAndPromise; import io.deephaven.web.client.api.lifecycle.HasLifecycle; import io.deephaven.web.client.api.parse.JsDataHandler; import io.deephaven.web.client.api.state.StateCache; @@ -701,11 +704,12 @@ final class Listener implements Consumer { @Override public void accept(JsVariableChanges changes) { JsVariableDefinition foundField = 
changes.getCreated() - .find((field, p1, p2) -> field.getTitle().equals(name) && field.getType().equals(type)); + .find((field, p1, p2) -> field.getTitle().equals(name) + && field.getType().equalsIgnoreCase(type)); if (foundField == null) { foundField = changes.getUpdated().find((field, p1, p2) -> field.getTitle().equals(name) - && field.getType().equals(type)); + && field.getType().equalsIgnoreCase(type)); } if (foundField != null) { @@ -756,23 +760,23 @@ public Promise getTable(JsVariableDefinition varDef, @Nullable Boolean } public Promise getObject(JsVariableDefinition definition) { - if (JsVariableType.TABLE.equals(definition.getType())) { + if (JsVariableType.TABLE.equalsIgnoreCase(definition.getType())) { return getTable(definition, null); - } else if (JsVariableType.FIGURE.equals(definition.getType())) { + } else if (JsVariableType.FIGURE.equalsIgnoreCase(definition.getType())) { return getFigure(definition); - } else if (JsVariableType.PANDAS.equals(definition.getType())) { + } else if (JsVariableType.PANDAS.equalsIgnoreCase(definition.getType())) { return getWidget(definition) .then(widget -> widget.getExportedObjects()[0].fetch()); - } else if (JsVariableType.PARTITIONEDTABLE.equals(definition.getType())) { + } else if (JsVariableType.PARTITIONEDTABLE.equalsIgnoreCase(definition.getType())) { return getPartitionedTable(definition); - } else if (JsVariableType.HIERARCHICALTABLE.equals(definition.getType())) { + } else if (JsVariableType.HIERARCHICALTABLE.equalsIgnoreCase(definition.getType())) { return getHierarchicalTable(definition); } else { - if (JsVariableType.TABLEMAP.equals(definition.getType())) { + if (JsVariableType.TABLEMAP.equalsIgnoreCase(definition.getType())) { JsLog.warn( "TableMap is now known as PartitionedTable, fetching as a plain widget. To fetch as a PartitionedTable use that as the type."); } - if (JsVariableType.TREETABLE.equals(definition.getType())) { + if (JsVariableType.TREETABLE.equalsIgnoreCase(definition.getType())) { JsLog.warn( "TreeTable is now HierarchicalTable, fetching as a plain widget. 
To fetch as a HierarchicalTable use that as this type."); } @@ -886,15 +890,25 @@ public Promise whenServerReady(String operationName) { return Promise.resolve(this); default: // not possible, means null state - // noinspection unchecked - return (Promise) Promise.reject("Can't " + operationName + " while connection is in state " + state); + return Promise.reject("Can't " + operationName + " while connection is in state " + state); } } + private TicketAndPromise exportScopeTicket(JsVariableDefinition varDef) { + Ticket ticket = getConfig().newTicket(); + return new TicketAndPromise<>(ticket, whenServerReady("exportScopeTicket").then(server -> { + ExportRequest req = new ExportRequest(); + req.setSourceId(createTypedTicket(varDef).getTicket()); + req.setResultId(ticket); + return Callbacks.grpcUnaryPromise( + c -> sessionServiceClient().exportFromTicket(req, metadata(), c::apply)); + }), this); + } + public Promise getPartitionedTable(JsVariableDefinition varDef) { return whenServerReady("get a partitioned table") - .then(server -> new JsPartitionedTable(this, new JsWidget(this, createTypedTicket(varDef))) - .refetch()); + .then(server -> getWidget(varDef)) + .then(widget -> new JsPartitionedTable(this, widget).refetch()); } public Promise getTreeTable(JsVariableDefinition varDef) { @@ -906,7 +920,7 @@ public Promise getHierarchicalTable(JsVariableDefinition varDef) { } public Promise getFigure(JsVariableDefinition varDef) { - if (!varDef.getType().equals("Figure")) { + if (!varDef.getType().equalsIgnoreCase("Figure")) { throw new IllegalArgumentException("Can't load as a figure: " + varDef.getType()); } return whenServerReady("get a figure") @@ -935,7 +949,13 @@ private TypedTicket createTypedTicket(JsVariableDefinition varDef) { } public Promise getWidget(JsVariableDefinition varDef) { - return getWidget(createTypedTicket(varDef)); + return exportScopeTicket(varDef) + .race(ticket -> { + TypedTicket typedTicket = new TypedTicket(); + typedTicket.setType(varDef.getType()); + typedTicket.setTicket(ticket); + return getWidget(typedTicket); + }).promise(); } public Promise getWidget(TypedTicket typedTicket) { diff --git a/web/client-api/src/main/java/io/deephaven/web/client/api/impl/TicketAndPromise.java b/web/client-api/src/main/java/io/deephaven/web/client/api/impl/TicketAndPromise.java new file mode 100644 index 00000000000..4f850da0a9f --- /dev/null +++ b/web/client-api/src/main/java/io/deephaven/web/client/api/impl/TicketAndPromise.java @@ -0,0 +1,70 @@ +package io.deephaven.web.client.api.impl; + +import elemental2.promise.IThenable; +import elemental2.promise.Promise; +import io.deephaven.javascript.proto.dhinternal.io.deephaven.proto.ticket_pb.Ticket; +import io.deephaven.web.client.api.WorkerConnection; +import io.deephaven.web.shared.fu.JsFunction; + +/** + * Pair of ticket and the promise that indicates it has been resolved. Tickets are usable before they are resolved, but + * to ensure that all operations completed successfully, the promise should be used to handle errors. 
+ */ +public class TicketAndPromise implements IThenable { + private final Ticket ticket; + private final Promise promise; + private final WorkerConnection connection; + private boolean released = false; + + public TicketAndPromise(Ticket ticket, Promise promise, WorkerConnection connection) { + this.ticket = ticket; + this.promise = promise; + this.connection = connection; + } + + public TicketAndPromise(Ticket ticket, WorkerConnection connection) { + this(ticket, (Promise) Promise.resolve(ticket), connection); + } + + public Promise promise() { + return promise; + } + + public Ticket ticket() { + return ticket; + } + + @Override + public TicketAndPromise then(ThenOnFulfilledCallbackFn onFulfilled) { + return new TicketAndPromise<>(ticket, promise.then(onFulfilled), connection); + } + + /** + * Rather than waiting for the original promise to succeed, lets the caller start a new call based only on the + * original ticket. The intent of "race" here is unlike Promise.race(), where the first to succeed should resolve - + * instead, this raced call will be sent to the server even though the previous call has not successfully returned, + * and the server is responsible for ensuring they happen in the correct order. + * + * @param racedCall the call to perform at the same time that any pending call is happening + * @return a new TicketAndPromise that will resolve when all work is successful + * @param type of the next call to perform + */ + public TicketAndPromise race(JsFunction> racedCall) { + IThenable raced = racedCall.apply(ticket); + return new TicketAndPromise<>(ticket, Promise.all(promise, raced).then(ignore -> raced), connection); + } + + @Override + public IThenable then(ThenOnFulfilledCallbackFn onFulfilled, + ThenOnRejectedCallbackFn onRejected) { + return promise.then(onFulfilled, onRejected); + } + + public void release() { + if (!released) { + // don't double-release, in cases where the same ticket is used for multiple parts of the request + released = true; + connection.releaseTicket(ticket); + } + } +} diff --git a/web/client-api/src/main/java/io/deephaven/web/client/api/tree/JsTreeTable.java b/web/client-api/src/main/java/io/deephaven/web/client/api/tree/JsTreeTable.java index e102f79f7c9..07153a5f0db 100644 --- a/web/client-api/src/main/java/io/deephaven/web/client/api/tree/JsTreeTable.java +++ b/web/client-api/src/main/java/io/deephaven/web/client/api/tree/JsTreeTable.java @@ -41,6 +41,7 @@ import io.deephaven.web.client.api.barrage.def.InitialTableDefinition; import io.deephaven.web.client.api.barrage.stream.BiDiStream; import io.deephaven.web.client.api.filter.FilterCondition; +import io.deephaven.web.client.api.impl.TicketAndPromise; import io.deephaven.web.client.api.lifecycle.HasLifecycle; import io.deephaven.web.client.api.subscription.ViewportData; import io.deephaven.web.client.api.subscription.ViewportRow; @@ -52,7 +53,6 @@ import io.deephaven.web.shared.data.*; import io.deephaven.web.shared.data.columns.ColumnData; import jsinterop.annotations.JsIgnore; -import jsinterop.annotations.JsMethod; import jsinterop.annotations.JsNullable; import jsinterop.annotations.JsOptional; import jsinterop.annotations.JsOverlay; @@ -120,33 +120,6 @@ public class JsTreeTable extends HasLifecycle implements ServerObject { private static final double ACTION_EXPAND_WITH_DESCENDENTS = 0b011; private static final double ACTION_COLLAPSE = 0b100; - /** - * Pair of ticket and the promise that indicates it has been resolved. 
Tickets are usable before they are resolved, - * but to ensure that all operations completed successfully, the promise should be used to handle errors. - */ - private class TicketAndPromise { - private final Ticket ticket; - private final Promise promise; - private boolean released = false; - - private TicketAndPromise(Ticket ticket, Promise promise) { - this.ticket = ticket; - this.promise = promise; - } - - private TicketAndPromise(Ticket ticket) { - this(ticket, Promise.resolve(ticket)); - } - - public void release() { - if (!released) { - // don't double-release, in cases where the same ticket is used for multiple parts of the request - released = true; - connection.releaseTicket(ticket); - } - } - } - @TsInterface @TsName(namespace = "dh") public class TreeViewportData implements TableData { @@ -386,15 +359,15 @@ private enum RebuildStep { // The current filter and sort state private List filters = new ArrayList<>(); private List sorts = new ArrayList<>(); - private TicketAndPromise filteredTable; - private TicketAndPromise sortedTable; + private TicketAndPromise filteredTable; + private TicketAndPromise sortedTable; // Tracking for the current/next key table contents. Note that the key table doesn't necessarily // only include key columns, but all HierarchicalTable.isExpandByColumn columns. private Object[][] keyTableData; private Promise keyTable; - private TicketAndPromise viewTicket; + private TicketAndPromise viewTicket; private Promise> stream; // the "next" set of filters/sorts that we'll use. these either are "==" to the above fields, or are scheduled @@ -538,15 +511,15 @@ public JsTreeTable(WorkerConnection workerConnection, JsWidget widget) { .then(cts -> Promise.resolve(new JsTable(connection, cts)))); } - private TicketAndPromise prepareFilter() { + private TicketAndPromise prepareFilter() { if (filteredTable != null) { return filteredTable; } if (nextFilters.isEmpty()) { - return new TicketAndPromise(widget.getTicket()); + return new TicketAndPromise<>(widget.getTicket(), connection); } Ticket ticket = connection.getConfig().newTicket(); - filteredTable = new TicketAndPromise(ticket, Callbacks.grpcUnaryPromise(c -> { + filteredTable = new TicketAndPromise<>(ticket, Callbacks.grpcUnaryPromise(c -> { HierarchicalTableApplyRequest applyFilter = new HierarchicalTableApplyRequest(); applyFilter.setFiltersList( @@ -554,11 +527,11 @@ private TicketAndPromise prepareFilter() { applyFilter.setInputHierarchicalTableId(widget.getTicket()); applyFilter.setResultHierarchicalTableId(ticket); connection.hierarchicalTableServiceClient().apply(applyFilter, connection.metadata(), c::apply); - })); + }), connection); return filteredTable; } - private TicketAndPromise prepareSort(TicketAndPromise prevTicket) { + private TicketAndPromise prepareSort(TicketAndPromise prevTicket) { if (sortedTable != null) { return sortedTable; } @@ -566,14 +539,14 @@ private TicketAndPromise prepareSort(TicketAndPromise prevTicket) { return prevTicket; } Ticket ticket = connection.getConfig().newTicket(); - sortedTable = new TicketAndPromise(ticket, Callbacks.grpcUnaryPromise(c -> { + sortedTable = new TicketAndPromise<>(ticket, Callbacks.grpcUnaryPromise(c -> { HierarchicalTableApplyRequest applyFilter = new HierarchicalTableApplyRequest(); applyFilter.setSortsList(nextSort.stream().map(Sort::makeDescriptor).toArray( io.deephaven.javascript.proto.dhinternal.io.deephaven.proto.table_pb.SortDescriptor[]::new)); - applyFilter.setInputHierarchicalTableId(prevTicket.ticket); + 
applyFilter.setInputHierarchicalTableId(prevTicket.ticket()); applyFilter.setResultHierarchicalTableId(ticket); connection.hierarchicalTableServiceClient().apply(applyFilter, connection.metadata(), c::apply); - })); + }), connection); return sortedTable; } @@ -593,15 +566,15 @@ private Promise makeKeyTable() { return keyTable; } - private TicketAndPromise makeView(TicketAndPromise prevTicket) { + private TicketAndPromise makeView(TicketAndPromise prevTicket) { if (viewTicket != null) { return viewTicket; } Ticket ticket = connection.getConfig().newTicket(); Promise keyTable = makeKeyTable(); - viewTicket = new TicketAndPromise(ticket, Callbacks.grpcUnaryPromise(c -> { + viewTicket = new TicketAndPromise<>(ticket, Callbacks.grpcUnaryPromise(c -> { HierarchicalTableViewRequest viewRequest = new HierarchicalTableViewRequest(); - viewRequest.setHierarchicalTableId(prevTicket.ticket); + viewRequest.setHierarchicalTableId(prevTicket.ticket()); viewRequest.setResultViewId(ticket); keyTable.then(t -> { if (keyTableData[0].length > 0) { @@ -616,7 +589,7 @@ private TicketAndPromise makeView(TicketAndPromise prevTicket) { c.apply(error, null); return null; }); - })); + }), connection); return viewTicket; } @@ -658,9 +631,9 @@ private void replaceSubscription(RebuildStep step) { TicketAndPromise view = makeView(sort); return Promise.all( keyTable, - filter.promise, - sort.promise, - view.promise); + filter.promise(), + sort.promise(), + view.promise()); }) .then(results -> { BitSet columnsBitset = makeColumnSubscriptionBitset(); @@ -699,7 +672,7 @@ private void replaceSubscription(RebuildStep step) { updateInterval, 0, 0); double tableTicketOffset = BarrageSubscriptionRequest.createTicketVector(doGetRequest, - viewTicket.ticket.getTicket_asU8()); + viewTicket.ticket().getTicket_asU8()); BarrageSubscriptionRequest.startBarrageSubscriptionRequest(doGetRequest); BarrageSubscriptionRequest.addTicket(doGetRequest, tableTicketOffset); BarrageSubscriptionRequest.addColumns(doGetRequest, columnsOffset); @@ -799,7 +772,7 @@ private void handleUpdate(List nextSort, List nextFilters this.filters = nextFilters; if (fireEvent) { - CustomEventInit updatedEvent = CustomEventInit.create(); + CustomEventInit updatedEvent = CustomEventInit.create(); updatedEvent.setDetail(viewportData); fireEvent(EVENT_UPDATED, updatedEvent); } @@ -1192,7 +1165,6 @@ public Promise selectDistinct(Column[] columns) { }); } - @JsMethod public Promise getTotalsTableConfig() { // we want to communicate to the JS dev that there is no default config, so we allow // returning null here, rather than a default config. 
They can then easily build a @@ -1201,7 +1173,6 @@ public Promise getTotalsTableConfig() { return sourceTable.get().then(t -> Promise.resolve(t.getTotalsTableConfig())); } - @JsMethod public Promise getTotalsTable(@JsOptional Object config) { return sourceTable.get().then(t -> { // if this is the first time it is used, it might not be filtered correctly, so check that the filters match @@ -1213,7 +1184,6 @@ public Promise getTotalsTable(@JsOptional Object config) { }); } - @JsMethod public Promise getGrandTotalsTable(@JsOptional Object config) { return sourceTable.get().then(t -> Promise.resolve(t.getGrandTotalsTable(config))); } From a61a8ef123c55dd8b94dc3e16c0722a59d0f8e44 Mon Sep 17 00:00:00 2001 From: Colin Alworth Date: Mon, 13 Nov 2023 15:32:31 -0600 Subject: [PATCH 16/41] Server logs should be written when integration tests fail (#4804) Fixes #4766 --- buildSrc/src/main/groovy/Docker.groovy | 66 ++++++++++++++----- .../groovy/io.deephaven.python-wheel.gradle | 8 ++- proto/raw-js-openapi/build.gradle | 2 +- 3 files changed, 55 insertions(+), 21 deletions(-) diff --git a/buildSrc/src/main/groovy/Docker.groovy b/buildSrc/src/main/groovy/Docker.groovy index c1dfb6c7863..618dd9f6404 100644 --- a/buildSrc/src/main/groovy/Docker.groovy +++ b/buildSrc/src/main/groovy/Docker.groovy @@ -300,7 +300,34 @@ class Docker { if (cfg.copyOut && !cfg.showLogsOnSuccess) { // Single task with explicit inputs and outputs, to let gradle detect if it is up to date, and let docker - // cache what it can. + // cache what it can. While far more efficient for gradle to run (or rather, know when it does not need to + // run), it is also more bug-prone while we try to get various competing features working. + // + // To handle these use cases, we're not using dependsOn as we typically would do, but instead using + // finalizedBy and onlyIf. Here's a mermaid diagram: + // + // graph LR; + // MakeImage -. finalizedBy .-> Run + // Sync -- dependsOn --> MakeImage + // Run -. finalizedBy .-> Sync + // + // + // Unlike "A dependsOn B", "B finalized A" will let B run if A failed, and will not run A if B must run. + // Combining the chain of finalizedBys between MakeImage <- Run <- Sync with the dependsOn from + // Sync -> MakeImage lets us handle the following cases: + // * Successful run, output is sync'd afterwards, final task succeeds + // * Failed run, output is sync'd afterwards, final task fails + // * Failed image creation, no run, no sync, no final task + // * Previously successful run with no source changes, no tasks run (all "UP-TO-DATE") + // + // Tests to run to confirm functionality: + // * After changes, confirm that :web:assemble runs (isn't all "UP-TO-DATE") + // * Then run again with no changes, confirm all are UP-TO-DATE, roughly 2s build time + // * Edit a test that uses deephavenDocker to fail, confirm that the test fails, that the test-reports + // are copied out, and that server logs are written to console + // * Ensure that if the test is set to pass that the test-reports are copied out, and server logs are + // not written. + // Note that at this time integration tests using the deephavenDocker plugin are never UP-TO-DATE. // Note that if "showLogsOnSuccess" is true, we don't run this way, since that would omit logs when cached. def buildAndRun = project.tasks.register("${taskName}Run", CombinedDockerRunTask) { cacheableDockerTask -> @@ -337,35 +364,38 @@ class Docker { } } - // Handle copying failure. 
This is now distinct from the "actual" Sync task that depends directly - // on the CombinedDockerRunTask. - def syncAfterFail = project.tasks.register("${taskName}SyncAfterFail", Sync) { sync -> + // Specify that makeImage is finalized by buildAndRun - that is, in this configuration buildAndRun + // must run after makeImage finishes + makeImage.configure {it -> + it.finalizedBy(buildAndRun) + } + + // Handle copying output from the docker task to the user-controlled location + def syncOutput = project.tasks.register(taskName, Sync) { sync -> sync.with { + dependsOn(makeImage) // run the provided closure first cfg.copyOut.execute(sync) // then set the from location from dockerCopyLocation - onlyIf { buildAndRun.get().state.failure != null } + doLast { + // If the actual task has already failed, we need to fail this task to signal to any downstream + // tasks to not continue. Under normal circumstances, we might just not run this Sync task at + // all to signal this, however in our case we want to copy out artifacts of failure for easier + // debugging. + if (buildAndRun.get().state.failure != null) { + throw new GradleException('Docker task failed, see earlier task failures for details') + } + } } } buildAndRun.configure {t -> - t.finalizedBy syncAfterFail + t.finalizedBy syncOutput } - // Sync outputs to the desired location - return project.tasks.register(taskName, Sync) { sync -> - sync.with { - dependsOn buildAndRun - - // run the provided closure first - cfg.copyOut.execute(sync) - - // then set the from location - from dockerCopyLocation - } - } + return syncOutput } // With no outputs, we can use the standard individual containers, and gradle will have to re-run each time // the task is invoked, can never be marked as up to date. diff --git a/buildSrc/src/main/groovy/io.deephaven.python-wheel.gradle b/buildSrc/src/main/groovy/io.deephaven.python-wheel.gradle index 47af3abbd3d..08b6814fe2c 100644 --- a/buildSrc/src/main/groovy/io.deephaven.python-wheel.gradle +++ b/buildSrc/src/main/groovy/io.deephaven.python-wheel.gradle @@ -34,6 +34,8 @@ configurations { } project.evaluationDependsOn(Docker.registryProject('python')) +def wheelPath = project.layout.buildDirectory.dir('wheel') + def buildWheel = Docker.registerDockerTask(project, 'buildWheel') { config -> config.copyIn { Sync sync -> // apply the extension spec, copying into src @@ -58,10 +60,12 @@ def buildWheel = Docker.registerDockerTask(project, 'buildWheel') { config -> config.parentContainers = [ Docker.registryTask(project, 'python') ] config.containerOutPath='/usr/src/app/dist' config.copyOut { Sync sync -> - sync.into "build/wheel" + sync.into wheelPath } } artifacts { - pythonWheel buildWheel + pythonWheel(wheelPath) { + builtBy buildWheel + } } diff --git a/proto/raw-js-openapi/build.gradle b/proto/raw-js-openapi/build.gradle index a9e9a9bd909..782df4ad7d3 100644 --- a/proto/raw-js-openapi/build.gradle +++ b/proto/raw-js-openapi/build.gradle @@ -15,7 +15,7 @@ dependencies { def webpackSourcesLocation = layout.buildDirectory.dir("${buildDir}/dhapi") -Docker.registerDockerTask(project, 'webpackSources') { +def webpackSources = Docker.registerDockerTask(project, 'webpackSources') { copyIn { from(configurations.js) { // note: we are only copying the JS and not TS files. 
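The `finalizedBy`/`state.failure` wiring that the Docker.groovy change above describes (and diagrams in its mermaid comment) is easier to see in isolation. Below is a standalone Gradle (Groovy DSL) sketch of the same failure-propagation idea, stripped of the Docker plumbing: the copy task is attached with `finalizedBy` so it still runs when the run task fails, and it re-throws afterwards so downstream tasks do not continue. The task names (`runContainer`, `copyResults`) and directories are hypothetical and not part of the patch; only the `finalizedBy` and `state.failure` mechanics mirror what the patch does.

```groovy
// Standalone sketch of the pattern from the Docker.groovy change: copy results out even on failure.
// Task names and directories here are hypothetical.
def runContainer = tasks.register('runContainer') {
    doLast {
        // Stand-in for the container run; if the tests inside fail, this task fails,
        // but copyResults still executes because it is a finalizer, not a dependency.
        println 'running integration tests...'
    }
}

def copyResults = tasks.register('copyResults', Sync) {
    from layout.buildDirectory.dir('container-output') // where the run drops reports/logs
    into layout.buildDirectory.dir('test-reports')
    doLast {
        // Re-surface the failure so the overall build still fails after the copy,
        // mirroring the buildAndRun.get().state.failure check in the patch.
        if (runContainer.get().state.failure != null) {
            throw new GradleException('runContainer failed; see copied reports for details')
        }
    }
}

runContainer.configure { it.finalizedBy(copyResults) }
```

With plain `dependsOn` in place of `finalizedBy`, a failed run would skip the copy entirely, which is the behavior issue #4766 reports: test reports and server logs were lost exactly when they were most needed.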
From df45e1ce170d5a42036f73dbedd4623e31b4cf82 Mon Sep 17 00:00:00 2001 From: Shivam Malhotra Date: Mon, 13 Nov 2023 19:12:32 -0600 Subject: [PATCH 17/41] Moved Parquet specific methods out of DateTimeUtils (#4819) --- .../java/io/deephaven/time/DateTimeUtils.java | 52 --------------- .../io/deephaven/time/TestDateTimeUtils.java | 16 ----- .../pagestore/topage/ToLocalDateTimePage.java | 8 +-- .../transfer/LocalDateTimeArrayTransfer.java | 4 +- .../table/transfer/LocalDateTimeTransfer.java | 4 +- .../transfer/LocalDateTimeVectorTransfer.java | 4 +- .../parquet/table/util/TransferUtils.java | 66 +++++++++++++++++++ .../parquet/table/TestTransferUtils.java | 57 ++++++++++++++++ .../ReplicateParquetTransferObjects.java | 6 +- 9 files changed, 137 insertions(+), 80 deletions(-) create mode 100644 extensions/parquet/table/src/main/java/io/deephaven/parquet/table/util/TransferUtils.java create mode 100644 extensions/parquet/table/src/test/java/io/deephaven/parquet/table/TestTransferUtils.java diff --git a/engine/time/src/main/java/io/deephaven/time/DateTimeUtils.java b/engine/time/src/main/java/io/deephaven/time/DateTimeUtils.java index 7f86e80403e..231547ee56f 100644 --- a/engine/time/src/main/java/io/deephaven/time/DateTimeUtils.java +++ b/engine/time/src/main/java/io/deephaven/time/DateTimeUtils.java @@ -982,21 +982,6 @@ public static long epochNanos(@Nullable final ZonedDateTime dateTime) { return safeComputeNanos(dateTime.toEpochSecond(), dateTime.getNano()); } - /** - * Returns nanoseconds from the Epoch for a {@link LocalDateTime} value in UTC timezone. - * - * @param localDateTime the local date time to compute the Epoch offset for - * @return nanoseconds since Epoch, or a NULL_LONG value if the local date time is null - */ - @ScriptApi - public static long epochNanosUTC(@Nullable final LocalDateTime localDateTime) { - if (localDateTime == null) { - return NULL_LONG; - } - return TimeUnit.SECONDS.toNanos(localDateTime.toEpochSecond(ZoneOffset.UTC)) - + localDateTime.toLocalTime().getNano(); - } - /** * Returns microseconds from the Epoch for an {@link Instant} value. * @@ -1415,43 +1400,6 @@ public static ZonedDateTime excelToZonedDateTime(final double excel, @Nullable f return epochMillisToZonedDateTime(excelTimeToEpochMillis(excel, timeZone), timeZone); } - /** - * Converts nanoseconds from the Epoch to a {@link LocalDateTime} in UTC timezone. - * - * @param nanos nanoseconds since Epoch - * @return {@code null} if the input is {@link QueryConstants#NULL_LONG}; otherwise the input nanoseconds from the - * Epoch converted to a {@link LocalDateTime} in UTC timezone - */ - public static @Nullable LocalDateTime epochNanosToLocalDateTimeUTC(final long nanos) { - return nanos == NULL_LONG ? null - : LocalDateTime.ofEpochSecond(nanos / 1_000_000_000L, (int) (nanos % 1_000_000_000L), ZoneOffset.UTC); - } - - /** - * Converts microseconds from the Epoch to a {@link LocalDateTime} in UTC timezone. - * - * @param micros microseconds since Epoch - * @return {@code null} if the input is {@link QueryConstants#NULL_LONG}; otherwise the input microseconds from the - * Epoch converted to a {@link LocalDateTime} in UTC timezone - */ - public static @Nullable LocalDateTime epochMicrosToLocalDateTimeUTC(final long micros) { - return micros == NULL_LONG ? null - : LocalDateTime.ofEpochSecond(micros / 1_000_000L, (int) ((micros % 1_000_000L) * MICRO), - ZoneOffset.UTC); - } - - /** - * Converts milliseconds from the Epoch to a {@link LocalDateTime} in UTC timezone. 
- * - * @param millis milliseconds since Epoch - * @return {@code null} if the input is {@link QueryConstants#NULL_LONG}; otherwise the input milliseconds from the - * Epoch converted to a {@link LocalDateTime} in UTC timezone - */ - public static @Nullable LocalDateTime epochMillisToLocalDateTimeUTC(final long millis) { - return millis == NULL_LONG ? null - : LocalDateTime.ofEpochSecond(millis / 1_000L, (int) ((millis % 1_000L) * MILLI), ZoneOffset.UTC); - } - // endregion // region Arithmetic diff --git a/engine/time/src/test/java/io/deephaven/time/TestDateTimeUtils.java b/engine/time/src/test/java/io/deephaven/time/TestDateTimeUtils.java index 4e3e6b3d501..c8fbfc2d78d 100644 --- a/engine/time/src/test/java/io/deephaven/time/TestDateTimeUtils.java +++ b/engine/time/src/test/java/io/deephaven/time/TestDateTimeUtils.java @@ -1391,10 +1391,6 @@ public void testEpochNanos() { TestCase.assertEquals(nanos, DateTimeUtils.epochNanos(dt3)); TestCase.assertEquals(NULL_LONG, DateTimeUtils.epochNanos((ZonedDateTime) null)); - - final LocalDateTime ldt = LocalDateTime.ofInstant(dt2, ZoneId.of("UTC")); - TestCase.assertEquals(nanos, DateTimeUtils.epochNanosUTC(ldt)); - TestCase.assertEquals(NULL_LONG, DateTimeUtils.epochNanosUTC(null)); } public void testEpochMicros() { @@ -1460,10 +1456,6 @@ public void testEpochNanosTo() { TestCase.assertEquals(dt3, DateTimeUtils.epochNanosToZonedDateTime(nanos, TZ_JP)); TestCase.assertNull(DateTimeUtils.epochNanosToZonedDateTime(NULL_LONG, TZ_JP)); TestCase.assertNull(DateTimeUtils.epochNanosToZonedDateTime(nanos, null)); - - final LocalDateTime ldt = LocalDateTime.ofInstant(dt2, ZoneId.of("UTC")); - TestCase.assertEquals(ldt, DateTimeUtils.epochNanosToLocalDateTimeUTC(nanos)); - TestCase.assertNull(DateTimeUtils.epochNanosToLocalDateTimeUTC(NULL_LONG)); } public void testEpochMicrosTo() { @@ -1479,10 +1471,6 @@ public void testEpochMicrosTo() { TestCase.assertEquals(dt3, DateTimeUtils.epochMicrosToZonedDateTime(micros, TZ_JP)); TestCase.assertNull(DateTimeUtils.epochMicrosToZonedDateTime(NULL_LONG, TZ_JP)); TestCase.assertNull(DateTimeUtils.epochMicrosToZonedDateTime(micros, null)); - - final LocalDateTime ldt = LocalDateTime.ofInstant(dt2, ZoneId.of("UTC")); - TestCase.assertEquals(ldt, DateTimeUtils.epochMicrosToLocalDateTimeUTC(micros)); - TestCase.assertNull(DateTimeUtils.epochMicrosToLocalDateTimeUTC(NULL_LONG)); } public void testEpochMillisTo() { @@ -1498,10 +1486,6 @@ public void testEpochMillisTo() { TestCase.assertEquals(dt3, DateTimeUtils.epochMillisToZonedDateTime(millis, TZ_JP)); TestCase.assertNull(DateTimeUtils.epochMillisToZonedDateTime(NULL_LONG, TZ_JP)); TestCase.assertNull(DateTimeUtils.epochMillisToZonedDateTime(millis, null)); - - final LocalDateTime ldt = LocalDateTime.ofInstant(dt2, ZoneId.of("UTC")); - TestCase.assertEquals(ldt, DateTimeUtils.epochMillisToLocalDateTimeUTC(millis)); - TestCase.assertNull(DateTimeUtils.epochMillisToLocalDateTimeUTC(NULL_LONG)); } public void testEpochSecondsTo() { diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalDateTimePage.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalDateTimePage.java index 8f906915d56..356d547c74e 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalDateTimePage.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/pagestore/topage/ToLocalDateTimePage.java @@ -5,7 +5,7 @@ import io.deephaven.chunk.ChunkType; 
import io.deephaven.chunk.attributes.Any; -import io.deephaven.time.DateTimeUtils; +import io.deephaven.parquet.table.util.TransferUtils; import io.deephaven.util.QueryConstants; import org.apache.parquet.schema.LogicalTypeAnnotation; import org.jetbrains.annotations.NotNull; @@ -79,21 +79,21 @@ private static LocalDateTime[] convertResultHelper(@NotNull final Object result, private static final class ToLocalDateTimePageFromMillis extends ToLocalDateTimePage { @Override public LocalDateTime[] convertResult(@NotNull final Object result) { - return convertResultHelper(result, DateTimeUtils::epochMillisToLocalDateTimeUTC); + return convertResultHelper(result, TransferUtils::epochMillisToLocalDateTimeUTC); } } private static final class ToLocalDateTimePageFromMicros extends ToLocalDateTimePage { @Override public LocalDateTime[] convertResult(@NotNull final Object result) { - return convertResultHelper(result, DateTimeUtils::epochMicrosToLocalDateTimeUTC); + return convertResultHelper(result, TransferUtils::epochMicrosToLocalDateTimeUTC); } } private static final class ToLocalDateTimePageFromNanos extends ToLocalDateTimePage { @Override public LocalDateTime[] convertResult(@NotNull final Object result) { - return convertResultHelper(result, DateTimeUtils::epochNanosToLocalDateTimeUTC); + return convertResultHelper(result, TransferUtils::epochNanosToLocalDateTimeUTC); } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/LocalDateTimeArrayTransfer.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/LocalDateTimeArrayTransfer.java index 16ed5fc6d0f..4b1e8a74da4 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/LocalDateTimeArrayTransfer.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/LocalDateTimeArrayTransfer.java @@ -10,7 +10,7 @@ import io.deephaven.engine.rowset.RowSequence; import io.deephaven.engine.table.ColumnSource; -import io.deephaven.time.DateTimeUtils; +import io.deephaven.parquet.table.util.TransferUtils; import org.jetbrains.annotations.NotNull; import java.nio.LongBuffer; @@ -38,7 +38,7 @@ void resizeBuffer(final int length) { @Override void copyToBuffer(@NotNull final EncodedData data) { for (final LocalDateTime t : data.encodedValues) { - buffer.put(DateTimeUtils.epochNanosUTC(t)); + buffer.put(TransferUtils.epochNanosUTC(t)); } } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/LocalDateTimeTransfer.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/LocalDateTimeTransfer.java index 38ca4a338ea..b5e91a05a17 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/LocalDateTimeTransfer.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/transfer/LocalDateTimeTransfer.java @@ -12,7 +12,7 @@ import io.deephaven.chunk.attributes.Values; import io.deephaven.engine.rowset.RowSequence; import io.deephaven.engine.table.ColumnSource; -import io.deephaven.time.DateTimeUtils; +import io.deephaven.parquet.table.util.TransferUtils; import org.jetbrains.annotations.NotNull; import java.nio.LongBuffer; @@ -31,7 +31,7 @@ final class LocalDateTimeTransfer extends GettingPrimitiveTransfer> data) { try (final CloseableIterator dataIterator = data.encodedValues.iterator()) { - dataIterator.forEachRemaining((LocalDateTime t) -> buffer.put(DateTimeUtils.epochNanosUTC(t))); + dataIterator.forEachRemaining((LocalDateTime t) -> 
buffer.put(TransferUtils.epochNanosUTC(t))); } } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/util/TransferUtils.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/util/TransferUtils.java new file mode 100644 index 00000000000..ab231f07dfe --- /dev/null +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/util/TransferUtils.java @@ -0,0 +1,66 @@ +package io.deephaven.parquet.table.util; + +import io.deephaven.time.DateTimeUtils; +import io.deephaven.util.QueryConstants; +import org.jetbrains.annotations.Nullable; + +import java.time.LocalDateTime; +import java.time.ZoneOffset; + +/** + * Internal library with utility methods for converting data between Deephaven and Parquet. + */ +public class TransferUtils { + /** + * Returns nanoseconds from the Epoch for a {@link LocalDateTime} value in UTC timezone. + * + * @param localDateTime the local date time to compute the Epoch offset for + * @return nanoseconds since Epoch, or a NULL_LONG value if the local date time is null + */ + public static long epochNanosUTC(@Nullable final LocalDateTime localDateTime) { + if (localDateTime == null) { + return QueryConstants.NULL_LONG; + } + return DateTimeUtils.secondsToNanos(localDateTime.toEpochSecond(ZoneOffset.UTC)) + + localDateTime.toLocalTime().getNano(); + } + + /** + * Converts nanoseconds from the Epoch to a {@link LocalDateTime} in UTC timezone. + * + * @param nanos nanoseconds since Epoch + * @return {@code null} if the input is {@link QueryConstants#NULL_LONG}; otherwise the input nanoseconds from the + * Epoch converted to a {@link LocalDateTime} in UTC timezone + */ + public static @Nullable LocalDateTime epochNanosToLocalDateTimeUTC(final long nanos) { + return nanos == QueryConstants.NULL_LONG ? null + : LocalDateTime.ofEpochSecond(nanos / 1_000_000_000L, (int) (nanos % 1_000_000_000L), ZoneOffset.UTC); + } + + /** + * Converts microseconds from the Epoch to a {@link LocalDateTime} in UTC timezone. + * + * @param micros microseconds since Epoch + * @return {@code null} if the input is {@link QueryConstants#NULL_LONG}; otherwise the input microseconds from the + * Epoch converted to a {@link LocalDateTime} in UTC timezone + */ + public static @Nullable LocalDateTime epochMicrosToLocalDateTimeUTC(final long micros) { + return micros == QueryConstants.NULL_LONG ? null + : LocalDateTime.ofEpochSecond(micros / 1_000_000L, (int) ((micros % 1_000_000L) * DateTimeUtils.MICRO), + ZoneOffset.UTC); + } + + /** + * Converts milliseconds from the Epoch to a {@link LocalDateTime} in UTC timezone. + * + * @param millis milliseconds since Epoch + * @return {@code null} if the input is {@link QueryConstants#NULL_LONG}; otherwise the input milliseconds from the + * Epoch converted to a {@link LocalDateTime} in UTC timezone + */ + public static @Nullable LocalDateTime epochMillisToLocalDateTimeUTC(final long millis) { + return millis == QueryConstants.NULL_LONG ? 
null + : LocalDateTime.ofEpochSecond(millis / 1_000L, (int) ((millis % 1_000L) * DateTimeUtils.MILLI), + ZoneOffset.UTC); + } + +} diff --git a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/TestTransferUtils.java b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/TestTransferUtils.java new file mode 100644 index 00000000000..03a9ab96b62 --- /dev/null +++ b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/TestTransferUtils.java @@ -0,0 +1,57 @@ +/** + * Copyright (c) 2016-2022 Deephaven Data Labs and Patent Pending + */ +package io.deephaven.parquet.table; + +import io.deephaven.parquet.table.util.TransferUtils; +import io.deephaven.time.DateTimeUtils; +import io.deephaven.util.QueryConstants; +import junit.framework.TestCase; +import org.junit.Test; + +import java.time.Instant; +import java.time.LocalDateTime; +import java.time.ZoneId; + +final public class TestTransferUtils { + + @Test + public void testEpochNanosUTC() { + final long nanos = 123456789123456789L; + final Instant dt2 = Instant.ofEpochSecond(0, nanos); + final LocalDateTime ldt = LocalDateTime.ofInstant(dt2, ZoneId.of("UTC")); + TestCase.assertEquals(nanos, TransferUtils.epochNanosUTC(ldt)); + TestCase.assertEquals(QueryConstants.NULL_LONG, TransferUtils.epochNanosUTC(null)); + } + + @Test + public void testEpochNanosTo() { + final long nanos = 123456789123456789L; + final Instant dt2 = Instant.ofEpochSecond(0, nanos); + final LocalDateTime ldt = LocalDateTime.ofInstant(dt2, ZoneId.of("UTC")); + TestCase.assertEquals(ldt, TransferUtils.epochNanosToLocalDateTimeUTC(nanos)); + TestCase.assertNull(TransferUtils.epochNanosToLocalDateTimeUTC(QueryConstants.NULL_LONG)); + } + + @Test + public void testEpochMicrosTo() { + long nanos = 123456789123456789L; + final long micros = DateTimeUtils.nanosToMicros(nanos); + nanos = DateTimeUtils.microsToNanos(micros); + final Instant dt2 = Instant.ofEpochSecond(0, nanos); + final LocalDateTime ldt = LocalDateTime.ofInstant(dt2, ZoneId.of("UTC")); + TestCase.assertEquals(ldt, TransferUtils.epochMicrosToLocalDateTimeUTC(micros)); + TestCase.assertNull(TransferUtils.epochMicrosToLocalDateTimeUTC(QueryConstants.NULL_LONG)); + } + + @Test + public void testEpochMillisTo() { + long nanos = 123456789123456789L; + final long millis = DateTimeUtils.nanosToMillis(nanos); + nanos = DateTimeUtils.millisToNanos(millis); + final Instant dt2 = Instant.ofEpochSecond(0, nanos); + final LocalDateTime ldt = LocalDateTime.ofInstant(dt2, ZoneId.of("UTC")); + TestCase.assertEquals(ldt, TransferUtils.epochMillisToLocalDateTimeUTC(millis)); + TestCase.assertNull(TransferUtils.epochMillisToLocalDateTimeUTC(QueryConstants.NULL_LONG)); + } +} diff --git a/replication/static/src/main/java/io/deephaven/replicators/ReplicateParquetTransferObjects.java b/replication/static/src/main/java/io/deephaven/replicators/ReplicateParquetTransferObjects.java index eb8333ebfeb..508386a3b0c 100644 --- a/replication/static/src/main/java/io/deephaven/replicators/ReplicateParquetTransferObjects.java +++ b/replication/static/src/main/java/io/deephaven/replicators/ReplicateParquetTransferObjects.java @@ -69,9 +69,10 @@ public static void main(String[] args) throws IOException { replaceAll(PARQUET_INSTANT_VECTOR_TRANSFER_PATH, PARQUET_TIME_VECTOR_TRANSFER_PATH, null, NO_EXCEPTIONS, pairs); pairs = new String[][] { + {"io.deephaven.time.DateTimeUtils", "io.deephaven.parquet.table.util.TransferUtils"}, {"InstantArrayTransfer", "LocalDateTimeArrayTransfer"}, {"InstantVectorTransfer", 
"LocalDateTimeVectorTransfer"}, - {"DateTimeUtils.epochNanos", "DateTimeUtils.epochNanosUTC"}, + {"DateTimeUtils.epochNanos", "TransferUtils.epochNanosUTC"}, {"Instant", "LocalDateTime"} }; replaceAll(PARQUET_INSTANT_ARRAY_TRANSFER_PATH, PARQUET_LOCAL_DATE_TIME_ARRAY_TRANSFER_PATH, null, @@ -80,9 +81,10 @@ public static void main(String[] args) throws IOException { NO_EXCEPTIONS, pairs); pairs = new String[][] { + {"io.deephaven.time.DateTimeUtils", "io.deephaven.parquet.table.util.TransferUtils"}, {"TimeTransfer", "LocalDateTimeTransfer"}, {"LocalTime", "LocalDateTime"}, - {"DateTimeUtils.nanosOfDay", "DateTimeUtils.epochNanosUTC"} + {"DateTimeUtils.nanosOfDay", "TransferUtils.epochNanosUTC"} }; replaceAll(PARQUET_TIME_TRANSFER_PATH, PARQUET_LOCAL_DATE_TIME_TRANSFER_PATH, null, NO_EXCEPTIONS, pairs); From 318c9d2c3b8b9567ae1db6d621d078a73e25128f Mon Sep 17 00:00:00 2001 From: Alex Peters <80283343+alexpeters1208@users.noreply.github.com> Date: Mon, 13 Nov 2023 22:18:31 -0600 Subject: [PATCH 18/41] Clarify standard deviation / variance documentation with Bessel's correction (#4786) * First attempt at fixing std/var docs * Fix Java file formatting * Chip review suggestions * Spotless apply * Spotless apply (again) --- .../include/public/deephaven/client/client.h | 24 +++- .../public/deephaven/client/update_by.h | 25 +++-- engine/function/src/templates/Numeric.ftl | 104 ++++++++++++------ .../engine/util/TotalsTableBuilder.java | 4 +- go/pkg/client/query.go | 20 +++- go/pkg/client/tablehandle.go | 10 +- py/client/pydeephaven/_table_interface.py | 12 +- py/client/pydeephaven/agg.py | 10 +- py/client/pydeephaven/table.py | 12 +- py/client/pydeephaven/updateby.py | 26 +++-- py/server/deephaven/agg.py | 12 +- py/server/deephaven/table.py | 10 +- py/server/deephaven/updateby.py | 28 +++-- .../io/deephaven/api/TableOperations.java | 52 +++++++-- .../io/deephaven/api/agg/Aggregation.java | 14 ++- .../io/deephaven/api/agg/spec/AggSpecStd.java | 8 +- .../io/deephaven/api/agg/spec/AggSpecVar.java | 7 +- .../api/updateby/UpdateByOperation.java | 80 +++++++++----- web/WebDevelopersGuide.md | 4 +- .../web/client/api/JsColumnStatistics.java | 6 +- .../tree/enums/JsAggregationOperation.java | 14 ++- 21 files changed, 340 insertions(+), 142 deletions(-) diff --git a/cpp-client/deephaven/dhclient/include/public/deephaven/client/client.h b/cpp-client/deephaven/dhclient/include/public/deephaven/client/client.h index 7cb72ca94e1..a99ca9c46e9 100644 --- a/cpp-client/deephaven/dhclient/include/public/deephaven/client/client.h +++ b/cpp-client/deephaven/dhclient/include/public/deephaven/client/client.h @@ -568,8 +568,11 @@ class Aggregate { } /** - * Returns an aggregator that computes the standard deviation of values, within an aggregation - * group, for each input column. + * Returns an aggregator that computes the sample standard deviation of values, within an + * aggregation group, for each input column. + * + * Sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. */ [[nodiscard]] static Aggregate Std(std::vector column_specs); @@ -608,8 +611,11 @@ class Aggregate { } /** - * Returns an aggregator that computes the variance of values, within an aggregation group, + * Returns an aggregator that computes the sample variance of values, within an aggregation group, * for each input column. 
+ * + * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. */ [[nodiscard]] static Aggregate Var(std::vector column_specs); @@ -801,8 +807,11 @@ Aggregate AggPct(double percentile, Args &&... args) { } /** - * Returns an aggregator that computes the standard deviation of values, within an aggregation - * group, for each input column. + * Returns an aggregator that computes the sample standard deviation of values, within an aggregation group, + * for each input column. + * + * Sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. */ template [[nodiscard]] @@ -821,8 +830,11 @@ Aggregate aggSum(Args &&... args) { } /** - * Returns an aggregator that computes the variance of values, within an aggregation group, + * Returns an aggregator that computes the sample variance of values, within an aggregation group, * for each input column. + * + * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. */ template [[nodiscard]] diff --git a/cpp-client/deephaven/dhclient/include/public/deephaven/client/update_by.h b/cpp-client/deephaven/dhclient/include/public/deephaven/client/update_by.h index 5b9d41025a0..f2d722718e1 100644 --- a/cpp-client/deephaven/dhclient/include/public/deephaven/client/update_by.h +++ b/cpp-client/deephaven/dhclient/include/public/deephaven/client/update_by.h @@ -528,11 +528,13 @@ UpdateByOperation rollingCountTime(std::string timestamp_col, std::vector cols, int rev_ticks, int fwd_ticks = 0); /** - * Creates a rolling standard deviation UpdateByOperation for the supplied column names, using time as the - * windowing unit. This function accepts nanoseconds or time strings as the reverse and forward - * window parameters. Negative values are allowed and can be used to generate completely forward or - * completely reverse windows. A row containing a null in the timestamp column belongs to no window - * and will not be considered in the windows of other rows; its output will be null. + * Creates a rolling sample standard deviation UpdateByOperation for the supplied column names, using time as the + * windowing unit. This function accepts nanoseconds or time strings as the reverse and forward window parameters. + * Negative values are allowed and can be used to generate completely forward or completely reverse windows. + * A row containing a null in the timestamp column belongs to no window and will not be considered in the windows + * of other rows; its output will be null. + * + * Sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * See the documentation of rollingSumTime() for examples of window values. * diff --git a/engine/function/src/templates/Numeric.ftl b/engine/function/src/templates/Numeric.ftl index 49df0a3f4af..6270b603bea 100644 --- a/engine/function/src/templates/Numeric.ftl +++ b/engine/function/src/templates/Numeric.ftl @@ -427,20 +427,26 @@ public class Numeric { } /** - * Returns the variance. Null values are excluded. 
+ * Returns the sample variance. Null values are excluded. + * + * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param values values. - * @return variance of non-null values. + * @return sample variance of non-null values. */ public static double var(${pt.boxed}[] values) { return var(unbox(values)); } /** - * Returns the variance. Null values are excluded. + * Returns the sample variance. Null values are excluded. + * + * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param values values. - * @return variance of non-null values. + * @return sample variance of non-null values. */ public static double var(${pt.primitive}... values) { if (values == null) { @@ -451,10 +457,13 @@ public class Numeric { } /** - * Returns the variance. Null values are excluded. + * Returns the sample variance. Null values are excluded. + * + * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param values values. - * @return variance of non-null values. + * @return sample variance of non-null values. */ public static double var(${pt.vector} values) { if (values == null) { @@ -476,7 +485,7 @@ public class Numeric { } } - // Return NaN if poisoned or too few values to compute variance. + // Return NaN if poisoned or too few values to compute sample variance. if (count <= 1 || Double.isNaN(sum) || Double.isNaN(sum2)) { return Double.NaN; } @@ -487,7 +496,7 @@ public class Numeric { final double delta = sum2 - vs2bar; final double rel_eps = delta / eps; - // Return zero when the variance is leq the floating point error. + // Return zero when the sample variance is leq the floating point error. return Math.abs(rel_eps) > 1.0 ? delta / (count - 1) : 0.0; } @@ -495,11 +504,14 @@ public class Numeric { <#if pt2.valueType.isNumber > /** - * Returns the weighted variance. Null values are excluded. + * Returns the weighted sample variance. Null values are excluded. + * + * Weighted sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the weighted sample variance will be an unbiased estimator of weighted population variance. * * @param values values. * @param weights weights - * @return weighted variance of non-null values. + * @return weighted sample variance of non-null values. */ public static double wvar(${pt.primitive}[] values, ${pt2.primitive}[] weights) { if (values == null || weights == null) { @@ -510,11 +522,14 @@ public class Numeric { } /** - * Returns the weighted variance. Null values are excluded. + * Returns the weighted sample variance. Null values are excluded. + * + * Weighted sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the weighted sample variance will be an unbiased estimator of weighted population variance. * * @param values values. * @param weights weights - * @return weighted variance of non-null values. + * @return weighted sample variance of non-null values. 
*/ public static double wvar(${pt.primitive}[] values, ${pt2.vector} weights) { if (values == null || weights == null) { @@ -525,11 +540,14 @@ public class Numeric { } /** - * Returns the weighted variance. Null values are excluded. + * Returns the weighted sample variance. Null values are excluded. + * + * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param values values. * @param weights weights - * @return weighted variance of non-null values. + * @return weighted sample variance of non-null values. */ public static double wvar(${pt.vector} values, ${pt2.primitive}[] weights) { if (values == null || weights == null) { @@ -540,11 +558,14 @@ public class Numeric { } /** - * Returns the weighted variance. Null values are excluded. + * Returns the weighted sample variance. Null values are excluded. + * + * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param values values. * @param weights weights - * @return weighted variance of non-null values. + * @return weighted sample variance of non-null values. */ public static double wvar(${pt.vector} values, ${pt2.vector} weights) { if (values == null || weights == null) { @@ -579,7 +600,7 @@ public class Numeric { } } - // Return NaN if poisoned or too few values to compute variance. + // Return NaN if poisoned or too few values to compute sample variance. if (count <= 1 || Double.isNaN(sum) || Double.isNaN(sum2) || Double.isNaN(count) || Double.isNaN(count2)) { return Double.NaN; } @@ -597,20 +618,26 @@ public class Numeric { /** - * Returns the standard deviation. Null values are excluded. + * Returns the sample standard deviation. Null values are excluded. + * + * Sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param values values. - * @return standard deviation of non-null values. + * @return sample standard deviation of non-null values. */ public static double std(${pt.boxed}[] values) { return std(unbox(values)); } /** - * Returns the standard deviation. Null values are excluded. + * Returns the sample standard deviation. Null values are excluded. + * + * Sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param values values. - * @return standard deviation of non-null values. + * @return sample standard deviation of non-null values. */ public static double std(${pt.primitive}... values) { if (values == null) { @@ -621,10 +648,13 @@ public class Numeric { } /** - * Returns the standard deviation. Null values are excluded. + * Returns the sample standard deviation. Null values are excluded. + * + * Sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param values values. - * @return standard deviation of non-null values. + * @return sample standard deviation of non-null values. 
*/ public static double std(${pt.vector} values) { if (values == null) { @@ -639,11 +669,14 @@ public class Numeric { <#if pt2.valueType.isNumber > /** - * Returns the weighted standard deviation. Null values are excluded. + * Returns the weighted sample standard deviation. Null values are excluded. + * + * Weighted sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the weighted sample variance will be an unbiased estimator of weighted population variance. * * @param values values. * @param weights weights - * @return weighted standard deviation of non-null values. + * @return weighted sample standard deviation of non-null values. */ public static double wstd(${pt.primitive}[] values, ${pt2.primitive}[] weights) { if (values == null || weights == null) { @@ -654,11 +687,14 @@ public class Numeric { } /** - * Returns the weighted standard deviation. Null values are excluded. + * Returns the weighted sample standard deviation. Null values are excluded. + * + * Weighted sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the weighted sample variance will be an unbiased estimator of weighted population variance. * * @param values values. * @param weights weights - * @return weighted standard deviation of non-null values. + * @return weighted sample standard deviation of non-null values. */ public static double wstd(${pt.primitive}[] values, ${pt2.vector} weights) { if (values == null || weights == null) { @@ -669,11 +705,14 @@ public class Numeric { } /** - * Returns the weighted standard deviation. Null values are excluded. + * Returns the weighted sample standard deviation. Null values are excluded. + * + * Weighted sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the weighted sample variance will be an unbiased estimator of weighted population variance. * * @param values values. * @param weights weights - * @return weighted standard deviation of non-null values. + * @return weighted sample standard deviation of non-null values. */ public static double wstd(${pt.vector} values, ${pt2.primitive}[] weights) { if (values == null || weights == null) { @@ -684,11 +723,14 @@ public class Numeric { } /** - * Returns the weighted standard deviation. Null values are excluded. + * Returns the weighted sample standard deviation. Null values are excluded. + * + * Weighted sample standard deviation is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the weighted sample variance will be an unbiased estimator of weighted population variance. * * @param values values. * @param weights weights - * @return weighted standard deviation of non-null values. + * @return weighted sample standard deviation of non-null values. 
*/ public static double wstd(${pt.vector} values, ${pt2.vector} weights) { if (values == null || weights == null) { diff --git a/engine/table/src/main/java/io/deephaven/engine/util/TotalsTableBuilder.java b/engine/table/src/main/java/io/deephaven/engine/util/TotalsTableBuilder.java index 77e35040f2e..d3399ec18ad 100644 --- a/engine/table/src/main/java/io/deephaven/engine/util/TotalsTableBuilder.java +++ b/engine/table/src/main/java/io/deephaven/engine/util/TotalsTableBuilder.java @@ -46,11 +46,11 @@ public enum AggType { Sum, /** Return the sum of absolute values in each group. */ AbsSum, - /** Return the variance of values in each group. */ + /** Return the sample variance of values in each group. */ Var, /** Return the average of values in each group. */ Avg, - /** Return the standard deviation of each group. */ + /** Return the sample standard deviation of each group. */ Std, /** Return the first value of each group. */ First, diff --git a/go/pkg/client/query.go b/go/pkg/client/query.go index ddb25303a38..cfaac62a844 100644 --- a/go/pkg/client/query.go +++ b/go/pkg/client/query.go @@ -1013,14 +1013,20 @@ func (qb QueryNode) AvgBy(by ...string) QueryNode { return qb.addOp(dedicatedAggOp{child: qb, colNames: by, kind: tablepb2.ComboAggregateRequest_AVG}) } -// StdBy returns the standard deviation for each group. Null values are ignored. +// StdBy returns the sample standard deviation for each group. Null values are ignored. // Columns not used in the grouping must be numeric. +// +// Sample standard deviation is calculated using `Bessel's correction `_, +// which ensures that the sample variance will be an unbiased estimator of population variance. func (qb QueryNode) StdBy(by ...string) QueryNode { return qb.addOp(dedicatedAggOp{child: qb, colNames: by, kind: tablepb2.ComboAggregateRequest_STD}) } -// VarBy returns the variance for each group. Null values are ignored. +// VarBy returns the sample variance for each group. Null values are ignored. // Columns not used in the grouping must be numeric. +// +// Sample variance is calculated using `Bessel's correction `_, +// which ensures that the sample variance will be an unbiased estimator of population variance. func (qb QueryNode) VarBy(by ...string) QueryNode { return qb.addOp(dedicatedAggOp{child: qb, colNames: by, kind: tablepb2.ComboAggregateRequest_VAR}) } @@ -1156,15 +1162,21 @@ func (b *AggBuilder) Percentile(percentile float64, cols ...string) *AggBuilder return b } -// Std returns an aggregator that computes the standard deviation of values, within an aggregation group, for each input column. +// Std returns an aggregator that computes the sample standard deviation of values, within an aggregation group, for each input column. // The source columns are specified by cols. +// +// Sample standard deviation is calculated using `Bessel's correction `_, +// which ensures that the sample variance will be an unbiased estimator of population variance. func (b *AggBuilder) StdDev(cols ...string) *AggBuilder { b.addAgg(aggPart{matchPairs: cols, kind: tablepb2.ComboAggregateRequest_STD}) return b } -// Var returns an aggregator that computes the variance of values, within an aggregation group, for each input column. +// Var returns an aggregator that computes the sample variance of values, within an aggregation group, for each input column. // The source columns are specified by cols. 
+// +// Sample variance is calculated using `Bessel's correction `_, +// which ensures that the sample variance will be an unbiased estimator of population variance. func (b *AggBuilder) Variance(cols ...string) *AggBuilder { b.addAgg(aggPart{matchPairs: cols, kind: tablepb2.ComboAggregateRequest_VAR}) return b diff --git a/go/pkg/client/tablehandle.go b/go/pkg/client/tablehandle.go index 94769a6f8a9..5b1f5f38a32 100644 --- a/go/pkg/client/tablehandle.go +++ b/go/pkg/client/tablehandle.go @@ -543,8 +543,11 @@ func (th *TableHandle) AvgBy(ctx context.Context, cols ...string) (*TableHandle, return th.client.dedicatedAggOp(ctx, th, cols, "", tablepb2.ComboAggregateRequest_AVG) } -// StdBy returns the standard deviation for each group. Null values are ignored. +// StdBy returns the sample standard deviation for each group. Null values are ignored. // Columns not used in the grouping must be numeric. +// +// Sample standard deviation is calculated using `Bessel's correction `_, +// which ensures that the sample variance will be an unbiased estimator of population variance. func (th *TableHandle) StdBy(ctx context.Context, cols ...string) (*TableHandle, error) { if !th.rLockIfValid() { return nil, ErrInvalidTableHandle @@ -553,8 +556,11 @@ func (th *TableHandle) StdBy(ctx context.Context, cols ...string) (*TableHandle, return th.client.dedicatedAggOp(ctx, th, cols, "", tablepb2.ComboAggregateRequest_STD) } -// VarBy returns the variance for each group. Null values are ignored. +// VarBy returns the sample variance for each group. Null values are ignored. // Columns not used in the grouping must be numeric. +// +// Sample variance is calculated using `Bessel's correction `_, +// which ensures that the sample variance will be an unbiased estimator of population variance. func (th *TableHandle) VarBy(ctx context.Context, cols ...string) (*TableHandle, error) { if !th.rLockIfValid() { return nil, ErrInvalidTableHandle diff --git a/py/client/pydeephaven/_table_interface.py b/py/client/pydeephaven/_table_interface.py index 60751d9af3b..fc3e0b3a5c3 100644 --- a/py/client/pydeephaven/_table_interface.py +++ b/py/client/pydeephaven/_table_interface.py @@ -466,8 +466,11 @@ def avg_by(self, by: Union[str, List[str]] = None) -> Union[Table, Query]: return self.table_op_handler(table_op) def std_by(self, by: Union[str, List[str]] = None) -> Union[Table, Query]: - """The std_by method creates a new table containing the standard deviation for each group. Columns not used - in the grouping must be of numeric types. + """The std_by method creates a new table containing the sample standard deviation for each group. Columns not + used in the grouping must be of numeric types. + + Sample standard deviation is computed using `Bessel's correction `_, + which ensures that the sample variance will be an unbiased estimator of population variance. Args: by (Union[str, List[str]]): the group-by column names(s), default is None, meaning grouping @@ -483,9 +486,12 @@ def std_by(self, by: Union[str, List[str]] = None) -> Union[Table, Query]: return self.table_op_handler(table_op) def var_by(self, by: Union[str, List[str]] = None) -> Union[Table, Query]: - """The var_by method creates a new table containing the variance for each group. Columns not used in the + """The var_by method creates a new table containing the sample variance for each group. Columns not used in the grouping must be of numeric types. 
+ Sample variance is computed using `Bessel's correction `_, + which ensures that the sample variance will be an unbiased estimator of population variance. + Args: by (Union[str, List[str]], optional): the group-by column name(s), default is None, meaning grouping all the rows into one group diff --git a/py/client/pydeephaven/agg.py b/py/client/pydeephaven/agg.py index f8a3eaf7fda..b50df148f1f 100644 --- a/py/client/pydeephaven/agg.py +++ b/py/client/pydeephaven/agg.py @@ -300,7 +300,10 @@ def sorted_last(order_by: str, cols: Union[str, List[str]] = None) -> Aggregatio def std(cols: Union[str, List[str]] = None) -> Aggregation: - """Creates a Std (standard deviation) aggregation. + """Creates a Std (sample standard deviation) aggregation. + + Sample standard deviation is computed using `Bessel's correction `_, + which ensures that the sample variance will be an unbiased estimator of population variance. Args: cols (Union[str, List[str]]): the column(s) to aggregate on, can be renaming expressions, i.e. "new_col = col"; @@ -366,7 +369,10 @@ def unique(cols: Union[str, List[str]] = None, include_nulls: bool = False, def var(cols: Union[str, List[str]] = None) -> Aggregation: - """Creates a Variance aggregation. + """Creates a sample Variance aggregation. + + Sample variance is computed using `Bessel's correction `_, + which ensures that the sample variance will be an unbiased estimator of population variance. Args: cols (Union[str, List[str]]): the column(s) to aggregate on, can be renaming expressions, i.e. "new_col = col"; diff --git a/py/client/pydeephaven/table.py b/py/client/pydeephaven/table.py index 478616eece8..4251639731f 100644 --- a/py/client/pydeephaven/table.py +++ b/py/client/pydeephaven/table.py @@ -529,8 +529,11 @@ def avg_by(self, by: Union[str, List[str]] = None) -> Table: return super(Table, self).avg_by(by) def std_by(self, by: Union[str, List[str]] = None) -> Table: - """The std_by method creates a new table containing the standard deviation for each group. Columns not used - in the grouping must be of numeric types. + """The std_by method creates a new table containing the sample standard deviation for each group. Columns not + used in the grouping must be of numeric types. + + Sample standard deviation is computed using `Bessel's correction `_, + which ensures that the sample variance will be an unbiased estimator of population variance. Args: by (Union[str, List[str]]): the group-by column names(s), default is None, meaning grouping @@ -545,9 +548,12 @@ def std_by(self, by: Union[str, List[str]] = None) -> Table: return super(Table, self).std_by(by) def var_by(self, by: Union[str, List[str]] = None) -> Table: - """The var_by method creates a new table containing the variance for each group. Columns not used in the + """The var_by method creates a new table containing the sample variance for each group. Columns not used in the grouping must be of numeric types. + Sample variance is computed using `Bessel's correction `_, + which ensures that the sample variance will be an unbiased estimator of population variance. 
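The Bessel's-correction wording added across these docstrings can be made concrete with a small arithmetic sketch. The class below is not part of this patch and has no Deephaven dependency; the class name and sample data are made up purely to illustrate the n - 1 divisor.

```java
import java.util.Arrays;

// Minimal sketch (not Deephaven code): population variance divides the sum of
// squared deviations by n, while the Bessel-corrected sample variance divides
// by n - 1, which is what the sample std/var documentation above refers to.
public class BesselSketch {
    public static void main(String[] args) {
        double[] values = {2, 4, 4, 4, 5, 5, 7, 9}; // hypothetical data
        int n = values.length;
        double mean = Arrays.stream(values).average().orElse(Double.NaN); // 5.0
        double sumSqDev = Arrays.stream(values).map(v -> (v - mean) * (v - mean)).sum(); // 32.0

        double populationVar = sumSqDev / n;       // 4.0, biased when the values are a sample
        double sampleVar = sumSqDev / (n - 1);     // ~4.571, unbiased estimator of population variance
        double sampleStd = Math.sqrt(sampleVar);   // ~2.138

        System.out.printf("population variance = %.4f%n", populationVar);
        System.out.printf("sample variance     = %.4f%n", sampleVar);
        System.out.printf("sample std dev      = %.4f%n", sampleStd);
    }
}
```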
+ Args: by (Union[str, List[str]], optional): the group-by column name(s), default is None, meaning grouping all the rows into one group diff --git a/py/client/pydeephaven/updateby.py b/py/client/pydeephaven/updateby.py index 9d096d1fe57..af1f1a71edd 100644 --- a/py/client/pydeephaven/updateby.py +++ b/py/client/pydeephaven/updateby.py @@ -1279,10 +1279,13 @@ def rolling_count_time(ts_col: str, cols: Union[str, List[str]], rev_time: Union def rolling_std_tick(cols: Union[str, List[str]], rev_ticks: int, fwd_ticks: int = 0) -> UpdateByOperation: - """Creates a rolling standard deviation UpdateByOperation for the supplied column names, using ticks as the windowing unit. Ticks - are row counts, and you may specify the reverse and forward window in number of rows to include. The current row - is considered to belong to the reverse window but not the forward window. Also, negative values are allowed and - can be used to generate completely forward or completely reverse windows. + """Creates a rolling sample standard deviation UpdateByOperation for the supplied column names, using ticks as the + windowing unit. Ticks are row counts, and you may specify the reverse and forward window in number of rows to + include. The current row is considered to belong to the reverse window but not the forward window. Also, negative + values are allowed and can be used to generate completely forward or completely reverse windows. + + Sample standard deviation is computed using `Bessel's correction `_, + which ensures that the sample variance will be an unbiased estimator of population variance. Here are some examples of window values: rev_ticks = 1, fwd_ticks = 0 - contains only the current row @@ -1298,7 +1301,7 @@ def rolling_std_tick(cols: Union[str, List[str]], rev_ticks: int, fwd_ticks: int Args: cols (Union[str, List[str]]): the column(s) to be operated on, can include expressions to rename the output, - i.e. "new_col = col"; when empty, update_by perform the rolling standard deviation operation on all columns. + i.e. "new_col = col"; when empty, update_by perform the rolling sample standard deviation operation on all columns. rev_ticks (int): the look-behind window size (in rows/ticks) fwd_ticks (int): the look-forward window size (int rows/ticks), default is 0 @@ -1322,11 +1325,14 @@ def rolling_std_tick(cols: Union[str, List[str]], rev_ticks: int, fwd_ticks: int def rolling_std_time(ts_col: str, cols: Union[str, List[str]], rev_time: Union[int, str], fwd_time: Union[int, str] = 0) -> UpdateByOperation: - """Creates a rolling standard deviation UpdateByOperation for the supplied column names, using time as the windowing unit. This - function accepts nanoseconds or time strings as the reverse and forward window parameters. Negative values are - allowed and can be used to generate completely forward or completely reverse windows. A row containing a null in - the timestamp column belongs to no window and will not be considered in the windows of other rows; its output will - be null. + """Creates a rolling sample standard deviation UpdateByOperation for the supplied column names, using time as the + windowing unit. This function accepts nanoseconds or time strings as the reverse and forward window parameters. + Negative values are allowed and can be used to generate completely forward or completely reverse windows. A row + containing a null in the timestamp column belongs to no window and will not be considered in the windows of other + rows; its output will be null. 
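As a rough, engine-independent illustration of the rev_ticks / fwd_ticks convention described in the rolling_std_tick docstring above (the current row counts toward the reverse window but not the forward window), a plain-Java sketch might look like the following; the class name, helper, and data are hypothetical, and the real operation also handles nulls and column pairs that this sketch ignores.

```java
// Minimal sketch (not Deephaven code): forming a tick-based rolling window around row i
// and computing the Bessel-corrected sample standard deviation over it.
public class RollingTickSketch {
    static double rollingSampleStd(double[] col, int i, int revTicks, int fwdTicks) {
        int start = Math.max(0, i - revTicks + 1);        // revTicks rows ending at row i (inclusive)
        int end = Math.min(col.length - 1, i + fwdTicks); // fwdTicks rows after row i
        int n = end - start + 1;
        if (n <= 1) {
            return Double.NaN; // too few rows for a sample estimate
        }
        double sum = 0;
        for (int j = start; j <= end; j++) {
            sum += col[j];
        }
        double mean = sum / n;
        double ss = 0;
        for (int j = start; j <= end; j++) {
            ss += (col[j] - mean) * (col[j] - mean);
        }
        return Math.sqrt(ss / (n - 1)); // n - 1: Bessel's correction, as documented above
    }

    public static void main(String[] args) {
        double[] col = {1, 2, 3, 4, 5, 6};
        // rev_ticks = 3, fwd_ticks = 0: the window for row 4 is rows {2, 3, 4}, values {3, 4, 5}
        System.out.println(rollingSampleStd(col, 4, 3, 0)); // prints 1.0
    }
}
```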
+ + Sample standard deviation is computed using `Bessel's correction `_, + which ensures that the sample variance will be an unbiased estimator of population variance. Here are some examples of window values: rev_time = 0, fwd_time = 0 - contains rows that exactly match the current row timestamp diff --git a/py/server/deephaven/agg.py b/py/server/deephaven/agg.py index 622e0320507..7e4c4293f73 100644 --- a/py/server/deephaven/agg.py +++ b/py/server/deephaven/agg.py @@ -272,7 +272,11 @@ def sorted_last(order_by: str, cols: Union[str, List[str]] = None) -> Aggregatio def std(cols: Union[str, List[str]] = None) -> Aggregation: - """Creates a Std aggregation. + """Creates a Std (sample standard deviation) aggregation. + + Sample standard deviation is computed using `Bessel's correction `_, + which ensures that the sample variance will be an unbiased estimator of population variance. + Args: cols (Union[str, List[str]]): the column(s) to aggregate on, can be renaming expressions, i.e. "new_col = col"; @@ -312,7 +316,11 @@ def unique(cols: Union[str, List[str]] = None, include_nulls: bool = False, non_ def var(cols: Union[str, List[str]] = None) -> Aggregation: - """Creates a Var aggregation. + """Creates a sample Var aggregation. + + Sample standard deviation is computed using `Bessel's correction `_, + which ensures that the sample variance will be an unbiased estimator of population variance. + Args: cols (Union[str, List[str]]): the column(s) to aggregate on, can be renaming expressions, i.e. "new_col = col"; diff --git a/py/server/deephaven/table.py b/py/server/deephaven/table.py index 8a60dae1008..b541480b4a2 100644 --- a/py/server/deephaven/table.py +++ b/py/server/deephaven/table.py @@ -1782,7 +1782,10 @@ def weighted_avg_by(self, wcol: str, by: Union[str, Sequence[str]] = None) -> Ta raise DHError(e, "table avg_by operation failed.") from e def std_by(self, by: Union[str, Sequence[str]] = None) -> Table: - """The std_by method creates a new table containing the standard deviation for each group. + """The std_by method creates a new table containing the sample standard deviation for each group. + + Sample standard deviation is computed using `Bessel's correction `_, + which ensures that the sample variance will be an unbiased estimator of population variance. Args: by (Union[str, Sequence[str]], optional): the group-by column name(s), default is None @@ -1803,7 +1806,10 @@ def std_by(self, by: Union[str, Sequence[str]] = None) -> Table: raise DHError(e, "table std_by operation failed.") from e def var_by(self, by: Union[str, Sequence[str]] = None) -> Table: - """The var_by method creates a new table containing the variance for each group. + """The var_by method creates a new table containing the sample variance for each group. + + Sample variance is computed using `Bessel's correction `_, + which ensures that the sample variance will be an unbiased estimator of population variance. 
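For readers unfamiliar with what a per-group sample standard deviation means, the sketch below mirrors the idea behind std_by in plain Java, outside the Deephaven engine; the keys, values, and class name are invented for illustration only.

```java
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

// Minimal sketch (not Deephaven code): group values by a key column and apply the
// Bessel-corrected (n - 1) sample standard deviation within each group.
public class GroupedStdSketch {
    static double sampleStd(List<Double> xs) {
        int n = xs.size();
        if (n <= 1) {
            return Double.NaN; // too few values for a sample estimate
        }
        double mean = xs.stream().mapToDouble(Double::doubleValue).average().orElse(Double.NaN);
        double ss = xs.stream().mapToDouble(x -> (x - mean) * (x - mean)).sum();
        return Math.sqrt(ss / (n - 1)); // Bessel's correction
    }

    public static void main(String[] args) {
        String[] keys = {"A", "A", "B", "B", "B"};   // hypothetical group-by column
        double[] vals = {1.0, 3.0, 2.0, 4.0, 6.0};   // hypothetical numeric column

        Map<String, List<Double>> groups = new LinkedHashMap<>();
        for (int i = 0; i < keys.length; i++) {
            groups.computeIfAbsent(keys[i], k -> new ArrayList<>()).add(vals[i]);
        }
        groups.forEach((k, v) -> System.out.println(k + " -> " + sampleStd(v)));
        // A -> sqrt(2) ~ 1.414   (values 1, 3)
        // B -> 2.0               (values 2, 4, 6)
    }
}
```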
Args: by (Union[str, Sequence[str]], optional): the group-by column name(s), default is None diff --git a/py/server/deephaven/updateby.py b/py/server/deephaven/updateby.py index efff037fff0..56f60a23836 100644 --- a/py/server/deephaven/updateby.py +++ b/py/server/deephaven/updateby.py @@ -1190,10 +1190,13 @@ def rolling_count_time(ts_col: str, cols: Union[str, List[str]], rev_time: Union def rolling_std_tick(cols: Union[str, List[str]], rev_ticks: int, fwd_ticks: int = 0) -> UpdateByOperation: - """Creates a rolling standard deviation UpdateByOperation for the supplied column names, using ticks as the windowing unit. Ticks - are row counts, and you may specify the reverse and forward window in number of rows to include. The current row - is considered to belong to the reverse window but not the forward window. Also, negative values are allowed and - can be used to generate completely forward or completely reverse windows. + """Creates a rolling sample standard deviation UpdateByOperation for the supplied column names, using ticks as the + windowing unit. Ticks are row counts, and you may specify the reverse and forward window in number of rows to + include. The current row is considered to belong to the reverse window but not the forward window. Also, negative + values are allowed and can be used to generate completely forward or completely reverse windows. + + Sample standard deviation is computed using `Bessel's correction `_, + which ensures that the sample variance will be an unbiased estimator of population variance. Here are some examples of window values: | `rev_ticks = 1, fwd_ticks = 0` - contains only the current row @@ -1209,7 +1212,7 @@ def rolling_std_tick(cols: Union[str, List[str]], rev_ticks: int, fwd_ticks: int Args: cols (Union[str, List[str]]): the column(s) to be operated on, can include expressions to rename the output, - i.e. "new_col = col"; when empty, update_by perform the rolling standard deviation operation on all columns. + i.e. "new_col = col"; when empty, update_by perform the rolling sample standard deviation operation on all columns. rev_ticks (int): the look-behind window size (in rows/ticks) fwd_ticks (int): the look-forward window size (int rows/ticks), default is 0 @@ -1228,11 +1231,14 @@ def rolling_std_tick(cols: Union[str, List[str]], rev_ticks: int, fwd_ticks: int def rolling_std_time(ts_col: str, cols: Union[str, List[str]], rev_time: Union[int, str], fwd_time: Union[int, str] = 0) -> UpdateByOperation: - """Creates a rolling standard deviation UpdateByOperation for the supplied column names, using time as the windowing unit. This - function accepts nanoseconds or time strings as the reverse and forward window parameters. Negative values are - allowed and can be used to generate completely forward or completely reverse windows. A row containing a null in - the timestamp column belongs to no window and will not be considered in the windows of other rows; its output will - be null. + """Creates a rolling sample standard deviation UpdateByOperation for the supplied column names, using time as the + windowing unit. This function accepts nanoseconds or time strings as the reverse and forward window parameters. + Negative values are allowed and can be used to generate completely forward or completely reverse windows. A row + containing a null in the timestamp column belongs to no window and will not be considered in the windows of other + rows; its output will be null. 
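The time-based window rules restated here (inclusive bounds, and a null timestamp placing a row in no window) can likewise be sketched with java.time alone; the method and timestamps below are illustrative assumptions, not the engine's implementation.

```java
import java.time.Duration;
import java.time.Instant;

// Minimal sketch (not Deephaven code): membership test for a time-based rolling window
// following the rev_time / fwd_time convention described above.
public class RollingTimeSketch {
    static boolean inWindow(Instant current, Instant other, Duration rev, Duration fwd) {
        if (current == null || other == null) {
            return false; // a row with a null timestamp belongs to no window
        }
        Instant start = current.minus(rev);
        Instant end = current.plus(fwd);
        return !other.isBefore(start) && !other.isAfter(end); // inclusive on both ends
    }

    public static void main(String[] args) {
        Instant now = Instant.parse("2023-11-13T12:00:00Z");
        Duration rev = Duration.ofMinutes(10); // like rev_time = "PT10M"
        Duration fwd = Duration.ZERO;          // like fwd_time = 0

        System.out.println(inWindow(now, now.minus(Duration.ofMinutes(5)), rev, fwd));  // true
        System.out.println(inWindow(now, now.minus(Duration.ofMinutes(15)), rev, fwd)); // false
        System.out.println(inWindow(now, now.plus(Duration.ofMinutes(1)), rev, fwd));   // false, no forward window
    }
}
```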
+ + Sample standard deviation is computed using `Bessel's correction `_, + which ensures that the sample variance will be an unbiased estimator of population variance. Here are some examples of window values: | `rev_time = 0, fwd_time = 0` - contains rows that exactly match the current row timestamp @@ -1250,7 +1256,7 @@ def rolling_std_time(ts_col: str, cols: Union[str, List[str]], rev_time: Union[i Args: ts_col (str): the timestamp column for determining the window cols (Union[str, List[str]]): the column(s) to be operated on, can include expressions to rename the output, - i.e. "new_col = col"; when empty, update_by perform the rolling standard deviation operation on all columns. + i.e. "new_col = col"; when empty, update_by perform the rolling sample standard deviation operation on all columns. rev_time (int): the look-behind window size, can be expressed as an integer in nanoseconds or a time interval string, e.g. "PT00:00:00.001" or "PT5M" fwd_time (int): the look-ahead window size, can be expressed as an integer in nanoseconds or a time diff --git a/table-api/src/main/java/io/deephaven/api/TableOperations.java b/table-api/src/main/java/io/deephaven/api/TableOperations.java index 6b6b6cfc92f..c8899973338 100644 --- a/table-api/src/main/java/io/deephaven/api/TableOperations.java +++ b/table-api/src/main/java/io/deephaven/api/TableOperations.java @@ -1165,16 +1165,24 @@ TOPS updateBy(UpdateByControl control, Collection o // ------------------------------------------------------------------------------------------- /** - * Produces a single row table with the standard deviation of each column. + * Produces a single row table with the sample standard deviation of each column. *

* When the input table is empty, zero output rows are produced. + *

+ * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. */ @ConcurrentMethod TOPS stdBy(); /** - * Groups the data column according to groupByColumns and computes the standard deviation for the rest - * of the fields + * Groups the data column according to groupByColumns and computes the sample standard deviation for + * the rest of the fields + *

+ * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. * * @param groupByColumns The grouping columns as in {@link TableOperations#groupBy} */ @@ -1182,8 +1190,12 @@ TOPS updateBy(UpdateByControl control, Collection o TOPS stdBy(String... groupByColumns); /** - * Groups the data column according to groupByColumns and computes the standard deviation for the rest - * of the fields + * Groups the data column according to groupByColumns and computes the sample standard deviation for + * the rest of the fields + *

+ * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. * * @param groupByColumns The grouping columns as in {@link TableOperations#groupBy} */ @@ -1191,8 +1203,12 @@ TOPS updateBy(UpdateByControl control, Collection o TOPS stdBy(ColumnName... groupByColumns); /** - * Groups the data column according to groupByColumns and computes the standard deviation for the rest - * of the fields + * Groups the data column according to groupByColumns and computes the sample standard deviation for + * the rest of the fields + *

+ * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. * * @param groupByColumns The grouping columns as in {@link TableOperations#groupBy} */ @@ -1202,16 +1218,22 @@ TOPS updateBy(UpdateByControl control, Collection o // ------------------------------------------------------------------------------------------- /** - * Produces a single row table with the variance of each column. + * Produces a single row table with the sample variance of each column. *

* When the input table is empty, zero output rows are produced. + *

+ * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. */ @ConcurrentMethod TOPS varBy(); /** - * Groups the data column according to groupByColumns and computes the variance for the rest of the - * fields + * Groups the data column according to groupByColumns and computes the sample variance for the rest of + * the fields + *

+ * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param groupByColumns The grouping columns as in {@link TableOperations#groupBy} */ @@ -1219,8 +1241,11 @@ TOPS updateBy(UpdateByControl control, Collection o TOPS varBy(String... groupByColumns); /** - * Groups the data column according to groupByColumns and computes the variance for the rest of the - * fields + * Groups the data column according to groupByColumns and computes the sample variance for the rest of + * the fields + *

+ * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param groupByColumns The grouping columns as in {@link TableOperations#groupBy} */ @@ -1230,6 +1255,9 @@ TOPS updateBy(UpdateByControl control, Collection o /** * Groups the data column according to groupByColumns and computes the variance for the rest of the * fields + *

+ * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param groupByColumns The grouping columns as in {@link TableOperations#groupBy} */ diff --git a/table-api/src/main/java/io/deephaven/api/agg/Aggregation.java b/table-api/src/main/java/io/deephaven/api/agg/Aggregation.java index f228f3e324f..23d249e05dc 100644 --- a/table-api/src/main/java/io/deephaven/api/agg/Aggregation.java +++ b/table-api/src/main/java/io/deephaven/api/agg/Aggregation.java @@ -522,8 +522,12 @@ static Aggregation AggSortedLast(Collection sortColumns, Strin } /** - * Create a {@link io.deephaven.api.agg.spec.AggSpecStd standard deviation} aggregation for the supplied column name - * pairs. + * Create a {@link io.deephaven.api.agg.spec.AggSpecStd sample standard deviation} aggregation for the supplied + * column name pairs. + * + * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. * * @param pairs The input/output column name pairs * @return The aggregation @@ -608,7 +612,11 @@ static Aggregation AggUnique(boolean includeNulls, UnionObject nonUniqueSentinel } /** - * Create a {@link io.deephaven.api.agg.spec.AggSpecVar variance} aggregation for the supplied column name pairs. + * Create a {@link io.deephaven.api.agg.spec.AggSpecVar sample variance} aggregation for the supplied column name + * pairs. + * + * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), + * which ensures that the sample variance will be an unbiased estimator of population variance. * * @param pairs The input/output column name pairs * @return The aggregation diff --git a/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecStd.java b/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecStd.java index 5b18e83bee6..2e7513bb666 100644 --- a/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecStd.java +++ b/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecStd.java @@ -8,8 +8,12 @@ import org.immutables.value.Value.Immutable; /** - * Specifies an aggregation that outputs the standard deviation of the input column values for each group. Only works - * for numeric input types. + * Specifies an aggregation that outputs the sample standard deviation of the input column values for each group. Only + * works for numeric input types. + * + * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. * * @see TableOperations#stdBy */ diff --git a/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecVar.java b/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecVar.java index dd5e5b4c364..b86e5fa3eeb 100644 --- a/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecVar.java +++ b/table-api/src/main/java/io/deephaven/api/agg/spec/AggSpecVar.java @@ -8,8 +8,11 @@ import org.immutables.value.Value.Immutable; /** - * Specifies an aggregation that outputs the variance of the input column values for each group. Only works for numeric - * input types. + * Specifies an aggregation that outputs the sample variance of the input column values for each group. Only works for + * numeric input types. 
+ * + * Sample variance is computed using Bessel's correction (https://en.wikipedia.org/wiki/Bessel%27s_correction), which + * ensures that the sample variance will be an unbiased estimator of population variance. * * @see TableOperations#varBy */ diff --git a/table-api/src/main/java/io/deephaven/api/updateby/UpdateByOperation.java b/table-api/src/main/java/io/deephaven/api/updateby/UpdateByOperation.java index f59e448cbd2..745b85734cb 100644 --- a/table-api/src/main/java/io/deephaven/api/updateby/UpdateByOperation.java +++ b/table-api/src/main/java/io/deephaven/api/updateby/UpdateByOperation.java @@ -1620,11 +1620,15 @@ static UpdateByOperation RollingCount(String timestampCol, long revTime, long fw /** - * Create a {@link RollingStdSpec rolling standard deviation} for the supplied column name pairs, using ticks as the - * windowing unit. Ticks are row counts and you may specify the previous window in number of rows to include. The - * current row is considered to belong to the reverse window, so calling this with {@code revTicks = 1} will simply - * return the current row. Specifying {@code revTicks = 10} will include the previous 9 rows to this one and this - * row for a total of 10 rows. + * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using ticks + * as the windowing unit. Ticks are row counts and you may specify the previous window in number of rows to include. + * The current row is considered to belong to the reverse window, so calling this with {@code revTicks = 1} will + * simply return the current row. Specifying {@code revTicks = 10} will include the previous 9 rows to this one and + * this row for a total of 10 rows. + * + * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. * * @param revTicks the look-behind window size (in rows/ticks) * @param pairs The input/output column name pairs @@ -1635,11 +1639,11 @@ static UpdateByOperation RollingStd(long revTicks, String... pairs) { } /** - * Create a {@link RollingStdSpec rolling standard deviation} for the supplied column name pairs, using ticks as the - * windowing unit. Ticks are row counts and you may specify the reverse and forward window in number of rows to - * include. The current row is considered to belong to the reverse window but not the forward window. Also, negative - * values are allowed and can be used to generate completely forward or completely reverse windows. Here are some - * examples of window values: + * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using ticks + * as the windowing unit. Ticks are row counts and you may specify the reverse and forward window in number of rows + * to include. The current row is considered to belong to the reverse window but not the forward window. Also, + * negative values are allowed and can be used to generate completely forward or completely reverse windows. Here + * are some examples of window values: *
     * <ul>
     * <li>{@code revTicks = 1, fwdTicks = 0} - contains only the current row
     * <li>{@code revTicks = 10, fwdTicks = 0} - contains 9 previous rows and the current row
@@ -1654,6 +1658,10 @@ static UpdateByOperation RollingStd(long revTicks, String... pairs) {
     * following the current row (inclusive)
     * </ul>
* + * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. + * * @param revTicks the look-behind window size (in rows/ticks) * @param fwdTicks the look-ahead window size (in rows/ticks) * @param pairs The input/output column name pairs @@ -1664,10 +1672,10 @@ static UpdateByOperation RollingStd(long revTicks, long fwdTicks, String... pair } /** - * Create a {@link RollingStdSpec rolling standard deviation} for the supplied column name pairs, using time as the - * windowing unit. This function accepts {@link Duration duration} as the reverse window parameter. A row containing - * a {@code null} in the timestamp column belongs to no window and will not have a value computed or be considered - * in the windows of other rows. + * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using time + * as the windowing unit. This function accepts {@link Duration duration} as the reverse window parameter. A row + * containing a {@code null} in the timestamp column belongs to no window and will not have a value computed or be + * considered in the windows of other rows. * * Here are some examples of window values: *
@@ -1675,6 +1683,10 @@ static UpdateByOperation RollingStd(long revTicks, long fwdTicks, String... pair
     * <li>{@code revDuration = 10m} - contains rows from 10m earlier through the current row timestamp (inclusive)
     * </ul>
* + * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. + * * @param timestampCol the name of the timestamp column * @param revDuration the look-behind window size (in Duration) * @param pairs The input/output column name pairs @@ -1685,11 +1697,11 @@ static UpdateByOperation RollingStd(String timestampCol, Duration revDuration, S } /** - * Create a {@link RollingStdSpec rolling standard deviation} for the supplied column name pairs, using time as the - * windowing unit. This function accepts {@link Duration durations} as the reverse and forward window parameters. - * Negative values are allowed and can be used to generate completely forward or completely reverse windows. A row - * containing a {@code null} in the timestamp column belongs to no window and will not have a value computed or be - * considered in the windows of other rows. + * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using time + * as the windowing unit. This function accepts {@link Duration durations} as the reverse and forward window + * parameters. Negative values are allowed and can be used to generate completely forward or completely reverse + * windows. A row containing a {@code null} in the timestamp column belongs to no window and will not have a value + * computed or be considered in the windows of other rows. * * Here are some examples of window values: *
@@ -1706,6 +1718,10 @@ static UpdateByOperation RollingStd(String timestampCol, Duration revDuration, S
     * current row timestamp (inclusive), this is a purely forwards looking window
     * </ul>
* + * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. + * * @param timestampCol the name of the timestamp column * @param revDuration the look-behind window size (in Duration) * @param fwdDuration the look-ahead window size (in Duration) @@ -1718,10 +1734,14 @@ static UpdateByOperation RollingStd(String timestampCol, Duration revDuration, D } /** - * Create a {@link RollingStdSpec rolling standard deviation} for the supplied column name pairs, using time as the - * windowing unit. This function accepts {@code nanoseconds} as the reverse window parameters. A row containing a - * {@code null} in the timestamp column belongs to no window and will not have a value computed or be considered in - * the windows of other rows. + * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using time + * as the windowing unit. This function accepts {@code nanoseconds} as the reverse window parameters. A row + * containing a {@code null} in the timestamp column belongs to no window and will not have a value computed or be + * considered in the windows of other rows. + * + * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. * * @param timestampCol the name of the timestamp column * @param revTime the look-behind window size (in nanoseconds) @@ -1733,11 +1753,15 @@ static UpdateByOperation RollingStd(String timestampCol, long revTime, String... } /** - * Create a {@link RollingStdSpec rolling standard deviation} for the supplied column name pairs, using time as the - * windowing unit. This function accepts {@code nanoseconds} as the reverse and forward window parameters. Negative - * values are allowed and can be used to generate completely forward or completely reverse windows. A row containing - * a {@code null} in the timestamp column belongs to no window and will not have a value computed or be considered - * in the windows of other rows. + * Create a {@link RollingStdSpec rolling sample standard deviation} for the supplied column name pairs, using time + * as the windowing unit. This function accepts {@code nanoseconds} as the reverse and forward window parameters. + * Negative values are allowed and can be used to generate completely forward or completely reverse windows. A row + * containing a {@code null} in the timestamp column belongs to no window and will not have a value computed or be + * considered in the windows of other rows. + * + * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an unbiased + * estimator of population variance. * * @param timestampCol the name of the timestamp column * @param revTime the look-behind window size (in nanoseconds) diff --git a/web/WebDevelopersGuide.md b/web/WebDevelopersGuide.md index 87bc0ab50b7..33a202e8dd3 100644 --- a/web/WebDevelopersGuide.md +++ b/web/WebDevelopersGuide.md @@ -992,9 +992,9 @@ This enum describes the name of each supported operation/aggregation type when c value is "Max". * `SUM` - The sum of all values in the specified column. Can only apply to numeric types. String value is "Sum". 
* `ABS_SUM` - The sum of all values, as their distance from zero, in the specified column. Can only apply to numeric types. String value is “AbsSum”. - * `VAR` - The variance of all values in the specified column. Can only apply to numeric types. String value is "Var". + * `VAR` - The sample variance of all values in the specified column. Can only apply to numeric types. String value is "Var". * `AVG` - The average of all values in the specified column. Can only apply to numeric types. String value is "Avg". - * `STD` - The standard deviation of all values in the specified column. Can only apply to numeric types. String value is + * `STD` - The sample standard deviation of all values in the specified column. Can only apply to numeric types. String value is "Std". * `FIRST` - The first value in the specified column. Can apply to any type. String value is "First". * `LAST` - The last value in the specified column. Can apply to any type. String value is "Last". diff --git a/web/client-api/src/main/java/io/deephaven/web/client/api/JsColumnStatistics.java b/web/client-api/src/main/java/io/deephaven/web/client/api/JsColumnStatistics.java index 552bbc6497a..2b70704618e 100644 --- a/web/client-api/src/main/java/io/deephaven/web/client/api/JsColumnStatistics.java +++ b/web/client-api/src/main/java/io/deephaven/web/client/api/JsColumnStatistics.java @@ -72,7 +72,11 @@ public enum StatType { */ MAX_ABS("MAX (ABS)", null), /** - * The standard deviation of the values in the column. + * The sample standard deviation of the values in the column. + * + * Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an + * unbiased estimator of population variance. */ STD_DEV("STD DEV", "double"), /** diff --git a/web/client-api/src/main/java/io/deephaven/web/client/api/tree/enums/JsAggregationOperation.java b/web/client-api/src/main/java/io/deephaven/web/client/api/tree/enums/JsAggregationOperation.java index fa0c50cce57..ab0824b1a98 100644 --- a/web/client-api/src/main/java/io/deephaven/web/client/api/tree/enums/JsAggregationOperation.java +++ b/web/client-api/src/main/java/io/deephaven/web/client/api/tree/enums/JsAggregationOperation.java @@ -45,8 +45,12 @@ public class JsAggregationOperation { */ ABS_SUM = "AbsSum", /** - * The variance of all values in the specified column. Can only apply to numeric types. String value is - * "Var". + * The sample variance of all values in the specified column. Can only apply to numeric types. String value + * is "Var". + * + * Sample variance is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an + * unbiased estimator of population variance. */ VAR = "Var", /** @@ -55,8 +59,10 @@ public class JsAggregationOperation { */ AVG = "Avg", /** - * The standard deviation of all values in the specified column. Can only apply to numeric types. String - * value is "Std". + * The sample standard deviation of all values in the specified column. Can only apply to numeric types. + * String value is "Std". Sample standard deviation is computed using Bessel's correction + * (https://en.wikipedia.org/wiki/Bessel%27s_correction), which ensures that the sample variance will be an + * unbiased estimator of population variance. 
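Concretely, the Bessel's correction called out in the Javadoc changes above amounts to dividing the sum of squared deviations by n - 1 instead of n. A minimal standalone sketch of the difference (illustrative only, not part of the patch; the class name is arbitrary):

```java
public class BesselExample {
    public static void main(String[] args) {
        double[] x = {2.0, 4.0, 6.0};

        double mean = 0.0;
        for (double v : x) {
            mean += v;
        }
        mean /= x.length;                             // mean = 4.0

        double ss = 0.0;
        for (double v : x) {
            ss += (v - mean) * (v - mean);            // sum of squared deviations = 8.0
        }

        double populationVar = ss / x.length;         // 8 / 3 ≈ 2.667, biased when the mean is estimated from the sample
        double sampleVar = ss / (x.length - 1);       // 8 / 2 = 4.0, Bessel's correction (what Var/Std document above)
        double sampleStd = Math.sqrt(sampleVar);      // 2.0

        System.out.println(populationVar + " " + sampleVar + " " + sampleStd);
    }
}
```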
*/ STD = "Std", /** From 78f0d11cd25d8c5c5f1b7de2bbe7ee544682b633 Mon Sep 17 00:00:00 2001 From: Jianfeng Mao <4297243+jmao-denver@users.noreply.github.com> Date: Tue, 14 Nov 2023 12:48:58 -0700 Subject: [PATCH 19/41] Support calling numpy ufuncs assigned to top-level names in formulas (#4759) * Support alias-ed numpy ufunc in formulas * Add meaningful comments to the changes --- py/server/deephaven/table.py | 32 ++++++++++++++----- .../tests/test_pyfunc_return_java_values.py | 21 ++++++++++++ 2 files changed, 45 insertions(+), 8 deletions(-) diff --git a/py/server/deephaven/table.py b/py/server/deephaven/table.py index b541480b4a2..1a55c58f823 100644 --- a/py/server/deephaven/table.py +++ b/py/server/deephaven/table.py @@ -387,7 +387,15 @@ def _encode_signature(fn: Callable) -> str: If a parameter or the return of the function is not annotated, the default 'O' - object type, will be used. """ - sig = inspect.signature(fn) + try: + sig = inspect.signature(fn) + except: + # in case inspect.signature() fails, we'll just use the default 'O' - object type. + # numpy ufuncs actually have signature encoded in their 'types' attribute, we want to better support + # them in the future (https://github.com/deephaven/deephaven-core/issues/4762) + if type(fn) == np.ufunc: + return "O"*fn.nin + "->" + "O" + return "->O" np_type_codes = [] for n, p in sig.parameters.items(): @@ -429,8 +437,8 @@ def _py_udf(fn: Callable): if hasattr(fn, "return_type"): return fn ret_dtype = _udf_return_dtype(fn) - return_array = False + return_array = False # If the function is a numba guvectorized function, examine the signature of the function to determine if it # returns an array. if isinstance(fn, numba.np.ufunc.gufunc.GUFunc): @@ -439,12 +447,20 @@ def _py_udf(fn: Callable): if rtype: return_array = True else: - return_annotation = _parse_annotation(inspect.signature(fn).return_annotation) - component_type = _component_np_dtype_char(return_annotation) - if component_type: - ret_dtype = dtypes.from_np_dtype(np.dtype(component_type)) - if ret_dtype in _BUILDABLE_ARRAY_DTYPE_MAP: - return_array = True + try: + return_annotation = _parse_annotation(inspect.signature(fn).return_annotation) + except ValueError: + # the function has no return annotation, and since we can't know what the exact type is, the return type + # defaults to the generic object type therefore it is not an array of a specific type, + # but see (https://github.com/deephaven/deephaven-core/issues/4762) for future imporvement to better support + # numpy ufuncs. 
+ pass + else: + component_type = _component_np_dtype_char(return_annotation) + if component_type: + ret_dtype = dtypes.from_np_dtype(np.dtype(component_type)) + if ret_dtype in _BUILDABLE_ARRAY_DTYPE_MAP: + return_array = True @wraps(fn) def wrapper(*args, **kwargs): diff --git a/py/server/tests/test_pyfunc_return_java_values.py b/py/server/tests/test_pyfunc_return_java_values.py index 56d01ae6c59..aef0d44cb93 100644 --- a/py/server/tests/test_pyfunc_return_java_values.py +++ b/py/server/tests/test_pyfunc_return_java_values.py @@ -271,6 +271,27 @@ def f1(col) -> Optional[List[int]]: self.assertEqual(t.columns[0].data_type, dtypes.long_array) self.assertEqual(t.to_string().count("null"), 5) + def test_np_ufunc(self): + # no vectorization and no type inference + npsin = np.sin + t = empty_table(10).update(["X1 = npsin(i)"]) + self.assertEqual(t.columns[0].data_type, dtypes.PyObject) + t2 = t.update("X2 = X1.getDoubleValue()") + self.assertEqual(t2.columns[1].data_type, dtypes.double) + + import numba + + # numba vectorize decorator doesn't support numpy ufunc + with self.assertRaises(TypeError): + nbsin = numba.vectorize([numba.float64(numba.float64)])(np.sin) + + # this is the workaround that utilizes vectorization and type inference + @numba.vectorize([numba.float64(numba.float64)], nopython=True) + def nbsin(x): + return np.sin(x) + t3 = empty_table(10).update(["X3 = nbsin(i)"]) + self.assertEqual(t3.columns[0].data_type, dtypes.double) + if __name__ == '__main__': unittest.main() From 02c9deb24ea6d1c9dfff906f8a376f7483d6472e Mon Sep 17 00:00:00 2001 From: Jianfeng Mao <4297243+jmao-denver@users.noreply.github.com> Date: Tue, 14 Nov 2023 13:03:08 -0700 Subject: [PATCH 20/41] change default dtype_backend for to_pandas (#4815) * change default dtype_backend for to_pandas * Improve docstrings and add comments in test code * Accept suggested changes to docstrings --- py/server/deephaven/pandas.py | 13 ++++++++----- py/server/tests/test_learn_gather.py | 2 +- py/server/tests/test_pandas.py | 16 ++++++++-------- py/server/tests/test_parquet.py | 7 +++++-- py/server/tests/test_table.py | 2 +- 5 files changed, 23 insertions(+), 17 deletions(-) diff --git a/py/server/deephaven/pandas.py b/py/server/deephaven/pandas.py index 14e74ece14c..c14ae1f9ca7 100644 --- a/py/server/deephaven/pandas.py +++ b/py/server/deephaven/pandas.py @@ -112,7 +112,8 @@ def _column_to_series(table: Table, col_def: Column, conv_null: bool) -> pd.Seri } -def to_pandas(table: Table, cols: List[str] = None, dtype_backend: Literal[None, "pyarrow", "numpy_nullable"] = None, +def to_pandas(table: Table, cols: List[str] = None, + dtype_backend: Literal[None, "pyarrow", "numpy_nullable"] = "numpy_nullable", conv_null: bool = True) -> pd.DataFrame: """Produces a pandas DataFrame from a table. @@ -123,11 +124,13 @@ def to_pandas(table: Table, cols: List[str] = None, dtype_backend: Literal[None, Args: table (Table): the source table cols (List[str]): the source column names, default is None which means include all columns - dtype_backend (str): Which dtype_backend to use, e.g. whether a DataFrame should have NumPy arrays, + dtype_backend (str): which dtype_backend to use, e.g. whether a DataFrame should have NumPy arrays, nullable dtypes are used for all dtypes that have a nullable implementation when “numpy_nullable” is set, - pyarrow is used for all dtypes if “pyarrow” is set. default is None, meaning Numpy backed DataFrames with - no nullable dtypes. 
- conv_null (bool): When dtype_backend is not set, whether to check for Deephaven nulls in the data and + pyarrow is used for all dtypes if “pyarrow” is set. None means Numpy backed DataFrames with no nullable + dtypes. Both "numpy_nullable" and "pyarrow" automatically convert Deephaven nulls to Pandas NA and enable + Pandas extension types. Extension types are needed to support types beyond NumPy's type system. Extension + types support operations such as properly mapping Java Strings to Python strings. default is "numpy_nullable". + conv_null (bool): when dtype_backend is not set, whether to check for Deephaven nulls in the data and automatically replace them with pd.NA. default is True. Returns: diff --git a/py/server/tests/test_learn_gather.py b/py/server/tests/test_learn_gather.py index 64a316be3f7..f0caff9d059 100644 --- a/py/server/tests/test_learn_gather.py +++ b/py/server/tests/test_learn_gather.py @@ -141,7 +141,7 @@ def base_test(self, source, model, np_dtype): gatherer_colmajor = lambda rowset, colset: gather.table_to_numpy_2d(rowset, colset, gather.MemoryLayout.COLUMN_MAJOR, np_dtype) - array_from_table = to_pandas(source, conv_null=False).values + array_from_table = to_pandas(source, dtype_backend=None, conv_null=False).values gathered_rowmajor = gatherer_rowmajor(rows, cols) gathered_colmajor = gatherer_colmajor(rows, cols) diff --git a/py/server/tests/test_pandas.py b/py/server/tests/test_pandas.py index f896c23d853..2e78809f85a 100644 --- a/py/server/tests/test_pandas.py +++ b/py/server/tests/test_pandas.py @@ -54,7 +54,7 @@ def tearDown(self) -> None: super().tearDown() def test_to_pandas_no_conv_null(self): - df = to_pandas(self.test_table, conv_null=False) + df = to_pandas(self.test_table, dtype_backend=None, conv_null=False) self.assertEqual(len(df.columns), len(self.test_table.columns)) self.assertEqual(df.size, 2 * len(self.test_table.columns)) df_series = [df[col] for col in list(df.columns)] @@ -70,7 +70,7 @@ def test_to_pandas_remaps(self): prepared_table = self.test_table.update( formulas=["Long = isNull(Long_) ? 
Double.NaN : Long_"]) - df = to_pandas(prepared_table, cols=["Boolean", "Long"], conv_null=False) + df = to_pandas(prepared_table, cols=["Boolean", "Long"], dtype_backend=None, conv_null=False) self.assertEqual(df['Long'].dtype, np.float64) self.assertEqual(df['Boolean'].values.dtype, np.bool_) @@ -88,12 +88,12 @@ def test_vector_column(self): test_table = test_table.group_by(["String"]) df = to_pandas(test_table, cols=["String", "Doubles"]) - self.assertEqual(df['String'].dtype, np.object_) + self.assertEqual(df['String'].dtype, pd.StringDtype()) self.assertEqual(df['Doubles'].dtype, np.object_) double_series = df['Doubles'] - self.assertEqual([1.0, 2.0], list(double_series[0].toArray())) - self.assertEqual([4.0, 8.0, 16.0], list(double_series[1].toArray())) + self.assertEqual([1.0, 2.0], list(double_series[0])) + self.assertEqual([4.0, 8.0, 16.0], list(double_series[1])) def test_invalid_col_name(self): with self.assertRaises(DHError) as cm: @@ -114,7 +114,7 @@ def test_to_table(self): double_col(name="Double", data=[1.01, -1.01]), ] test_table = new_table(cols=input_cols) - df = to_pandas(test_table, conv_null=False) + df = to_pandas(test_table, dtype_backend=None, conv_null=False) table_from_df = to_table(df) self.assert_table_equals(table_from_df, test_table) @@ -123,7 +123,7 @@ def test_to_table_boolean_with_none(self): table_with_null_bool = new_table(cols=input_cols) prepared_table = table_with_null_bool.update( formulas=["Boolean = isNull(Boolean) ? (byte)NULL_BYTE : (Boolean == true ? 1: 0)"]) - df = to_pandas(prepared_table, conv_null=False) + df = to_pandas(prepared_table, dtype_backend=None, conv_null=False) table_from_df = to_table(df) self.assert_table_equals(table_from_df, prepared_table) @@ -159,7 +159,7 @@ def test_round_trip_with_nulls(self): pyobj_col(name="PyObj", data=[CustomClass(1, "1"), None]), ] test_table = new_table(cols=input_cols) - df = to_pandas(test_table) + df = to_pandas(test_table, dtype_backend=None) self.assertEqual(len(df.columns), len(test_table.columns)) self.assertEqual(df.size, 2 * len(test_table.columns)) test_table2 = to_table(df) diff --git a/py/server/tests/test_parquet.py b/py/server/tests/test_parquet.py index b2cd933740b..36c70515000 100644 --- a/py/server/tests/test_parquet.py +++ b/py/server/tests/test_parquet.py @@ -347,7 +347,8 @@ def test_dates_and_time(self): from_disk = read('data_from_dh.parquet') self.assert_table_equals(dh_table, from_disk) - df_from_disk = to_pandas(from_disk) + # TODO dtype_backend=None is a workaround until https://github.com/deephaven/deephaven-core/issues/4823 is fixed + df_from_disk = to_pandas(from_disk, dtype_backend=None) if pandas.__version__.split('.')[0] == "1": df_from_pandas = pandas.read_parquet("data_from_dh.parquet", use_nullable_dtypes=True) else: @@ -384,7 +385,9 @@ def time_test_helper(pa_table, new_schema, dest): # Write the provided pyarrow table type-casted to the new schema pyarrow.parquet.write_table(pa_table.cast(new_schema), dest) from_disk = read(dest) - df_from_disk = to_pandas(from_disk) + + # TODO dtype_backend=None is a workaround until https://github.com/deephaven/deephaven-core/issues/4823 is fixed + df_from_disk = to_pandas(from_disk, dtype_backend=None) original_df = pa_table.to_pandas() # Compare the dataframes as strings self.assertTrue((df_from_disk.astype(str) == original_df.astype(str)).all().values.all()) diff --git a/py/server/tests/test_table.py b/py/server/tests/test_table.py index bc82fa2022b..816c4e74f99 100644 --- a/py/server/tests/test_table.py +++ 
b/py/server/tests/test_table.py @@ -676,7 +676,7 @@ def verify_layout_hint(t: Table, layout_hint_str: str): self.assertIn("RuntimeError", cm.exception.compact_traceback) def verify_table_data(self, t: Table, expected: List[Any], assert_not_in: bool = False): - t_data = to_pandas(t).values.flatten() + t_data = to_pandas(t, dtype_backend=None).values.flatten() for s in expected: if assert_not_in: self.assertNotIn(s, t_data) From af1443c1d5113228f0296514de4939a5eab9441c Mon Sep 17 00:00:00 2001 From: Colin Alworth Date: Tue, 14 Nov 2023 15:23:18 -0600 Subject: [PATCH 21/41] Stop requesting column statistics for preview columns (#4828) Will be reverted as part of #188 Fixes #4825 --- .../src/main/java/io/deephaven/web/client/api/JsTable.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/web/client-api/src/main/java/io/deephaven/web/client/api/JsTable.java b/web/client-api/src/main/java/io/deephaven/web/client/api/JsTable.java index fcd9f450af7..c31ac023a4f 100644 --- a/web/client-api/src/main/java/io/deephaven/web/client/api/JsTable.java +++ b/web/client-api/src/main/java/io/deephaven/web/client/api/JsTable.java @@ -1435,6 +1435,11 @@ public Promise partitionBy(Object keys, @JsOptional Boolean */ @JsMethod public Promise getColumnStatistics(Column column) { + if (column.getDescription() != null && column.getDescription().startsWith("Preview of type")) { + // TODO (deephaven-core#188) Remove this workaround when we don't preview columns until just before + // subscription + return Promise.reject("Can't produce column statistics for preview column"); + } List toRelease = new ArrayList<>(); return workerConnection.newState((c, state, metadata) -> { ColumnStatisticsRequest req = new ColumnStatisticsRequest(); From e60cb544a60d236ae87ccd94048951ee46f1e58d Mon Sep 17 00:00:00 2001 From: Devin Smith Date: Wed, 15 Nov 2023 06:47:25 -0800 Subject: [PATCH 22/41] Add TableDefinition column name helpers (#4813) Adds some helper methods for on `TableDefinition#checkHasColumn`, `TableDefinition#checkHasColumns`, and `TableDefinition#getColumnNameSet`. Additionally, fixes up call sites that were (ab)using `Table#getColumnSourceMap` to simply get the keySet. This invokes a potentially extraneous Table#coalesce which can be avoided in these cases. In support of common scaffolding so #4771 won't need to call `Table#getColumnSource` for validation purposes. 
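
A rough usage sketch of the helpers this commit introduces (the surrounding class and method names are invented for illustration; only `TableDefinition#checkHasColumns`, `TableDefinition#getColumnNameSet`, and `NoSuchColumnException.throwIf` come from the patch):

```java
import io.deephaven.engine.table.TableDefinition;
import io.deephaven.engine.table.impl.NoSuchColumnException;

import java.util.List;
import java.util.Set;

class ColumnValidationSketch {
    // Validate required columns against a TableDefinition, without coalescing the table
    // and without hand-rolling the "Unknown column names [...]" message.
    static void requireColumns(TableDefinition definition, List<String> required) {
        definition.checkHasColumns(required); // throws NoSuchColumnException listing missing and available names
    }

    // The same check when only a set of available names is at hand.
    static void requireColumns(Set<String> available, List<String> required) {
        NoSuchColumnException.throwIf(available, required);
    }
}
```

The diff below applies this pattern at the existing call sites.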
--- .../plot/util/ArgumentValidations.java | 2 +- .../engine/table/TableDefinition.java | 28 ++++ .../table/impl/NoSuchColumnException.java | 121 ++++++++++++++++++ .../benchmark/engine/SortBenchmark.java | 3 +- .../engine/table/impl/BaseTable.java | 15 +-- .../engine/table/impl/BucketingContext.java | 8 +- .../engine/table/impl/CrossJoinHelper.java | 2 +- .../table/impl/NoSuchColumnException.java | 33 ----- .../engine/table/impl/QueryTable.java | 18 +-- .../engine/table/impl/RedefinableTable.java | 9 +- .../engine/table/impl/TableDefaults.java | 2 +- .../table/impl/TableUpdateValidator.java | 6 +- .../by/ChunkedOperatorAggregationHelper.java | 47 +++---- .../impl/by/PartitionByChunkedOperator.java | 6 +- .../hierarchical/HierarchicalTableImpl.java | 9 +- .../impl/hierarchical/RollupTableImpl.java | 2 +- .../table/impl/remote/ConstructSnapshot.java | 7 +- .../select/MultiSourceFunctionalColumn.java | 17 +-- .../snapshot/SnapshotInternalListener.java | 2 +- .../engine/table/impl/updateby/UpdateBy.java | 5 +- .../table/impl/util/DynamicTableWriter.java | 2 +- .../util/KeyedArrayBackedMutableTable.java | 2 +- .../deephaven/engine/util/OuterJoinTools.java | 4 +- .../io/deephaven/engine/util/TableTools.java | 2 +- .../engine/util/TableToolsMergeHelper.java | 8 +- .../deephaven/engine/util/TickSuppressor.java | 2 +- .../engine/util/TotalsTableBuilder.java | 14 +- .../io/deephaven/engine/util/WindowCheck.java | 2 +- .../table/impl/MultiColumnSortTest.java | 2 +- .../table/impl/QueryTableAggregationTest.java | 8 +- .../engine/table/impl/QueryTableTest.java | 12 +- .../table/impl/SelectOverheadLimiter.java | 4 +- .../engine/table/impl/TestAggBy.java | 2 +- .../engine/table/impl/TestMoveColumns.java | 8 +- .../engine/table/impl/TestTotalsTable.java | 9 +- .../table/impl/indexer/TestRowSetIndexer.java | 4 +- .../deephaven/engine/testutil/TstUtils.java | 13 +- .../locations/TableBackedColumnLocation.java | 2 +- .../jdbc/JdbcToTableAdapterTest.java | 12 +- .../table/ParquetTableReadWriteTest.java | 9 +- .../table/ops/SelectDistinctGrpcImpl.java | 10 +- 41 files changed, 282 insertions(+), 191 deletions(-) create mode 100644 engine/api/src/main/java/io/deephaven/engine/table/impl/NoSuchColumnException.java delete mode 100644 engine/table/src/main/java/io/deephaven/engine/table/impl/NoSuchColumnException.java diff --git a/Plot/src/main/java/io/deephaven/plot/util/ArgumentValidations.java b/Plot/src/main/java/io/deephaven/plot/util/ArgumentValidations.java index f78cbeee45f..2d1a4dbf9fa 100644 --- a/Plot/src/main/java/io/deephaven/plot/util/ArgumentValidations.java +++ b/Plot/src/main/java/io/deephaven/plot/util/ArgumentValidations.java @@ -841,7 +841,7 @@ public static void assertColumnsInTable(final Table t, final PlotInfo plotInfo, assertNotNull(t, "t", plotInfo); assertNotNull(cols, "cols", plotInfo); for (String c : cols) { - if (!t.getColumnSourceMap().containsKey(c)) { + if (!t.hasColumns(c)) { throw new PlotIllegalArgumentException("Column " + c + " could not be found in table.", plotInfo); } } diff --git a/engine/api/src/main/java/io/deephaven/engine/table/TableDefinition.java b/engine/api/src/main/java/io/deephaven/engine/table/TableDefinition.java index 584c09743a1..66aa6b003f7 100644 --- a/engine/api/src/main/java/io/deephaven/engine/table/TableDefinition.java +++ b/engine/api/src/main/java/io/deephaven/engine/table/TableDefinition.java @@ -9,6 +9,7 @@ import io.deephaven.base.log.LogOutput; import io.deephaven.base.log.LogOutputAppendable; import io.deephaven.base.verify.Assert; +import 
io.deephaven.engine.table.impl.NoSuchColumnException; import io.deephaven.io.log.impl.LogOutputStringImpl; import io.deephaven.qst.column.header.ColumnHeader; import org.jetbrains.annotations.NotNull; @@ -204,6 +205,13 @@ public Map> getColumnNameMap() { .toMap(ColumnDefinition::getName, Function.identity(), Assert::neverInvoked, LinkedHashMap::new))); } + /** + * @return An unmodifiable set of column names + */ + public Set getColumnNameSet() { + return getColumnNameMap().keySet(); + } + /** * @return A list of {@link ColumnDefinition column definitions} for all * {@link ColumnDefinition.ColumnType#Partitioning partitioning} columns in the same relative order as the @@ -295,6 +303,26 @@ public String getColumnNamesAsString() { return getColumnStream().map(ColumnDefinition::getName).collect(Collectors.joining(",")); } + /** + * Check this definition to ensure that {@code columnName} is present. + * + * @param columnName The column name to check + * @throws NoSuchColumnException If {@code columnName} is missing + */ + public final void checkHasColumn(@NotNull String columnName) { + NoSuchColumnException.throwIf(getColumnNameSet(), columnName); + } + + /** + * Check this definition to ensure that all {@code columns} are present. + * + * @param columns The column names to check + * @throws NoSuchColumnException If any {@code columns} were missing + */ + public final void checkHasColumns(@NotNull Collection columns) { + NoSuchColumnException.throwIf(getColumnNameSet(), columns); + } + /** * Tests mutual-compatibility of {@code this} and {@code other}. To be mutually compatible, they must have the same * number of columns, each matched up with {@link ColumnDefinition#isCompatible}. As such, this method has an diff --git a/engine/api/src/main/java/io/deephaven/engine/table/impl/NoSuchColumnException.java b/engine/api/src/main/java/io/deephaven/engine/table/impl/NoSuchColumnException.java new file mode 100644 index 00000000000..2b867f191b4 --- /dev/null +++ b/engine/api/src/main/java/io/deephaven/engine/table/impl/NoSuchColumnException.java @@ -0,0 +1,121 @@ +/** + * Copyright (c) 2016-2022 Deephaven Data Labs and Patent Pending + */ +package io.deephaven.engine.table.impl; + +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Set; +import java.util.function.Predicate; +import java.util.stream.Collectors; + +/** + * Exception thrown when a column is not found. + */ +public class NoSuchColumnException extends IllegalArgumentException { + + public static final String DELIMITER = ", "; + + public static final String DEFAULT_FORMAT_STR = "Unknown column names [%s], available column names are [%s]"; + + public enum Type { + MISSING, AVAILABLE, REQUESTED + } + + /** + * Equivalent to {@code throwIf(available, Collections.singleton(requested))}. + * + * @param available the available columns + * @param requested the requested columns + * @see #throwIf(Set, Collection) + */ + public static void throwIf(Set available, String requested) { + throwIf(available, Collections.singleton(requested)); + } + + /** + * Equivalent to {@code throwIf(available, requested, DEFAULT_FORMAT_STR, Type.MISSING, Type.AVAILABLE)} where + * {@code DEFAULT_FORMAT_STR} is {@value DEFAULT_FORMAT_STR}. + * + * @param available the available columns + * @param requested the requested columns + * @see #throwIf(Set, Collection, String, Type...) 
+ */ + public static void throwIf(Set available, Collection requested) { + throwIf(available, requested, DEFAULT_FORMAT_STR, Type.MISSING, Type.AVAILABLE); + } + + /** + * Throws a {@link NoSuchColumnException} if any name from {@code requested} is not in {@code available}. The + * message will be constructed by {@link String#join(CharSequence, Iterable) joining} the respective collection with + * {@value DELIMITER} and presenting them to {@link String#format(String, Object...) format} in {@code types} order. + * + * @param available the available columns + * @param requested the requested columns + * @param formatStr the format string + * @param types the collection types order for formatting + */ + public static void throwIf(Set available, Collection requested, String formatStr, Type... types) { + final List missing = requested + .stream() + .filter(Predicate.not(available::contains)) + .collect(Collectors.toList()); + if (!missing.isEmpty()) { + final Object[] formatArgs = new Object[types.length]; + for (int i = 0; i < types.length; ++i) { + switch (types[i]) { + case MISSING: + formatArgs[i] = String.join(DELIMITER, missing); + break; + case AVAILABLE: + formatArgs[i] = String.join(DELIMITER, available); + break; + case REQUESTED: + formatArgs[i] = String.join(DELIMITER, requested); + break; + default: + throw new IllegalStateException("Unexpected case " + types[i]); + } + } + throw new NoSuchColumnException(String.format(formatStr, formatArgs)); + } + } + + /** + * Thrown when an operation can not find a required column(s). + * + *
+ * Callers may prefer to use {@link #throwIf(Set, Collection, String, Type...)} when applicable. + * + * @param message the message + */ + public NoSuchColumnException(String message) { + super(message); + } + + /** + * Thrown when an operation can not find a required column(s). + * + *
+ * Callers may prefer to use {@link #throwIf(Set, Collection)} when applicable. + * + * @param presentColumns the column names present in the table + * @param missingColumns the request column names that were not found + */ + public NoSuchColumnException(Collection presentColumns, Collection missingColumns) { + this(String.format(DEFAULT_FORMAT_STR, + String.join(DELIMITER, missingColumns), + String.join(DELIMITER, presentColumns))); + } + + /** + * Thrown when an operation can not find a required column. + * + * @param presentColumns the column names present in the table + * @param missingColumn the request column name that was not found + */ + public NoSuchColumnException(Collection presentColumns, String missingColumn) { + this(presentColumns, Collections.singleton(missingColumn)); + } +} diff --git a/engine/benchmark/src/benchmark/java/io/deephaven/benchmark/engine/SortBenchmark.java b/engine/benchmark/src/benchmark/java/io/deephaven/benchmark/engine/SortBenchmark.java index 8f2d9ed9927..d6a94b36e9d 100644 --- a/engine/benchmark/src/benchmark/java/io/deephaven/benchmark/engine/SortBenchmark.java +++ b/engine/benchmark/src/benchmark/java/io/deephaven/benchmark/engine/SortBenchmark.java @@ -138,7 +138,8 @@ public void setupEnv(BenchmarkParams params) { mcsWithSortColumn = inputTable.newModifiedColumnSet(sortCol); MutableInt ci = new MutableInt(); final String[] sortColumns = new String[inputTable.numColumns() - 1]; - inputTable.getColumnSourceMap().keySet().forEach(columnName -> { + + inputTable.getDefinition().getColumnNameSet().forEach(columnName -> { if (!columnName.equals(sortCol)) { sortColumns[ci.intValue()] = columnName; ci.increment(); diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/BaseTable.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/BaseTable.java index 7c16414c7d5..3331d99ed2d 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/BaseTable.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/BaseTable.java @@ -1020,13 +1020,8 @@ private String formatKeyColumns(String... 
columns) { } @Override - public void checkAvailableColumns(@NotNull final Collection columns) { - final Map> sourceMap = getColumnSourceMap(); - final String[] missingColumns = - columns.stream().filter(col -> !sourceMap.containsKey(col)).toArray(String[]::new); - if (missingColumns.length > 0) { - throw new NoSuchColumnException(sourceMap.keySet(), Arrays.asList(missingColumns)); - } + public final void checkAvailableColumns(@NotNull final Collection columns) { + getDefinition().checkHasColumns(columns); } public void copySortableColumns( @@ -1063,7 +1058,7 @@ void copySortableColumns(BaseTable destination, MatchPair[] renamedColumns) { // Process the original set of sortable columns, adding them to the new set if one of the below // 1) The column exists in the new table and was not renamed in any way but the Identity (C1 = C1) // 2) The column does not exist in the new table, but was renamed to another (C2 = C1) - final Set resultColumnNames = destination.getDefinition().getColumnNameMap().keySet(); + final Set resultColumnNames = destination.getDefinition().getColumnNameSet(); for (final String columnName : currentSortableColumns) { // Only add it to the set of sortable columns if it hasn't changed in an unknown way final String maybeRenamedColumn = columnMapping.get(columnName); @@ -1109,9 +1104,9 @@ void copySortableColumns(BaseTable destination, SelectColumn[] selectCols) { } // Now go through the other columns in the table and add them if they were unchanged - final Map> sourceMap = destination.getColumnSourceMap(); + final Set destKeys = destination.getDefinition().getColumnNameSet(); for (String col : currentSortableSet) { - if (sourceMap.containsKey(col)) { + if (destKeys.contains(col)) { newSortableSet.add(col); } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/BucketingContext.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/BucketingContext.java index c89b48e4e34..46881351573 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/BucketingContext.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/BucketingContext.java @@ -20,6 +20,7 @@ import java.time.Instant; import java.util.Arrays; import java.util.List; +import java.util.Set; import java.util.stream.Collectors; import static io.deephaven.engine.table.impl.MatchPair.matchString; @@ -41,8 +42,11 @@ class BucketingContext implements SafeCloseable { BucketingContext(final String listenerPrefix, final QueryTable leftTable, final QueryTable rightTable, MatchPair[] columnsToMatch, MatchPair[] columnsToAdd, JoinControl control) { - final List conflicts = Arrays.stream(columnsToAdd).map(MatchPair::leftColumn) - .filter(cn -> leftTable.getColumnSourceMap().containsKey(cn)).collect(Collectors.toList()); + final Set leftKeys = leftTable.getDefinition().getColumnNameSet(); + final List conflicts = Arrays.stream(columnsToAdd) + .map(MatchPair::leftColumn) + .filter(leftKeys::contains) + .collect(Collectors.toList()); if (!conflicts.isEmpty()) { throw new RuntimeException("Conflicting column names " + conflicts); } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/CrossJoinHelper.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/CrossJoinHelper.java index 2abd3bc1a45..c6fb7284e2f 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/CrossJoinHelper.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/CrossJoinHelper.java @@ -195,7 +195,7 @@ private static QueryTable internalJoin( 
jsm.startTrackingPrevValues(); final ModifiedColumnSet.Transformer leftTransformer = leftTable.newModifiedColumnSetTransformer( resultTable, - leftTable.getColumnSourceMap().keySet().toArray(CollectionUtil.ZERO_LENGTH_STRING_ARRAY)); + leftTable.getDefinition().getColumnNamesArray()); leftTable.addUpdateListener(new BaseTable.ListenerImpl(bucketingContext.listenerDescription, leftTable, resultTable) { diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/NoSuchColumnException.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/NoSuchColumnException.java deleted file mode 100644 index b03c2236206..00000000000 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/NoSuchColumnException.java +++ /dev/null @@ -1,33 +0,0 @@ -/** - * Copyright (c) 2016-2022 Deephaven Data Labs and Patent Pending - */ -package io.deephaven.engine.table.impl; - -import java.util.Collection; -import java.util.Collections; - -/** - * Exception thrown when a column is not found. - */ -public class NoSuchColumnException extends IllegalArgumentException { - /** - * Thrown when an operation can not find a required column. - * - * @param presentColumns the column names present in the table - * @param requestedColumns the request column names that were not found - */ - public NoSuchColumnException(Collection presentColumns, Collection requestedColumns) { - super("Unknown column names [" + String.join(",", requestedColumns) - + "], available column names are [" + String.join(",", presentColumns) + "]"); - } - - /** - * Thrown when an operation can not find a required column. - * - * @param presentColumns the column names present in the table - * @param requestedColumn the request column name that was not found - */ - public NoSuchColumnException(Collection presentColumns, String requestedColumn) { - this(presentColumns, Collections.singleton(requestedColumn)); - } -} diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/QueryTable.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/QueryTable.java index 1b468be1e76..60189aba160 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/QueryTable.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/QueryTable.java @@ -355,7 +355,7 @@ public long size() { public ColumnSource getColumnSource(String sourceName) { final ColumnSource columnSource = columns.get(sourceName); if (columnSource == null) { - throw new NoSuchColumnException(columns.keySet(), Collections.singletonList(sourceName)); + throw new NoSuchColumnException(columns.keySet(), sourceName); } // noinspection unchecked return (ColumnSource) columnSource; @@ -1772,14 +1772,7 @@ public Table dropColumns(String... columnNames) { return memoizeResult(MemoizedOperationKey.dropColumns(columnNames), () -> QueryPerformanceRecorder .withNugget("dropColumns(" + Arrays.toString(columnNames) + ")", sizeForInstrumentation(), () -> { final Mutable

result = new MutableObject<>(); - - final Set existingColumns = new HashSet<>(definition.getColumnNames()); - final Set columnNamesToDrop = new HashSet<>(Arrays.asList(columnNames)); - if (!existingColumns.containsAll(columnNamesToDrop)) { - columnNamesToDrop.removeAll(existingColumns); - throw new RuntimeException("Unknown columns: " + columnNamesToDrop - + ", available columns = " + getColumnSourceMap().keySet()); - } + definition.checkHasColumns(Arrays.asList(columnNames)); final Map> newColumns = new LinkedHashMap<>(columns); for (String columnName : columnNames) { newColumns.remove(columnName); @@ -1794,14 +1787,13 @@ public Table dropColumns(String... columnNames) { copyAttributes(resultTable, CopyAttributeOperation.DropColumns); copySortableColumns(resultTable, - resultTable.getDefinition().getColumnNameMap()::containsKey); + resultTable.getDefinition().getColumnNameSet()::contains); maybeCopyColumnDescriptions(resultTable); if (snapshotControl != null) { final ModifiedColumnSet.Transformer mcsTransformer = newModifiedColumnSetTransformer(resultTable, - resultTable.getColumnSourceMap().keySet() - .toArray(CollectionUtil.ZERO_LENGTH_STRING_ARRAY)); + resultTable.getDefinition().getColumnNamesArray()); final ListenerImpl listener = new ListenerImpl( "dropColumns(" + Arrays.deepToString(columnNames) + ')', this, resultTable) { @Override @@ -2400,7 +2392,7 @@ private Table snapshotIncrementalInternal(final Table base, final boolean doInit // Use the given columns (if specified); otherwise an empty array means all of my columns final String[] useStampColumns = stampColumns.length == 0 - ? getColumnSourceMap().keySet().toArray(CollectionUtil.ZERO_LENGTH_STRING_ARRAY) + ? definition.getColumnNamesArray() : stampColumns; final Map> triggerColumns = new LinkedHashMap<>(); diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/RedefinableTable.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/RedefinableTable.java index 2ca37e304ef..ed787d1baab 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/RedefinableTable.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/RedefinableTable.java @@ -84,15 +84,8 @@ public Table dropColumns(final String... 
columnNames) { if (columnNames == null || columnNames.length == 0) { return this; } - final Set columnNamesToDrop = new HashSet<>(Arrays.asList(columnNames)); - final Set existingColumns = new HashSet<>(definition.getColumnNames()); - if (!existingColumns.containsAll(columnNamesToDrop)) { - columnNamesToDrop.removeAll(existingColumns); - throw new RuntimeException("Unknown columns: " + columnNamesToDrop.toString() + ", available columns = " - + getColumnSourceMap().keySet()); - } - + definition.checkHasColumns(columnNamesToDrop); List> resultColumns = new ArrayList<>(); for (ColumnDefinition cDef : definition.getColumns()) { if (!columnNamesToDrop.contains(cDef.getName())) { diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/TableDefaults.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/TableDefaults.java index ee6dad28b78..836ca71260e 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/TableDefaults.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/TableDefaults.java @@ -98,7 +98,7 @@ default boolean hasColumns(Collection columnNames) { if (columnNames == null) { throw new IllegalArgumentException("columnNames cannot be null!"); } - return getDefinition().getColumnNameMap().keySet().containsAll(columnNames); + return getDefinition().getColumnNameSet().containsAll(columnNames); } @Override diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/TableUpdateValidator.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/TableUpdateValidator.java index 5f2190f2b29..9ec9ddf94a5 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/TableUpdateValidator.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/TableUpdateValidator.java @@ -61,11 +61,13 @@ private TableUpdateValidator(final String description, final QueryTable tableToV this.description = description == null ? 
tableToValidate.getDescription() : description; this.tableToValidate = tableToValidate; this.validationMCS = tableToValidate.newModifiedColumnSet( - tableToValidate.getColumnSourceMap().keySet().toArray(CollectionUtil.ZERO_LENGTH_STRING_ARRAY)); + tableToValidate.getDefinition().getColumnNamesArray()); Assert.neq(validationMCS, "validationMCS", ModifiedColumnSet.ALL, "ModifiedColumnSet.ALL"); Assert.neq(validationMCS, "validationMCS", ModifiedColumnSet.EMPTY, "ModifiedColumnSet.EMPTY"); - columnInfos = tableToValidate.getColumnSourceMap().keySet().stream() + columnInfos = tableToValidate.getDefinition() + .getColumnStream() + .map(ColumnDefinition::getName) .map((name) -> new ColumnInfo(tableToValidate, name)) .toArray(ColumnInfo[]::new); } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/by/ChunkedOperatorAggregationHelper.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/by/ChunkedOperatorAggregationHelper.java index 8ba1426b302..2d6c8f8dde6 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/by/ChunkedOperatorAggregationHelper.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/by/ChunkedOperatorAggregationHelper.java @@ -21,6 +21,7 @@ import io.deephaven.engine.rowset.chunkattributes.RowKeys; import io.deephaven.engine.table.*; import io.deephaven.engine.table.impl.*; +import io.deephaven.engine.table.impl.NoSuchColumnException.Type; import io.deephaven.engine.table.impl.by.typed.TypedHasherFactory; import io.deephaven.engine.table.impl.indexer.RowSetIndexer; import io.deephaven.engine.table.impl.remote.ConstructSnapshot; @@ -47,7 +48,6 @@ import java.util.*; import java.util.function.LongFunction; -import java.util.function.Predicate; import java.util.function.Supplier; import java.util.function.UnaryOperator; @@ -78,6 +78,18 @@ public static QueryTable aggregation( aggregationContextFactory, input, preserveEmpty, initialKeys, groupByColumns); } + private static void checkGroupByColumns(String context, TableDefinition tableDefinition, String[] keyNames) { + NoSuchColumnException.throwIf( + tableDefinition.getColumnNameSet(), + Arrays.asList(keyNames), + String.format( + "aggregation: not all group-by columns [%%s] are present in %s with columns [%%s]. Missing columns: [%%s]", + context), + Type.REQUESTED, + Type.AVAILABLE, + Type.MISSING); + } + @VisibleForTesting public static QueryTable aggregation( @NotNull final AggregationControl control, @@ -87,41 +99,22 @@ public static QueryTable aggregation( @Nullable final Table initialKeys, @NotNull final Collection groupByColumns) { final String[] keyNames = groupByColumns.stream().map(ColumnName::name).toArray(String[]::new); - if (!input.hasColumns(keyNames)) { - final Set colNames = input.getColumnSourceMap().keySet(); - final String[] missingColumns = Arrays.stream(keyNames) - .filter(Predicate.not(colNames::contains)) - .toArray(String[]::new);; - - throw new IllegalArgumentException("aggregation: not all group-by columns " + Arrays.toString(keyNames) - + " are present in input table with columns " - + Arrays.toString(input.getDefinition().getColumnNamesArray()) + ". 
Missing columns: " - + Arrays.toString(missingColumns)); - } + checkGroupByColumns("input table", input.getDefinition(), keyNames); if (initialKeys != null) { if (keyNames.length == 0) { throw new IllegalArgumentException( "aggregation: initial groups must not be specified if no group-by columns are specified"); } - if (!initialKeys.hasColumns(keyNames)) { - final Set colNames = input.getColumnSourceMap().keySet(); - final String[] missingColumns = Arrays.stream(keyNames) - .filter(Predicate.not(colNames::contains)) - .toArray(String[]::new);; - - throw new IllegalArgumentException("aggregation: not all group-by columns " + Arrays.toString(keyNames) - + " are present in initial groups table with columns " - + Arrays.toString(initialKeys.getDefinition().getColumnNamesArray()) + ". Missing columns: " - + Arrays.toString(missingColumns)); - } + checkGroupByColumns("initial groups", initialKeys.getDefinition(), keyNames); for (final String keyName : keyNames) { final ColumnDefinition inputDef = input.getDefinition().getColumn(keyName); final ColumnDefinition initialKeysDef = initialKeys.getDefinition().getColumn(keyName); if (!inputDef.isCompatible(initialKeysDef)) { - throw new IllegalArgumentException( - "aggregation: column definition mismatch between input table and initial groups table for " - + keyName + " input has " + inputDef.describeForCompatibility() - + ", initial groups has " + initialKeysDef.describeForCompatibility()); + throw new IllegalArgumentException(String.format( + "aggregation: column definition mismatch between input table and initial groups table for %s; input has %s, initial groups has %s", + keyName, + inputDef.describeForCompatibility(), + initialKeysDef.describeForCompatibility())); } } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/by/PartitionByChunkedOperator.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/by/PartitionByChunkedOperator.java index d053c54ee87..ad0c4c2fd74 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/by/PartitionByChunkedOperator.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/by/PartitionByChunkedOperator.java @@ -41,6 +41,7 @@ import org.jetbrains.annotations.Nullable; import java.util.*; +import java.util.function.Predicate; import java.util.function.UnaryOperator; import java.util.stream.Collectors; @@ -167,11 +168,10 @@ public interface AttributeCopier { shiftDataBuilders = new ObjectArraySource<>(RowSetShiftData.SmartCoalescingBuilder.class); final Set keyColumnNameSet = Arrays.stream(keyColumnNames).collect(Collectors.toSet()); - final Set unadjustedParentColumnNameSet = - new LinkedHashSet<>(unadjustedParentTable.getDefinition().getColumnNames()); + final Set unadjustedParentColumnNameSet = unadjustedParentTable.getDefinition().getColumnNameSet(); final String[] retainedResultColumnNames = parentTable.getDefinition().getColumnStream() .map(ColumnDefinition::getName) - .filter(cn -> !keyColumnNameSet.contains(cn)) + .filter(Predicate.not(keyColumnNameSet::contains)) .filter(unadjustedParentColumnNameSet::contains) .toArray(String[]::new); final ModifiedColumnSet[] retainedResultModifiedColumnSets = Arrays.stream(retainedResultColumnNames) diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/hierarchical/HierarchicalTableImpl.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/hierarchical/HierarchicalTableImpl.java index 2f11ad45800..2d424a98460 100644 --- 
a/engine/table/src/main/java/io/deephaven/engine/table/impl/hierarchical/HierarchicalTableImpl.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/hierarchical/HierarchicalTableImpl.java @@ -136,13 +136,8 @@ IFACE_TYPE noopResult() { } @Override - protected void checkAvailableColumns(@NotNull final Collection columns) { - final Set availableColumns = root.getDefinition().getColumnNameMap().keySet(); - final List missingColumns = - columns.stream().filter(column -> !availableColumns.contains(column)).collect(Collectors.toList()); - if (!missingColumns.isEmpty()) { - throw new NoSuchColumnException(availableColumns, missingColumns); - } + protected final void checkAvailableColumns(@NotNull final Collection columns) { + root.getDefinition().checkHasColumns(columns); } /** diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/hierarchical/RollupTableImpl.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/hierarchical/RollupTableImpl.java index 1b4f04dfcd9..0dc6f9b7be0 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/hierarchical/RollupTableImpl.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/hierarchical/RollupTableImpl.java @@ -465,7 +465,7 @@ public static RollupTable makeRollup( source.getAttributes(ak -> shouldCopyAttribute(ak, CopyAttributeOperation.Rollup)), source, aggregations, includeConstituents, groupByColumns, levelTables, levelRowLookups, levelNodeTableSources, null, null, null, null, null); - source.copySortableColumns(result, baseLevel.getDefinition().getColumnNameMap()::containsKey); + source.copySortableColumns(result, baseLevel.getDefinition().getColumnNameSet()::contains); result.setColumnDescriptions(AggregationDescriptions.of(aggregations)); return result; } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/remote/ConstructSnapshot.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/remote/ConstructSnapshot.java index c5ddf344299..1f72098d501 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/remote/ConstructSnapshot.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/remote/ConstructSnapshot.java @@ -1403,9 +1403,7 @@ private static boolean serializeAllTable( } LongSizedDataStructure.intSize("construct snapshot", snapshot.rowsIncluded.size()); - - final Map> sourceMap = table.getColumnSourceMap(); - final String[] columnSources = sourceMap.keySet().toArray(CollectionUtil.ZERO_LENGTH_STRING_ARRAY); + final String[] columnSources = table.getDefinition().getColumnNamesArray(); snapshot.dataColumns = new Object[columnSources.length]; try (final SharedContext sharedContext = @@ -1480,8 +1478,7 @@ private static boolean serializeAllTable( snapshot.rowsIncluded = snapshot.rowsAdded.copy(); } - final Map> sourceMap = table.getColumnSourceMap(); - final String[] columnSources = sourceMap.keySet().toArray(CollectionUtil.ZERO_LENGTH_STRING_ARRAY); + final String[] columnSources = table.getDefinition().getColumnNamesArray(); try (final SharedContext sharedContext = (columnSources.length > 1) ? 
SharedContext.makeSharedContext() : null) { diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/MultiSourceFunctionalColumn.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/MultiSourceFunctionalColumn.java index 880a841660e..29d77004a69 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/MultiSourceFunctionalColumn.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/MultiSourceFunctionalColumn.java @@ -95,22 +95,7 @@ public List initInputs(TrackingRowSet rowSet, Map initDef(Map> columnDefinitionMap) { - final MutableObject> missingColumnsHolder = new MutableObject<>(); - sourceNames.forEach(name -> { - final ColumnDefinition sourceColumnDefinition = columnDefinitionMap.get(name); - if (sourceColumnDefinition == null) { - List missingColumnsList; - if ((missingColumnsList = missingColumnsHolder.getValue()) == null) { - missingColumnsHolder.setValue(missingColumnsList = new ArrayList<>()); - } - missingColumnsList.add(name); - } - }); - - if (missingColumnsHolder.getValue() != null) { - throw new NoSuchColumnException(columnDefinitionMap.keySet(), missingColumnsHolder.getValue()); - } - + NoSuchColumnException.throwIf(columnDefinitionMap.keySet(), sourceNames); return getColumns(); } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/snapshot/SnapshotInternalListener.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/snapshot/SnapshotInternalListener.java index 614aa53d42c..a807965046f 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/snapshot/SnapshotInternalListener.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/snapshot/SnapshotInternalListener.java @@ -33,7 +33,7 @@ public SnapshotInternalListener(QueryTable triggerTable, Map> resultTriggerColumns, Map> resultBaseColumns, TrackingWritableRowSet resultRowSet) { - super("snapshot " + result.getColumnSourceMap().keySet(), triggerTable, result); + super("snapshot " + result.getDefinition().getColumnNameSet(), triggerTable, result); this.triggerTable = triggerTable; this.result = result; this.lazySnapshot = lazySnapshot; diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/UpdateBy.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/UpdateBy.java index c5af29411f1..6cf58505ba4 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/UpdateBy.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/UpdateBy.java @@ -47,6 +47,7 @@ import java.util.concurrent.atomic.AtomicIntegerArray; import java.util.concurrent.atomic.AtomicReferenceArray; import java.util.function.Consumer; +import java.util.stream.Collectors; import java.util.stream.IntStream; /** @@ -1188,7 +1189,7 @@ public static Table updateBy(@NotNull final QueryTable source, final Collection> windowSpecs = updateByOperatorFactory.getWindowOperatorSpecs(clauses); - if (windowSpecs.size() == 0) { + if (windowSpecs.isEmpty()) { throw new IllegalArgumentException("At least one operator must be specified"); } @@ -1198,7 +1199,7 @@ public static Table updateBy(@NotNull final QueryTable source, final MutableObject timestampColumnName = new MutableObject<>(null); // create an initial set of all source columns - final Set preservedColumnSet = new LinkedHashSet<>(source.getColumnSourceMap().keySet()); + final LinkedHashSet preservedColumnSet = new LinkedHashSet<>(source.getDefinition().getColumnNameSet()); final Set problems = new 
LinkedHashSet<>(); final Map> opResultSources = new LinkedHashMap<>(); diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/util/DynamicTableWriter.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/util/DynamicTableWriter.java index d7edcb0b118..3322f56bb88 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/util/DynamicTableWriter.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/util/DynamicTableWriter.java @@ -778,7 +778,7 @@ private DynamicTableRow() { public PermissiveRowSetter getSetter(final String name) { final PermissiveRowSetter rowSetter = columnToSetter.get(name); if (rowSetter == null) { - if (table.getColumnSourceMap().containsKey(name)) { + if (table.hasColumns(name)) { throw new RuntimeException("Column has a constant value, can not get setter " + name); } else { throw new RuntimeException("Unknown column name " + name); diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/util/KeyedArrayBackedMutableTable.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/util/KeyedArrayBackedMutableTable.java index 5125422e47e..ad4221bbb90 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/util/KeyedArrayBackedMutableTable.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/util/KeyedArrayBackedMutableTable.java @@ -131,7 +131,7 @@ private KeyedArrayBackedMutableTable(@NotNull TableDefinition definition, final } private void startTrackingPrev() { - getColumnSourceMap().values().forEach(ColumnSource::startTrackingPrevValues); + getColumnSources().forEach(ColumnSource::startTrackingPrevValues); } @Override diff --git a/engine/table/src/main/java/io/deephaven/engine/util/OuterJoinTools.java b/engine/table/src/main/java/io/deephaven/engine/util/OuterJoinTools.java index b4bee084930..b63c329a9d5 100644 --- a/engine/table/src/main/java/io/deephaven/engine/util/OuterJoinTools.java +++ b/engine/table/src/main/java/io/deephaven/engine/util/OuterJoinTools.java @@ -68,10 +68,10 @@ public static Table fullOuterJoin( // find a sentinel column name to use to identify right-side only rows int numAttempts = 0; String sentinelColumnName; - final Map> resultColumns = leftTable.getColumnSourceMap(); + final Set resultColumns = leftTable.getDefinition().getColumnNameSet(); do { sentinelColumnName = "__sentinel_" + (numAttempts++) + "__"; - } while (resultColumns.containsKey(sentinelColumnName)); + } while (resultColumns.contains(sentinelColumnName)); // only need match columns from the left; rename to right names and drop remaining to avoid name conflicts final List leftColumns = Streams.concat( diff --git a/engine/table/src/main/java/io/deephaven/engine/util/TableTools.java b/engine/table/src/main/java/io/deephaven/engine/util/TableTools.java index 1c9e9b666e9..b4b81a18ae5 100644 --- a/engine/table/src/main/java/io/deephaven/engine/util/TableTools.java +++ b/engine/table/src/main/java/io/deephaven/engine/util/TableTools.java @@ -1151,7 +1151,7 @@ public static byte[] computeFingerprint(Table source) throws IOException { final DataOutputStream osw = new DataOutputStream(new DigestOutputStream(new NullOutputStream(), md)); - for (final ColumnSource col : source.getColumnSourceMap().values()) { + for (final ColumnSource col : source.getColumnSources()) { processColumnForFingerprint(source.getRowSet(), col, osw); } diff --git a/engine/table/src/main/java/io/deephaven/engine/util/TableToolsMergeHelper.java 
b/engine/table/src/main/java/io/deephaven/engine/util/TableToolsMergeHelper.java index c369a78ba4e..ee7deb4781c 100644 --- a/engine/table/src/main/java/io/deephaven/engine/util/TableToolsMergeHelper.java +++ b/engine/table/src/main/java/io/deephaven/engine/util/TableToolsMergeHelper.java @@ -115,15 +115,15 @@ private static boolean canBreakOutUnionedTable(Table table) { if (!table.hasAttribute(Table.MERGED_TABLE_ATTRIBUTE)) { return false; } - Map> columnSourceMap = queryTable.getColumnSourceMap(); - if (columnSourceMap.isEmpty()) { + final Collection> columnSources = queryTable.getColumnSources(); + if (columnSources.isEmpty()) { return false; } - if (!columnSourceMap.values().stream().allMatch(cs -> cs instanceof UnionColumnSource)) { + if (!columnSources.stream().allMatch(cs -> cs instanceof UnionColumnSource)) { return false; } - final UnionColumnSource columnSource = (UnionColumnSource) columnSourceMap.values().iterator().next(); + final UnionColumnSource columnSource = (UnionColumnSource) columnSources.iterator().next(); return columnSource.getUnionSourceManager().isUsingComponentsSafe(); } } diff --git a/engine/table/src/main/java/io/deephaven/engine/util/TickSuppressor.java b/engine/table/src/main/java/io/deephaven/engine/util/TickSuppressor.java index 38b65ec5c77..7bf58df0337 100644 --- a/engine/table/src/main/java/io/deephaven/engine/util/TickSuppressor.java +++ b/engine/table/src/main/java/io/deephaven/engine/util/TickSuppressor.java @@ -131,7 +131,7 @@ public void onUpdate(TableUpdate upstream) { return; } - final int columnCount = resultTable.getColumnSourceMap().size(); + final int columnCount = resultTable.numColumns(); final int chunkSize = (int) Math.min(1 << 16, downstream.modified().size()); final ChunkSource.GetContext[] getContextArray = new ChunkSource.GetContext[columnCount]; diff --git a/engine/table/src/main/java/io/deephaven/engine/util/TotalsTableBuilder.java b/engine/table/src/main/java/io/deephaven/engine/util/TotalsTableBuilder.java index d3399ec18ad..5d23a5e5f85 100644 --- a/engine/table/src/main/java/io/deephaven/engine/util/TotalsTableBuilder.java +++ b/engine/table/src/main/java/io/deephaven/engine/util/TotalsTableBuilder.java @@ -6,6 +6,8 @@ import io.deephaven.api.agg.Aggregation; import io.deephaven.engine.table.Table; import io.deephaven.engine.table.ColumnSource; +import io.deephaven.engine.table.impl.NoSuchColumnException; +import io.deephaven.engine.table.impl.NoSuchColumnException.Type; import io.deephaven.util.annotations.ScriptApi; import io.deephaven.util.type.EnumValue; import io.deephaven.util.type.TypeUtils; @@ -546,12 +548,12 @@ public static Table makeTotalsTable(Table source, TotalsTableBuilder builder, St } private static void ensureColumnsExist(Table source, Set columns) { - if (!source.getColumnSourceMap().keySet().containsAll(columns)) { - final Set missing = new LinkedHashSet<>(columns); - missing.removeAll(source.getColumnSourceMap().keySet()); - throw new IllegalArgumentException("Missing columns for totals table " + missing + ", available columns " - + source.getColumnSourceMap().keySet()); - } + NoSuchColumnException.throwIf( + source.getDefinition().getColumnNameSet(), + columns, + "Missing columns for totals table [%s], available columns [%s]", + Type.MISSING, + Type.AVAILABLE); } private static String[] makeColumnFormats(Table source, TotalsTableBuilder builder) { diff --git a/engine/table/src/main/java/io/deephaven/engine/util/WindowCheck.java b/engine/table/src/main/java/io/deephaven/engine/util/WindowCheck.java index 
e4204268d2f..846a25a2fd7 100644 --- a/engine/table/src/main/java/io/deephaven/engine/util/WindowCheck.java +++ b/engine/table/src/main/java/io/deephaven/engine/util/WindowCheck.java @@ -195,7 +195,7 @@ public int getPos(Entry el) { this.rowKeyToEntry = new TLongObjectHashMap<>(); this.mcsTransformer = source.newModifiedColumnSetTransformer(result, - source.getColumnSourceMap().keySet().toArray(CollectionUtil.ZERO_LENGTH_STRING_ARRAY)); + source.getDefinition().getColumnNamesArray()); this.mcsNewColumns = result.newModifiedColumnSet(inWindowColumnName); this.reusableModifiedColumnSet = new ModifiedColumnSet(this.mcsNewColumns); } diff --git a/engine/table/src/test/java/io/deephaven/engine/table/impl/MultiColumnSortTest.java b/engine/table/src/test/java/io/deephaven/engine/table/impl/MultiColumnSortTest.java index d7e379f6c58..213ed2de322 100644 --- a/engine/table/src/test/java/io/deephaven/engine/table/impl/MultiColumnSortTest.java +++ b/engine/table/src/test/java/io/deephaven/engine/table/impl/MultiColumnSortTest.java @@ -58,7 +58,7 @@ private void testMultiColumnSort(int seed, int size) { new BigIntegerGenerator(BigInteger.valueOf(100000), BigInteger.valueOf(100100)), new BigDecimalGenerator(BigInteger.valueOf(100000), BigInteger.valueOf(100100)))); - final List columnNames = new ArrayList<>(table.getColumnSourceMap().keySet()); + final List columnNames = table.getDefinition().getColumnNames(); doMultiColumnTest(table, SortColumn.asc(ColumnName.of("boolCol")), SortColumn.desc(ColumnName.of("Sym"))); diff --git a/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableAggregationTest.java b/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableAggregationTest.java index da435bf1a47..d62c00397e9 100644 --- a/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableAggregationTest.java +++ b/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableAggregationTest.java @@ -116,12 +116,12 @@ private static Table individualStaticByTest(@NotNull final Table input, Arrays.stream(keySelectColumns).map(SelectColumn::getName).distinct().toArray(String[]::new); if (keyColumns.length == 0) { - expectedKeys = TableTools.emptyTable(adjustedInput.size() > 0 ? 1 : 0); + expectedKeys = TableTools.emptyTable(!adjustedInput.isEmpty() ? 
1 : 0); expected = adjustedInput; } else { final Set retainedColumns = - new LinkedHashSet<>(adjustedInput.getDefinition().getColumnNameMap().keySet()); - retainedColumns.removeAll(Arrays.stream(keyNames).collect(Collectors.toSet())); + new LinkedHashSet<>(adjustedInput.getDefinition().getColumnNameSet()); + Arrays.asList(keyNames).forEach(retainedColumns::remove); final List allSelectColumns = Stream.concat(Arrays.stream(keySelectColumns), retainedColumns.stream().map(SourceColumn::new)) .collect(Collectors.toList()); @@ -887,7 +887,7 @@ public void testKeyColumnTypes() { new BigDecimalGenerator(), new IntGenerator())); - final Set keyColumnSet = new LinkedHashSet<>(table.getColumnSourceMap().keySet()); + final Set keyColumnSet = new LinkedHashSet<>(table.getDefinition().getColumnNameSet()); keyColumnSet.remove("NonKey"); final String[] keyColumns = keyColumnSet.toArray(CollectionUtil.ZERO_LENGTH_STRING_ARRAY); diff --git a/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableTest.java b/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableTest.java index 9c8ce50d1a1..5648009a55f 100644 --- a/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableTest.java +++ b/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableTest.java @@ -561,13 +561,17 @@ public void testDropColumns() { try { table.dropColumns(Collections.singletonList("DoesNotExist")); - } catch (RuntimeException e) { - assertEquals("Unknown columns: [DoesNotExist], available columns = [String, Int, Double]", e.getMessage()); + fail("Expected NoSuchColumnException"); + } catch (NoSuchColumnException e) { + assertEquals("Unknown column names [DoesNotExist], available column names are [String, Int, Double]", + e.getMessage()); } try { table.dropColumns(Arrays.asList("Int", "DoesNotExist")); - } catch (RuntimeException e) { - assertEquals("Unknown columns: [DoesNotExist], available columns = [String, Int, Double]", e.getMessage()); + fail("Expected NoSuchColumnException"); + } catch (NoSuchColumnException e) { + assertEquals("Unknown column names [DoesNotExist], available column names are [String, Int, Double]", + e.getMessage()); } } diff --git a/engine/table/src/test/java/io/deephaven/engine/table/impl/SelectOverheadLimiter.java b/engine/table/src/test/java/io/deephaven/engine/table/impl/SelectOverheadLimiter.java index 7e568a5ce67..f21d62024e6 100644 --- a/engine/table/src/test/java/io/deephaven/engine/table/impl/SelectOverheadLimiter.java +++ b/engine/table/src/test/java/io/deephaven/engine/table/impl/SelectOverheadLimiter.java @@ -131,7 +131,7 @@ public static Table clampSelectOverhead(Table input, double permittedOverhead) { { inputRecorder.getValue().setMergedListener(this); inputTransformer = ((QueryTable) input).newModifiedColumnSetTransformer(result, - result.getColumnSourceMap().keySet().toArray(CollectionUtil.ZERO_LENGTH_STRING_ARRAY)); + result.getDefinition().getColumnNamesArray()); } @Override @@ -171,7 +171,7 @@ protected void process() { new ListenerRecorder("clampSelectOverhead.flatResult()", flatResult, result); flatRecorder.setMergedListener(this); flatTransformer = ((QueryTable) flatResult).newModifiedColumnSetTransformer(result, - result.getColumnSourceMap().keySet().toArray(CollectionUtil.ZERO_LENGTH_STRING_ARRAY)); + result.getDefinition().getColumnNamesArray()); flatResult.addUpdateListener(flatRecorder); synchronized (recorders) { diff --git a/engine/table/src/test/java/io/deephaven/engine/table/impl/TestAggBy.java 
b/engine/table/src/test/java/io/deephaven/engine/table/impl/TestAggBy.java index 0c2174f8a7d..e7ac11a5d1a 100644 --- a/engine/table/src/test/java/io/deephaven/engine/table/impl/TestAggBy.java +++ b/engine/table/src/test/java/io/deephaven/engine/table/impl/TestAggBy.java @@ -818,7 +818,7 @@ public void testAggAllByWithFormatColumn() { assertEquals(2.0, cs.get(0)); result = dataTable.formatColumns("Doubles=Decimal(`##0.00%`)").headBy(1); - Set columnNames = result.getColumnSourceMap().keySet(); + List columnNames = result.getDefinition().getColumnNames(); assertEquals(3, columnNames.size()); // Additional column for formatting information of "Doubles" for (String colName : columnNames) { if (!colName.equalsIgnoreCase(doubleColName) && !colName.equalsIgnoreCase(intColName) && diff --git a/engine/table/src/test/java/io/deephaven/engine/table/impl/TestMoveColumns.java b/engine/table/src/test/java/io/deephaven/engine/table/impl/TestMoveColumns.java index e3f7109dad4..3edb395a20b 100644 --- a/engine/table/src/test/java/io/deephaven/engine/table/impl/TestMoveColumns.java +++ b/engine/table/src/test/java/io/deephaven/engine/table/impl/TestMoveColumns.java @@ -3,6 +3,7 @@ */ package io.deephaven.engine.table.impl; +import io.deephaven.engine.table.ColumnDefinition; import io.deephaven.engine.table.Table; import io.deephaven.engine.testutil.testcase.RefreshingTableTestCase; import io.deephaven.engine.util.TableTools; @@ -149,12 +150,15 @@ public void testMoveDownColumns() { } private void checkColumnOrder(Table t, String expectedOrder) { - final String order = t.getColumnSourceMap().keySet().stream().collect(Collectors.joining("")); + final String order = t.getDefinition() + .getColumnStream() + .map(ColumnDefinition::getName) + .collect(Collectors.joining("")); assertEquals(expectedOrder, order); } private void checkColumnValueOrder(Table t, String expectedOrder) { - final String order = t.getColumnSourceMap().values().stream().mapToInt((col) -> col.getInt(0)) + final String order = t.getColumnSources().stream().mapToInt((col) -> col.getInt(0)) .mapToObj(String::valueOf).collect(Collectors.joining("")); assertEquals(expectedOrder, order); } diff --git a/engine/table/src/test/java/io/deephaven/engine/table/impl/TestTotalsTable.java b/engine/table/src/test/java/io/deephaven/engine/table/impl/TestTotalsTable.java index eb53b5674a2..66d40ade933 100644 --- a/engine/table/src/test/java/io/deephaven/engine/table/impl/TestTotalsTable.java +++ b/engine/table/src/test/java/io/deephaven/engine/table/impl/TestTotalsTable.java @@ -20,6 +20,7 @@ import java.util.LinkedHashSet; import java.util.Map; import java.util.Random; +import java.util.Set; import static io.deephaven.engine.testutil.TstUtils.getTable; import static io.deephaven.engine.testutil.TstUtils.initColumnInfos; @@ -61,10 +62,10 @@ public void testTotalsTable() { final TotalsTableBuilder builder = new TotalsTableBuilder(); final Table totals = ExecutionContext.getContext().getUpdateGraph().exclusiveLock().computeLocked( () -> TotalsTableBuilder.makeTotalsTable(builder.applyToTable(queryTable))); - final Map> resultColumns = totals.getColumnSourceMap(); + final Set resultColumns = totals.getDefinition().getColumnNameSet(); assertEquals(1, totals.size()); assertEquals(new LinkedHashSet<>(Arrays.asList("intCol", "intCol2", "doubleCol", "doubleNullCol", "doubleCol2", - "floatCol", "byteCol", "shortCol")), resultColumns.keySet()); + "floatCol", "byteCol", "shortCol")), resultColumns); assertEquals((long) Numeric.sum((int[]) 
DataAccessHelpers.getColumn(queryTable, "intCol").getDirect()), DataAccessHelpers.getColumn(totals, "intCol").get(0)); @@ -85,7 +86,7 @@ public void testTotalsTable() { final Table totals2 = ExecutionContext.getContext().getUpdateGraph().exclusiveLock().computeLocked( () -> TotalsTableBuilder.makeTotalsTable(queryTable, builder)); assertEquals(new LinkedHashSet<>(Arrays.asList("Sym", "intCol2", "byteCol")), - totals2.getColumnSourceMap().keySet()); + totals2.getDefinition().getColumnNameSet()); assertEquals(Numeric.min((byte[]) DataAccessHelpers.getColumn(queryTable, "byteCol").getDirect()), DataAccessHelpers.getColumn(totals2, "byteCol").get(0)); assertEquals(DataAccessHelpers.getColumn(queryTable, "Sym").get(0), @@ -107,7 +108,7 @@ public void testTotalsTable() { assertEquals( new LinkedHashSet<>(Arrays.asList("Sym", "intCol2", "doubleCol", "doubleNullCol__Std", "doubleNullCol__Count", "doubleCol2", "byteCol", "shortCol")), - totals3.getColumnSourceMap().keySet()); + totals3.getDefinition().getColumnNameSet()); assertEquals( Numeric.max((byte[]) DataAccessHelpers.getColumn(queryTable, "byteCol").getDirect()), DataAccessHelpers.getColumn(totals3, "byteCol").getByte(0)); diff --git a/engine/table/src/test/java/io/deephaven/engine/table/impl/indexer/TestRowSetIndexer.java b/engine/table/src/test/java/io/deephaven/engine/table/impl/indexer/TestRowSetIndexer.java index 1c2a7d3cad7..637cb914643 100644 --- a/engine/table/src/test/java/io/deephaven/engine/table/impl/indexer/TestRowSetIndexer.java +++ b/engine/table/src/test/java/io/deephaven/engine/table/impl/indexer/TestRowSetIndexer.java @@ -164,8 +164,8 @@ private void testGrouping(final boolean immutableColumns, final Random random, f private final ArrayList groupingValidators = new ArrayList<>(); private void addGroupingValidator(Table originalValue, String context) { - ArrayList> columnSets2 = powerSet(originalValue.getColumnSourceMap().keySet()); - ArrayList columnNames = new ArrayList<>(originalValue.getColumnSourceMap().keySet()); + ArrayList> columnSets2 = powerSet(originalValue.getDefinition().getColumnNameSet()); + ArrayList columnNames = new ArrayList<>(originalValue.getDefinition().getColumnNameSet()); columnSets2.add(columnNames); groupingValidators.add(new GroupingValidator(context, originalValue, columnSets2)); } diff --git a/engine/test-utils/src/main/java/io/deephaven/engine/testutil/TstUtils.java b/engine/test-utils/src/main/java/io/deephaven/engine/testutil/TstUtils.java index efd539bff3a..83b96d3fe56 100644 --- a/engine/test-utils/src/main/java/io/deephaven/engine/testutil/TstUtils.java +++ b/engine/test-utils/src/main/java/io/deephaven/engine/testutil/TstUtils.java @@ -20,6 +20,8 @@ import io.deephaven.engine.table.Table; import io.deephaven.engine.table.impl.AbstractColumnSource; import io.deephaven.engine.table.impl.BaseTable; +import io.deephaven.engine.table.impl.NoSuchColumnException; +import io.deephaven.engine.table.impl.NoSuchColumnException.Type; import io.deephaven.engine.table.impl.PrevColumnSource; import io.deephaven.engine.table.impl.QueryTable; import io.deephaven.engine.table.impl.select.Formula; @@ -173,11 +175,12 @@ public static void addToTable(final Table table, final RowSet rowSet, final Colu } } - if (!usedNames.containsAll(table.getColumnSourceMap().keySet())) { - final Set expected = new LinkedHashSet<>(table.getColumnSourceMap().keySet()); - expected.removeAll(usedNames); - throw new IllegalStateException("Not all columns were populated, missing " + expected); - } + NoSuchColumnException.throwIf( + 
usedNames, + table.getDefinition().getColumnNameSet(), + "Not all columns were populated, missing [%s], available [%s]", + Type.MISSING, + Type.AVAILABLE); table.getRowSet().writableCast().insert(rowSet); if (table.isFlat()) { diff --git a/engine/test-utils/src/main/java/io/deephaven/engine/testutil/locations/TableBackedColumnLocation.java b/engine/test-utils/src/main/java/io/deephaven/engine/testutil/locations/TableBackedColumnLocation.java index 6c4c9a27f2a..a0cce6fe00b 100644 --- a/engine/test-utils/src/main/java/io/deephaven/engine/testutil/locations/TableBackedColumnLocation.java +++ b/engine/test-utils/src/main/java/io/deephaven/engine/testutil/locations/TableBackedColumnLocation.java @@ -26,7 +26,7 @@ public final class TableBackedColumnLocation @NotNull final TableBackedTableLocation tableLocation, @NotNull final String name) { super(tableLocation, name); - columnSource = tableLocation.table().getDefinition().getColumnNameMap().containsKey(name) + columnSource = tableLocation.table().getDefinition().getColumnNameSet().contains(name) ? ReinterpretUtils.maybeConvertToPrimitive(tableLocation.table().getColumnSource(name)) : null; } diff --git a/extensions/jdbc/src/test/java/io/deephaven/jdbc/JdbcToTableAdapterTest.java b/extensions/jdbc/src/test/java/io/deephaven/jdbc/JdbcToTableAdapterTest.java index d301bf9d153..0c882baede9 100644 --- a/extensions/jdbc/src/test/java/io/deephaven/jdbc/JdbcToTableAdapterTest.java +++ b/extensions/jdbc/src/test/java/io/deephaven/jdbc/JdbcToTableAdapterTest.java @@ -87,7 +87,7 @@ public void testEmptyTable() throws SQLException { // check no-casing column names final Set expectedNames = Set.of("Bool_Type", "TinyIntType", "SmallIntType", "Int_Type", "Big_Int_Type", "Decimal_Type", "String_Type", "DateTime_Type"); - Assert.assertEquals(expectedNames, result.getColumnSourceMap().keySet()); + Assert.assertEquals(expectedNames, result.getDefinition().getColumnNameSet()); // should be an empty table Assert.assertEquals(0, result.size()); @@ -103,7 +103,7 @@ public void testLowerCamelCasing() throws SQLException { // check no-casing column names final Set expectedNames = Set.of("boolType", "tinyIntType", "smallIntType", "intType", "bigIntType", "decimalType", "stringType", "datetimeType"); - Assert.assertEquals(expectedNames, result.getColumnSourceMap().keySet()); + Assert.assertEquals(expectedNames, result.getDefinition().getColumnNameSet()); // should be an empty table Assert.assertEquals(0, result.size()); @@ -119,7 +119,7 @@ public void testLowercaseCasing() throws SQLException { // check no-casing column names final Set expectedNames = Set.of("bool_type", "tiny_int_type", "small_int_type", "int_type", "big_int_type", "decimal_type", "string_type", "datetime_type"); - Assert.assertEquals(expectedNames, result.getColumnSourceMap().keySet()); + Assert.assertEquals(expectedNames, result.getDefinition().getColumnNameSet()); // should be an empty table Assert.assertEquals(0, result.size()); @@ -135,7 +135,7 @@ public void testUpperCamelCasing() throws SQLException { // check no-casing column names final Set expectedNames = Set.of("BoolType", "TinyIntType", "SmallIntType", "IntType", "BigIntType", "DecimalType", "StringType", "DatetimeType"); - Assert.assertEquals(expectedNames, result.getColumnSourceMap().keySet()); + Assert.assertEquals(expectedNames, result.getDefinition().getColumnNameSet()); // should be an empty table Assert.assertEquals(0, result.size()); @@ -151,7 +151,7 @@ public void testUppercaseCasing() throws SQLException { // check no-casing column 
names final Set expectedNames = Set.of("BOOL_TYPE", "TINY_INT_TYPE", "SMALL_INT_TYPE", "INT_TYPE", "BIG_INT_TYPE", "DECIMAL_TYPE", "STRING_TYPE", "DATETIME_TYPE"); - Assert.assertEquals(expectedNames, result.getColumnSourceMap().keySet()); + Assert.assertEquals(expectedNames, result.getDefinition().getColumnNameSet()); // should be an empty table Assert.assertEquals(0, result.size()); @@ -167,7 +167,7 @@ public void testAlternateReplacement() throws SQLException { // check no-casing column names final Set expectedNames = Set.of("BOOL_Z_TYPE", "TINY_Z_INT_Z_TYPE", "SMALL_Z_INT_Z_TYPE", "INT_Z_TYPE", "BIG_Z_INT_Z_TYPE", "DECIMAL_Z_TYPE", "STRING_Z_TYPE", "DATETIME_Z_TYPE"); - Assert.assertEquals(expectedNames, result.getColumnSourceMap().keySet()); + Assert.assertEquals(expectedNames, result.getDefinition().getColumnNameSet()); // should be an empty table Assert.assertEquals(0, result.size()); diff --git a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java index 026617ed081..5924b9bbfa6 100644 --- a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java +++ b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java @@ -460,7 +460,9 @@ public void testVectorColumns() { writeReadTableTest(vectorTable, dest); // Convert the table from vector to array column - final Table arrayTable = vectorTable.updateView(vectorTable.getColumnSourceMap().keySet().stream() + final Table arrayTable = vectorTable.updateView(vectorTable.getDefinition() + .getColumnStream() + .map(ColumnDefinition::getName) .map(name -> name + " = " + name + ".toArray()") .toArray(String[]::new)); writeReadTableTest(arrayTable, dest); @@ -925,7 +927,7 @@ public void partitionedParquetWithDotFilesTest() throws IOException { ParquetTools.writeTable(someTable, secondDataFile); Table partitionedTable = ParquetTools.readTable(parentDir).select(); - final Set columnsSet = partitionedTable.getColumnSourceMap().keySet(); + final Set columnsSet = partitionedTable.getDefinition().getColumnNameSet(); assertTrue(columnsSet.size() == 2 && columnsSet.contains("A") && columnsSet.contains("X")); // Add an empty dot file and dot directory (with valid parquet files) in one of the partitions @@ -1393,8 +1395,7 @@ private void assertTableStatistics(Table inputTable, File dest) { // Verify that the columns have the correct statistics. 
final ParquetMetadata metadata = new ParquetTableLocationKey(dest, 0, null).getMetadata(); - final String[] colNames = - inputTable.getColumnSourceMap().keySet().toArray(CollectionUtil.ZERO_LENGTH_STRING_ARRAY); + final String[] colNames = inputTable.getDefinition().getColumnNamesArray(); for (int colIdx = 0; colIdx < inputTable.numColumns(); ++colIdx) { final String colName = colNames[colIdx]; diff --git a/server/src/main/java/io/deephaven/server/table/ops/SelectDistinctGrpcImpl.java b/server/src/main/java/io/deephaven/server/table/ops/SelectDistinctGrpcImpl.java index c96364c4ce6..0666a06ab4b 100644 --- a/server/src/main/java/io/deephaven/server/table/ops/SelectDistinctGrpcImpl.java +++ b/server/src/main/java/io/deephaven/server/table/ops/SelectDistinctGrpcImpl.java @@ -14,9 +14,9 @@ import javax.inject.Inject; import javax.inject.Singleton; -import java.util.HashSet; import java.util.List; -import java.util.Set; +import java.util.function.Predicate; +import java.util.stream.Collectors; @Singleton public class SelectDistinctGrpcImpl extends GrpcTableOperation { @@ -34,8 +34,10 @@ public Table create(final SelectDistinctRequest request, final Table parent = sourceTables.get(0).get(); // explicitly disallow column expressions - final Set requestedMissing = new HashSet<>(request.getColumnNamesList()); - requestedMissing.removeAll(parent.getDefinition().getColumnNameMap().keySet()); + final List requestedMissing = request.getColumnNamesList() + .stream() + .filter(Predicate.not(parent.getDefinition().getColumnNameSet()::contains)) + .collect(Collectors.toList()); if (!requestedMissing.isEmpty()) { throw Exceptions.statusRuntimeException(Code.FAILED_PRECONDITION, "column(s) not found: " + String.join(", ", requestedMissing)); From f949bb07b3e0a4f3d87fa626a3ed6e2a0627fc24 Mon Sep 17 00:00:00 2001 From: Ryan Caudy Date: Wed, 15 Nov 2023 10:55:09 -0500 Subject: [PATCH 23/41] Use chunks in DynamicWhereFilter, fix a chunk leak, and cleanup the code generally (#4826) * Use chunks in DynamicWhereFilter for all column reading * Combine the single-column and multi-column linear filter paths --- .../table/impl/select/DynamicWhereFilter.java | 177 +++++++++--------- .../table/impl/QueryTableWhereTest.java | 17 ++ 2 files changed, 109 insertions(+), 85 deletions(-) diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/DynamicWhereFilter.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/DynamicWhereFilter.java index 22e5d7ed7cd..29152e038d5 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/DynamicWhereFilter.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/DynamicWhereFilter.java @@ -4,12 +4,15 @@ package io.deephaven.engine.table.impl.select; import io.deephaven.base.log.LogOutput; +import io.deephaven.base.verify.Assert; import io.deephaven.chunk.attributes.Values; import io.deephaven.datastructures.util.CollectionUtil; +import io.deephaven.engine.primitive.iterator.CloseableIterator; import io.deephaven.engine.rowset.*; import io.deephaven.engine.rowset.RowSetFactory; import io.deephaven.engine.table.*; import io.deephaven.engine.table.impl.indexer.RowSetIndexer; +import io.deephaven.engine.table.iterators.ChunkedColumnIterator; import io.deephaven.engine.updategraph.NotificationQueue; import io.deephaven.engine.updategraph.DynamicNode; import io.deephaven.engine.table.impl.*; @@ -19,16 +22,14 @@ import io.deephaven.chunk.WritableLongChunk; import io.deephaven.engine.table.impl.TupleSourceFactory; 
import io.deephaven.engine.updategraph.UpdateGraph; -import io.deephaven.io.log.impl.LogOutputStringImpl; import io.deephaven.engine.rowset.chunkattributes.OrderedRowKeys; -import org.apache.commons.lang3.mutable.MutableBoolean; import org.jetbrains.annotations.NotNull; import java.util.*; /** * A where filter that extracts a set of inclusion or exclusion keys from a set table. - * + *

* Each time the set table ticks, the entire where filter is recalculated. */ public class DynamicWhereFilter extends WhereFilterLivenessArtifactImpl implements NotificationQueue.Dependency { @@ -70,46 +71,83 @@ public DynamicWhereFilter(final QueryTable setTable, final boolean inclusion, fi if (setRefreshing) { this.setTable = setTable; setTupleSource = TupleSourceFactory.makeTupleSource(setColumns); - setTable.getRowSet().forAllRowKeys((final long v) -> addKey(makeKey(v))); + if (setTable.getRowSet().isNonempty()) { + try (final CloseableIterator initialKeysIterator = ChunkedColumnIterator.make( + setTupleSource, setTable.getRowSet(), getChunkSize(setTable.getRowSet()))) { + initialKeysIterator.forEachRemaining(this::addKey); + } + } - final String[] columnNames = Arrays.stream(matchPairs).map(MatchPair::rightColumn).toArray(String[]::new); - final ModifiedColumnSet modTokenSet = setTable.newModifiedColumnSet(columnNames); + final String[] setColumnNames = + Arrays.stream(matchPairs).map(MatchPair::rightColumn).toArray(String[]::new); + final ModifiedColumnSet setColumnsMCS = setTable.newModifiedColumnSet(setColumnNames); setUpdateListener = new InstrumentedTableUpdateListenerAdapter( "DynamicWhereFilter(" + Arrays.toString(setColumnsNames) + ")", setTable, false) { @Override public void onUpdate(final TableUpdate upstream) { - if (upstream.added().isEmpty() && upstream.removed().isEmpty() - && !upstream.modifiedColumnSet().containsAny(modTokenSet)) { + final boolean hasAdds = upstream.added().isNonempty(); + final boolean hasRemoves = upstream.removed().isNonempty(); + final boolean hasModifies = upstream.modified().isNonempty() + && upstream.modifiedColumnSet().containsAny(setColumnsMCS); + if (!hasAdds && !hasRemoves && !hasModifies) { return; } - final MutableBoolean trueModification = new MutableBoolean(false); + // Remove removed keys + if (hasRemoves) { + try (final CloseableIterator removedKeysIterator = ChunkedColumnIterator.make( + setTupleSource.getPrevSource(), upstream.removed(), getChunkSize(upstream.removed()))) { + removedKeysIterator.forEachRemaining(DynamicWhereFilter.this::removeKey); + } + } - upstream.added().forAllRowKeys((final long v) -> addKey(makeKey(v))); - upstream.removed().forAllRowKeys((final long v) -> removeKey(makePrevKey(v))); + // Update modified keys + boolean trueModification = false; + if (hasModifies) { + // @formatter:off + try (final CloseableIterator preModifiedKeysIterator = ChunkedColumnIterator.make( + setTupleSource.getPrevSource(), upstream.getModifiedPreShift(), + getChunkSize(upstream.getModifiedPreShift())); + final CloseableIterator postModifiedKeysIterator = ChunkedColumnIterator.make( + setTupleSource, upstream.modified(), + getChunkSize(upstream.modified()))) { + // @formatter:on + while (preModifiedKeysIterator.hasNext()) { + Assert.assertion(postModifiedKeysIterator.hasNext(), + "Pre and post modified row sets must be the same size; post is exhausted, but pre is not"); + final Object oldKey = preModifiedKeysIterator.next(); + final Object newKey = postModifiedKeysIterator.next(); + if (!Objects.equals(oldKey, newKey)) { + trueModification = true; + removeKey(oldKey); + addKey(newKey); + } + } + Assert.assertion(!postModifiedKeysIterator.hasNext(), + "Pre and post modified row sets must be the same size; pre is exhausted, but post is not"); + } + } - upstream.forAllModified((preIndex, postIndex) -> { - final Object oldKey = makePrevKey(preIndex); - final Object newKey = makeKey(postIndex); - if (!Objects.equals(oldKey, newKey)) { - 
trueModification.setTrue(); - removeKey(oldKey); - addKey(newKey); + // Add added keys + if (hasAdds) { + try (final CloseableIterator addedKeysIterator = ChunkedColumnIterator.make( + setTupleSource, upstream.added(), getChunkSize(upstream.added()))) { + addedKeysIterator.forEachRemaining(DynamicWhereFilter.this::addKey); } - }); + } // Pretend every row of the original table was modified, this is essential so that the where clause // can be re-evaluated based on the updated live set. if (listener != null) { - if (upstream.added().isNonempty() || trueModification.booleanValue()) { + if (hasAdds || trueModification) { if (inclusion) { listener.requestRecomputeUnmatched(); } else { listener.requestRecomputeMatched(); } } - if (upstream.removed().isNonempty() || trueModification.booleanValue()) { + if (hasRemoves || trueModification) { if (inclusion) { listener.requestRecomputeMatched(); } else { @@ -132,8 +170,13 @@ public void onFailureInternal(Throwable originalException, Entry sourceEntry) { } else { this.setTable = null; setTupleSource = null; - final TupleSource temporaryTupleSource = TupleSourceFactory.makeTupleSource(setColumns); - setTable.getRowSet().forAllRowKeys((final long v) -> addKeyUnchecked(makeKey(temporaryTupleSource, v))); + if (setTable.getRowSet().isNonempty()) { + final TupleSource temporaryTupleSource = TupleSourceFactory.makeTupleSource(setColumns); + try (final CloseableIterator initialKeysIterator = ChunkedColumnIterator.make( + temporaryTupleSource, setTable.getRowSet(), getChunkSize(setTable.getRowSet()))) { + initialKeysIterator.forEachRemaining(this::addKeyUnchecked); + } + } kernelValid = liveValuesArrayValid = false; setInclusionKernel = null; setUpdateListener = null; @@ -145,18 +188,6 @@ public UpdateGraph getUpdateGraph() { return updateGraph; } - private Object makeKey(long index) { - return makeKey(setTupleSource, index); - } - - private static Object makeKey(TupleSource tupleSource, long index) { - return tupleSource.createTuple(index); - } - - private Object makePrevKey(long index) { - return setTupleSource.createPreviousTuple(index); - } - private void removeKey(Object key) { final boolean removed = liveValues.remove(key); if (!removed) { @@ -225,7 +256,7 @@ public WritableRowSet filter( if (selection.size() > (selectionIndexer.getGrouping(tupleSource).size() * 2L)) { return filterGrouping(trackingSelection, selectionIndexer, tupleSource); } else { - return filterLinear(selection, keyColumns, tupleSource); + return filterLinear(selection, tupleSource); } } final boolean allGrouping = Arrays.stream(keyColumns).allMatch(selectionIndexer::hasGrouping); @@ -241,80 +272,61 @@ public WritableRowSet filter( return filterGrouping(trackingSelection, selectionIndexer, tupleSource); } } - return filterLinear(selection, keyColumns, tupleSource); + return filterLinear(selection, tupleSource); } - private WritableRowSet filterGrouping(TrackingRowSet selection, RowSetIndexer selectionIndexer, + private WritableRowSet filterGrouping( + TrackingRowSet selection, + RowSetIndexer selectionIndexer, TupleSource tupleSource) { final RowSet matchingKeys = selectionIndexer.getSubSetForKeySet(liveValues, tupleSource); return (inclusion ? 
matchingKeys.copy() : selection.minus(matchingKeys)); } - private WritableRowSet filterGrouping(TrackingRowSet selection, RowSetIndexer selectionIndexer, Table table) { - final ColumnSource[] keyColumns = Arrays.stream(matchPairs) - .map(mp -> table.getColumnSource(mp.leftColumn())).toArray(ColumnSource[]::new); - final TupleSource tupleSource = TupleSourceFactory.makeTupleSource(keyColumns); - return filterGrouping(selection, selectionIndexer, tupleSource); - } - - private WritableRowSet filterLinear(RowSet selection, ColumnSource[] keyColumns, TupleSource tupleSource) { - if (keyColumns.length == 1) { - return filterLinearOne(selection, keyColumns[0]); - } else { - return filterLinearTuple(selection, tupleSource); - } - } - - private WritableRowSet filterLinearOne(RowSet selection, ColumnSource keyColumn) { + private WritableRowSet filterLinear(RowSet selection, TupleSource tupleSource) { if (selection.isEmpty()) { return RowSetFactory.empty(); } if (!kernelValid) { - setInclusionKernel = SetInclusionKernel.makeKernel(keyColumn.getChunkType(), liveValues, inclusion); + setInclusionKernel = SetInclusionKernel.makeKernel(tupleSource.getChunkType(), liveValues, inclusion); kernelValid = true; } final RowSetBuilderSequential indexBuilder = RowSetFactory.builderSequential(); - try (final ColumnSource.GetContext getContext = keyColumn.makeGetContext(CHUNK_SIZE); - final RowSequence.Iterator rsIt = selection.getRowSequenceIterator()) { - final WritableLongChunk keyIndices = WritableLongChunk.makeWritableChunk(CHUNK_SIZE); - final WritableBooleanChunk matches = WritableBooleanChunk.makeWritableChunk(CHUNK_SIZE); + final int maxChunkSize = getChunkSize(selection); + // @formatter:off + try (final ColumnSource.GetContext keyGetContext = tupleSource.makeGetContext(maxChunkSize); + final RowSequence.Iterator selectionIterator = selection.getRowSequenceIterator(); + final WritableLongChunk selectionRowKeyChunk = + WritableLongChunk.makeWritableChunk(maxChunkSize); + final WritableBooleanChunk matches = WritableBooleanChunk.makeWritableChunk(maxChunkSize)) { + // @formatter:on - while (rsIt.hasMore()) { - final RowSequence chunkOk = rsIt.getNextRowSequenceWithLength(CHUNK_SIZE); + while (selectionIterator.hasMore()) { + final RowSequence selectionChunk = selectionIterator.getNextRowSequenceWithLength(maxChunkSize); - final Chunk chunk = Chunk.downcast(keyColumn.getChunk(getContext, chunkOk)); - setInclusionKernel.matchValues(chunk, matches); + final Chunk keyChunk = Chunk.downcast(tupleSource.getChunk(keyGetContext, selectionChunk)); + final int thisChunkSize = keyChunk.size(); + setInclusionKernel.matchValues(keyChunk, matches); - keyIndices.setSize(chunk.size()); - chunkOk.fillRowKeyChunk(keyIndices); + selectionRowKeyChunk.setSize(thisChunkSize); + selectionChunk.fillRowKeyChunk(selectionRowKeyChunk); - for (int ii = 0; ii < chunk.size(); ++ii) { + for (int ii = 0; ii < thisChunkSize; ++ii) { if (matches.get(ii)) { - indexBuilder.appendKey(keyIndices.get(ii)); + indexBuilder.appendKey(selectionRowKeyChunk.get(ii)); } } } } - return indexBuilder.build(); } - private WritableRowSet filterLinearTuple(RowSet selection, TupleSource tupleSource) { - final RowSetBuilderSequential indexBuilder = RowSetFactory.builderSequential(); - - for (final RowSet.Iterator it = selection.iterator(); it.hasNext();) { - final long row = it.nextLong(); - final Object tuple = tupleSource.createTuple(row); - if (liveValues.contains(tuple) == inclusion) { - indexBuilder.appendKey(row); - } - } - - return 
indexBuilder.build(); + private static int getChunkSize(@NotNull final RowSet selection) { + return (int) Math.min(selection.size(), CHUNK_SIZE); } @Override @@ -352,9 +364,4 @@ public LogOutput append(LogOutput logOutput) { return logOutput.append("DynamicWhereFilter(").append(MatchPair.MATCH_PAIR_ARRAY_FORMATTER, matchPairs) .append(")"); } - - @Override - public String toString() { - return new LogOutputStringImpl().append(this).toString(); - } } diff --git a/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableWhereTest.java b/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableWhereTest.java index 4b2058bbeb3..5d6621d0eb0 100644 --- a/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableWhereTest.java +++ b/engine/table/src/test/java/io/deephaven/engine/table/impl/QueryTableWhereTest.java @@ -47,7 +47,9 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; +import java.util.function.IntSupplier; import java.util.function.IntUnaryOperator; +import java.util.stream.IntStream; import static io.deephaven.engine.testutil.TstUtils.*; import static io.deephaven.engine.testutil.testcase.RefreshingTableTestCase.printTableUpdates; @@ -417,6 +419,21 @@ public void testWhereDynamicIn() { asList((String[]) DataAccessHelpers.getColumn(result, "X").getDirect())); assertEquals(1, resultInverse.size()); assertEquals(asList("E"), asList((String[]) DataAccessHelpers.getColumn(resultInverse, "X").getDirect())); + + // Real modification to set table, followed by spurious modification to set table + IntStream.range(0, 2).forEach(ri -> { + updateGraph.runWithinUnitTestCycle(() -> { + addToTable(setTable, i(7), col("X", "C")); + setTable.notifyListeners(i(), i(), i(7)); + }); + showWithRowSet(result); + assertEquals(4, result.size()); + assertEquals(asList("A", "B", "C", "A"), + asList((String[]) DataAccessHelpers.getColumn(result, "X").getDirect())); + assertEquals(2, resultInverse.size()); + assertEquals(asList("D", "E"), + asList((String[]) DataAccessHelpers.getColumn(resultInverse, "X").getDirect())); + }); } @Test From 9e9eb2273349b4f9b39545abade6081d4321550e Mon Sep 17 00:00:00 2001 From: Jianfeng Mao <4297243+jmao-denver@users.noreply.github.com> Date: Wed, 15 Nov 2023 08:58:29 -0700 Subject: [PATCH 24/41] Fix a docstring error (#4833) --- py/server/deephaven/pandas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py/server/deephaven/pandas.py b/py/server/deephaven/pandas.py index c14ae1f9ca7..883622ce27b 100644 --- a/py/server/deephaven/pandas.py +++ b/py/server/deephaven/pandas.py @@ -130,7 +130,7 @@ def to_pandas(table: Table, cols: List[str] = None, dtypes. Both "numpy_nullable" and "pyarrow" automatically convert Deephaven nulls to Pandas NA and enable Pandas extension types. Extension types are needed to support types beyond NumPy's type system. Extension types support operations such as properly mapping Java Strings to Python strings. default is "numpy_nullable". - conv_null (bool): when dtype_backend is not set, whether to check for Deephaven nulls in the data and + conv_null (bool): when dtype_backend is set to None, whether to check for Deephaven nulls in the data and automatically replace them with pd.NA. default is True. 
Returns: From 20967852ef41df72ca7690e5e9a8dbf4807dc33e Mon Sep 17 00:00:00 2001 From: Nate Bauernfeind Date: Wed, 15 Nov 2023 09:38:43 -0700 Subject: [PATCH 25/41] ExportObject PUBLISHING State Change Bug (#4835) --- .../java/io/deephaven/server/session/SessionState.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/io/deephaven/server/session/SessionState.java b/server/src/main/java/io/deephaven/server/session/SessionState.java index 2ba3045be61..56b98bbc751 100644 --- a/server/src/main/java/io/deephaven/server/session/SessionState.java +++ b/server/src/main/java/io/deephaven/server/session/SessionState.java @@ -682,7 +682,11 @@ private synchronized void setWork( this.errorHandler = errorHandler; this.successHandler = successHandler; - setState(ExportNotification.State.PENDING); + if (state != ExportNotification.State.PUBLISHING) { + setState(ExportNotification.State.PENDING); + } else if (dependentCount > 0) { + throw new IllegalStateException("published exports cannot have dependencies"); + } if (dependentCount <= 0) { dependentCount = 0; scheduleExport(); @@ -920,7 +924,7 @@ private void onResolveOne(@Nullable final ExportObject parent) { */ private void scheduleExport() { synchronized (this) { - if (state != ExportNotification.State.PENDING) { + if (state != ExportNotification.State.PENDING && state != ExportNotification.State.PUBLISHING) { return; } setState(ExportNotification.State.QUEUED); From 81a90cbbb2b5898866bfe2dd733dee89cc17ccd0 Mon Sep 17 00:00:00 2001 From: JJ Brosnan <84038776+jjbrosnan@users.noreply.github.com> Date: Wed, 15 Nov 2023 15:02:28 -0500 Subject: [PATCH 26/41] README rework (#4209) * Updates to README * Big updates * Updates from Devin's review - should we add a BUILD.md file? * Minor update * Updates from Chip's review * Minor update * Updates from Chip's latest review * Add note about supported browsers with link to web client UI README section * Update README.md * Updates from Chip's review --------- Co-authored-by: margaretkennedy <82049573+margaretkennedy@users.noreply.github.com> --- README.md | 331 ++++++++++++++++++++++++++---------------------------- 1 file changed, 160 insertions(+), 171 deletions(-) diff --git a/README.md b/README.md index 4499dd21180..52d95f1c34e 100644 --- a/README.md +++ b/README.md @@ -9,15 +9,16 @@ Deephaven includes an intuitive user experience and visualization tools. It can ingest data from a variety of sources, apply computation and analysis algorithms to that data, and build rich queries, dashboards, and representations with the results. -Deephaven Community Core is an open version of [Deephaven Enterprise](https://deephaven.io), +Deephaven Community Core is the open version of [Deephaven Enterprise](https://deephaven.io), which functions as the data backbone for prominent hedge funds, banks, and financial exchanges. 
-[![Join the chat at https://gitter.im/deephaven/deephaven](https://badges.gitter.im/deephaven/deephaven.svg)](https://gitter.im/deephaven/deephaven?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) -![Build CI](https://github.com/deephaven/deephaven-core/actions/workflows/build-ci.yml/badge.svg?branch=main) -![Quick CI](https://github.com/deephaven/deephaven-core/actions/workflows/quick-ci.yml/badge.svg?branch=main) -![Docs CI](https://github.com/deephaven/deephaven-core/actions/workflows/docs-ci.yml/badge.svg?branch=main) -![Check CI](https://github.com/deephaven/deephaven-core/actions/workflows/check-ci.yml/badge.svg?branch=main) -![Nightly Check CI](https://github.com/deephaven/deephaven-core/actions/workflows/nightly-check-ci.yml/badge.svg?branch=main) +- ![Build CI](https://github.com/deephaven/deephaven-core/actions/workflows/build-ci.yml/badge.svg?branch=main) +- ![Quick CI](https://github.com/deephaven/deephaven-core/actions/workflows/quick-ci.yml/badge.svg?branch=main) +- ![Docs CI](https://github.com/deephaven/deephaven-core/actions/workflows/docs-ci.yml/badge.svg?branch=main) +- ![Check CI](https://github.com/deephaven/deephaven-core/actions/workflows/check-ci.yml/badge.svg?branch=main) +- ![Nightly Check CI](https://github.com/deephaven/deephaven-core/actions/workflows/nightly-check-ci.yml/badge.svg?branch=main) + +This README is intended to provide a high-level overview of the installation and use of Deephaven Community Core. For more detailed guides on the topics presented below, see our [Community documentation](https://deephaven.io/core/docs). ## Supported Languages @@ -27,247 +28,233 @@ which functions as the data backbone for prominent hedge funds, banks, and finan | Java / Groovy | Yes | Yes | | C++ | No | Yes | | JavaScript | No | Yes | -| gRPC | - | Yes | - -## Run Deephaven +| Go | No | Yes | +| R | No | Yes | -This section is a quick start guide for running Deephaven from pre-built images. Almost all users will want to run Deephaven using pre-built images. It is the easiest way to deploy. For detailed instructions, see [Launch Deephaven from pre-built images](https://deephaven.io/core/docs/tutorials/quickstart). +Deephaven's client APIs use [gRPC](https://grpc.io/), [protobuf](https://github.com/deephaven/deephaven-core/tree/main/proto/proto-backplane-grpc/src/main/proto/deephaven/proto), [Apache Arrow Flight](https://arrow.apache.org/docs/format/Flight.html), and [Barrage](https://github.com/deephaven/barrage) to handle ticking data. Users who wish to build their own client APIs can use these tools to do so. -Developers interested in tinkering with and modifying source code should build from the source code. For detailed instructions on how to do this, see [Build and launch Deephaven](https://deephaven.io/core/docs/how-to-guides/launch-build). +The following list contains documentation links for installation instructions and more: -If you are not sure which of the two is right for you, use the pre-built images. 
+- Python + - [Run from Docker](https://deephaven.io/core/docs/tutorials/quickstart/) + - [pip-installed](https://deephaven.io/core/docs/tutorials/quickstart-pip/) +- Groovy + - [Run from Docker](https://deephaven.io/core/groovy/docs/tutorials/quickstart/) +- [Python client](https://pypi.org/project/pydeephaven/) +- [Java client](https://deephaven.io/core/docs/how-to-guides/java-client/) +- [JS client](https://deephaven.io/core/docs/reference/js-api/documentation/) +- [Go client](https://pkg.go.dev/github.com/deephaven/deephaven-core/go) +- [R client](https://github.com/deephaven/deephaven-core/blob/main/R/rdeephaven/README.md) -### Required Dependencies +## Install and run Deephaven -Running Deephaven requires a few software packages. +The Deephaven server can be installed and instantiated [from Docker](#from-docker), [from Python](#from-python), or [from source code](#built-from-source). -| Package | Version | OS | -| -------------- | ----------------------------- | ------------ | -| docker | ^20.10.8 | All | -| docker-compose | ^1.29.0 | All | -| Windows | 10 (OS build 20262 or higher) | Only Windows | -| WSL | 2 | Only Windows | +### From Docker -You can check if these packages are installed and functioning by running: -``` -docker version -docker-compose version -docker run hello-world -``` +This is the easiest way to get started with Deephaven. For complete instructions, see our [quickstart for Docker](https://deephaven.io/core/docs/tutorials/quickstart/). The table below shows installation dependencies. -> :warning: **On Windows, all commands must be run inside a WSL 2 terminal.** +| Dependency | Version | OS | Required/Recommended | +| -------------- | -------- | --------| -------------------- | +| Docker | ^20.10.8 | All | Required | +| Docker compose | ^2 | All | Recommended | +| Windows | 10+ | Windows | Required | +| WSL | ^2 | Windows | Required | -If any dependencies are missing or unsupported versions are installed, see [Launch Deephaven from pre-built images](https://deephaven.io/core/docs/tutorials/quickstart#prerequisites) for installation instructions. +The quickest way to install and run Deephaven from Docker is with a single Docker command: +**Python without Docker Compose** -For running the unit tests, you will also need to install [Git LFS](https://git-lfs.com/) and fetch all the required files. Run the following commands from inside the directory: +```sh +# Python +docker run --rm --name deephaven -p 10000:10000 ghcr.io/deephaven/server:latest ``` -git lfs install -git lfs pull + +**Groovy without Docker Compose** + +```sh +# Groovy +docker run --rm name deephaven -p 10000:10000 ghcr.io/deephaven/server-slim:latest ``` -### Create deployment +Users who wish to customize their deployment should use Docker Compose. Deephaven offers a multitude of pre-made [docker-compose.yml files](https://deephaven.io/core/docs/tutorials/quickstart/#choose-a-deployment) to choose from. To get started, all that's required is to download a file, pull the images, and start the server. -A directory must be created to store files and mount points for your deployment. Here, we are using the `deephaven-deployment` directory. +**Python with Docker Compose** -You will need to `cd` into the deployment directory to launch or interact with the deployment. +The base Python `docker-compose.yml` file can be found [here](https://raw.githubusercontent.com/deephaven/deephaven-core/main/containers/python-examples/base/docker-compose.yml). 
-```bash +```sh mkdir deephaven-deployment cd deephaven-deployment -``` - -> :warning: **Commands in the following sections for interacting with a deployment must be run from the deployment directory.** -### Launch: Python +curl -O https://raw.githubusercontent.com/deephaven/deephaven-core/main/containers/python-examples/base/docker-compose.yml -Run the following commands to launch Deephaven for Python server applications. - -```bash -curl https://raw.githubusercontent.com/deephaven/deephaven-core/main/containers/python/base/docker-compose.yml -O -docker-compose pull -docker-compose up -d +docker compose pull +docker compose up ``` -### Launch: Python with NLTK +**Groovy with Docker Compose** -Run the following commands to launch Deephaven for Python server applications with the [NLTK](https://nltk.org/) module pre-installed. - -```bash -curl https://raw.githubusercontent.com/deephaven/deephaven-core/main/containers/python/NLTK/docker-compose.yml -O -docker-compose pull -docker-compose up -d -``` +The base Groovy `docker-compose.yml` file can be found [here](https://raw.githubusercontent.com/deephaven/deephaven-core/main/containers/groovy/docker-compose.yml). -### Launch: Python with PyTorch +```sh +mkdir deephaven-deployment +cd deephaven-deployment -Run the following commands to launch Deephaven for Python server applications with the [PyTorch](https://pytorch.org/) module pre-installed. +curl -O https://raw.githubusercontent.com/deephaven/deephaven-core/main/containers/groovy/docker-compose.yml -```bash -curl https://raw.githubusercontent.com/deephaven/deephaven-core/main/containers/python/PyTorch/docker-compose.yml -O -docker-compose pull -docker-compose up -d +docker compose pull +docker compose up ``` -### Launch: Python with SciKit-Learn +### pip-installed Deephaven -Run the following commands to launch Deephaven for Python server applications with the [SciKit-Learn](https://scikit-learn.org/stable/) module pre-installed. +Users who wish to use Python but not Docker should use [pip-installed Deephaven](https://deephaven.io/core/docs/tutorials/quickstart-pip/). -```bash -curl https://raw.githubusercontent.com/deephaven/deephaven-core/main/containers/python/SciKit-Learn/docker-compose.yml -O -docker-compose pull -docker-compose up -d +```sh +pip install --upgrade pip setuptools wheel +pip install deephaven-server deephaven-ipywidgets ``` -### Launch: Python with TensorFlow - -Run the following commands to launch Deephaven for Python server applications with the [TensorFlow](https://www.tensorflow.org/) module pre-installed. +Then, from Python: -```bash -curl https://raw.githubusercontent.com/deephaven/deephaven-core/main/containers/python/TensorFlow/docker-compose.yml -O -docker-compose pull -docker-compose up -d +```python +from deephaven_server import Server +s = Server(port=10000, jvm_args=["-Xmx4g"]).start() ``` -### Launch: Python with example data +The input arguments to `Server` specify to bind to the Deephaven server on port `10000` and to allocate 4GB of memory to the server JVM. -Run the following commands to launch Deephaven for Python server applications, with example data. +### Built from source -```bash -curl https://raw.githubusercontent.com/deephaven/deephaven-core/main/containers/python-examples/base/docker-compose.yml -O -docker-compose pull -docker-compose up -d -``` - -### Launch: Python with example data and NLTK - -Run the following commands to launch Deephaven for Python server applications, with example data and [NLTK](https://nltk.org/). 
+Users who wish to modify source code and contribute to the project should build Deephaven from source. For complete instructions, see [How to build Deephaven from source](https://deephaven.io/core/docs/how-to-guides/launch-build/). -```bash -curl https://raw.githubusercontent.com/deephaven/deephaven-core/main/containers/python-examples/NLTK/docker-compose.yml -O -docker-compose pull -docker-compose up -d -``` +Building and running Deephaven requires a few software packages. -### Launch: Python with example data and PyTorch +| Package | Version | OS | Required/Recommended | +| -------------- | ----------------------------- | ------------ | -------------------- | +| git | ^2.25.0 | All | Required | +| java | >=11, <20 | All | Required | +| docker | ^20.10.8 | All | Required | +| docker compose | ^2 | All | Recommended | +| Windows | 10 (OS build 20262 or higher) | Only Windows | Required | +| WSL | 2 | Only Windows | Required | -Run the following commands to launch Deephaven for Python server applications, with example data and [PyTorch](https://pytorch.org/). +You can check if these packages are installed and functioning by running: ```bash -curl https://raw.githubusercontent.com/deephaven/deephaven-core/main/containers/python-examples/PyTorch/docker-compose.yml -O -docker-compose pull -docker-compose up -d +git version +java -version +docker version +docker compose version +docker run hello-world ``` -### Launch: Python with example data and SciKit-Learn +:::note -Run the following commands to launch Deephaven for Python server applications, with example data and [SciKit-Learn](https://scikit-learn.org/stable/). +Internally, the Java build process will use [Gradle Auto Provisioning](https://docs.gradle.org/current/userguide/toolchains.html#sec:provisioning) +to download and use the appropriate Java version for building and testing. -```bash -curl https://raw.githubusercontent.com/deephaven/deephaven-core/main/containers/python-examples/SciKit-Learn/docker-compose.yml -O -docker-compose pull -docker-compose up -d -``` +::: -### Launch: Python with example data and TensorFlow +:::note -Run the following commands to launch Deephaven for Python server applications, with example data and [TensorFlow](https://www.tensorflow.org/). +On Windows, all commands must be run inside a WSL 2 terminal. -```bash -curl https://raw.githubusercontent.com/deephaven/deephaven-core/main/containers/python-examples/TensorFlow/docker-compose.yml -O -docker-compose pull -docker-compose up -d -``` +::: -### Launch: Groovy / Java +#### Python -Run the following commands to launch Deephaven for Groovy / Java server applications. +A Python virtual environment is highly recommended for building Deephaven from source. Additionally, the wheel is installed with [pip](https://pypi.org/project/pip/) and built with [Gradle](https://gradle.org/). -```bash -curl https://raw.githubusercontent.com/deephaven/deephaven-core/main/containers/groovy/docker-compose.yml -O -docker-compose pull -docker-compose up -d +```sh +git clone https://github.com/deephaven/deephaven-core.git +cd deephaven-core +python3 -m venv /tmp/my-dh-venv +source /tmp/my-dh-venv/bin/activate +./gradlew py-server:assemble +pip install "py/server/build/wheel/deephaven_core--py3-non-any.whl[autocomplete] +./gradlew server-jetty-app:run ``` -### Launch: Groovy / Java with example data +#### Groovy -Run the following commands to launch Deephaven for Groovy / Java server applications, with example data. +The Groovy server is built with [Gradle](https://gradle.org/). 
`-Pgroovy` builds the Groovy server instead of Python. -```bash -curl https://raw.githubusercontent.com/deephaven/deephaven-core/main/containers/groovy-examples/docker-compose.yml -O -docker-compose pull -docker-compose up -d +```sh +git clone https://github.com/deephaven/deephaven-core.git +cd deephaven-core +./gradlew server-jetty-app:run -Pgroovy ``` -### Monitor logs - -The `-d` option to `docker-compose` causes the containers to run in the background, in detached mode. This option allows you to use your shell after Docker launches the containers. +## Get the authentication key -Since the container is running detached, you will not see any logs. However, you can follow the logs by running: +Deephaven, by default, uses [pre-shared key authentication](https://deephaven.io/core/docs/how-to-guides/authentication/auth-psk/) to authenticate against unauthorized access. -```bash -docker-compose logs -f -``` +### Deephaven run from Docker -Use CTRL+C to stop monitoring the logs and return to a prompt. +The pre-shared key is printed to the Docker logs when the server is started. Set your own key with the configuration parameter `-Dauthentication.psk=`. For users running Deephaven via Docker, this is set in the `environment` section of a `docker-compose.yml` file, or as a space-separated configuration parameter at the end of the [`docker run` command](#from-docker). -### Shutdown +To find the pre-shared key in the Docker logs: -The deployment can be brought down by running: - -```bash -docker-compose down +```sh +docker compose logs -f | grep "access through pre-shared key" ``` -### Manage example data +### Deephaven run from Python -[Deephaven's examples repository](https://github.com/deephaven/examples) contains data sets that are useful when learning -to use Deephaven. These data sets are used extensively in Deephaven's documentation and are needed to run some examples. [Deephaven's examples repository](https://github.com/deephaven/examples) contains documentation on the available data sets and how to manage them. +When a Deephaven server is started from Python, executing Deephaven queries from Python does _not_ require the key. However, if you wish to connect to the IDE via your web browser, you will need the pre-shared key. You will not be able to get the pre-shared key unless you set it yourself. To set the pre-shared key, add `"-Dauthentication.psk="` as an additional JVM parameter to the server. The following example sets the key to `MyPreSharedKey`: -If you have chosen a deployment with example data, the example data sets will be downloaded. Production deployments containing your own data will not need the example data sets. - - -To upgrade a deployment to the latest example data, run: - -```bash -docker-compose run examples download -``` - -To see what other example data management commands are available, run: - -```bash -docker-compose run examples +```python +from deephaven_server import Server +s = Server(port=10000, jvm_args=["-Xmx4g", "-Dauthentication.psk=MyPreSharedKey"]).start() ``` -If your deployment does not have example data, these commands will fail with `ERROR: No such service`. - +### Client APIs -## Run Deephaven IDE +Clients that attempt to connect to a server using pre-shared key authentication will need to supply the key to complete the connection. The key is the same for a client connection as it is for connecting directly to the server. For instance, in the [above example](#deephaven-run-from-python), the key for a client connection would also be `MyPreSharedKey`. 
-Once Deephaven is running, you can launch a Deephaven IDE in your web browser. Deephaven IDE allows you to interactively analyze data. +## Connect to the server -- If Deephaven is running locally, navigate to [http://localhost:10000/ide/](http://localhost:10000/ide/). -- If Deephaven is running remotely, navigate to `http://:10000/ide/`, where `` is the address of the machine Deephaven is running on. +The Deephaven UI is accessible from a web browser. For a server running locally on port 10000, it can be connected to via `https://localhost:10000/ide`. For a server running remotely on port 10000, it can be connected to via `https://:10000/ide`. If using authentication, enter credentials to gain access to the IDE. For information on supported browsers, see [here](https://github.com/deephaven/web-client-ui#browser-support). -![alt_text](docs/images/ide_startup.png "Deephaven IDE") - -# First query +## First query From the Deephaven IDE, you can perform your first query. -This script creates two small tables: one for employees and one for departments. -It joins the two tables on the DeptID column to show the name of the department -where each employee works. +The scripts below create two small tables: one for employees and one for departments. They are joined on the `DeptID` column to show the name of the department where each employee works. + +### Python ```python +from deephaven import new_table +from deephaven.column import string_col, int_col +from deephaven.constants import NULL_INT + +left = new_table([ + string_col("LastName", ["Rafferty", "Jones", "Steiner", "Robins", "Smith", "Rogers"]), + int_col("DeptID", [31, 33, 33, 34, 34, NULL_INT]), + string_col("Telephone", ["(347) 555-0123", "(917) 555-0198", "(212) 555-0167", "(952) 555-0110", None, None]) + ]) + +right = new_table([ + int_col("DeptID", [31, 33, 34, 35]), + string_col("DeptName", ["Sales", "Engineering", "Clerical", "Marketing"]), + string_col("Telephone", ["(646) 555-0134", "(646) 555-0178", "(646) 555-0159", "(212) 555-0111"]) + ]) + +t = left.join(right, "DeptID", "DeptName, DeptTelephone=Telephone") +``` + +![alt_text](docs/images/ide_first_query.png "Deephaven IDE First Query") -from deephaven.TableTools import newTable, stringCol, intCol -from deephaven.conversion_utils import NULL_INT +### Groovy +```groovy left = newTable( - stringCol("LastName", "Rafferty", "Jones", "Steiner", "Robins", "Smith", "Rogers"), - intCol("DeptID", 31, 33, 33, 34, 34, NULL_INT), - stringCol("Telephone", "(347) 555-0123", "(917) 555-0198", "(212) 555-0167", "(952) 555-0110", None, None) + string_col("LastName", "Rafferty", "Jones", "Steiner", "Robins", "Smith", "Rogers"), + int_col("DeptID", 31, 33, 33, 34, 34, NULL_INT), + string_col("Telephone", "(347) 555-0123", "(917) 555-0198", "(212) 555-0167", "(952) 555-0110", null, null) ) right = newTable( @@ -276,12 +263,11 @@ right = newTable( stringCol("Telephone", "(646) 555-0134", "(646) 555-0178", "(646) 555-0159", "(212) 555-0111") ) -t = left.join(right, "DeptID", "DeptName,DeptTelephone=Telephone") +t = left.join(right, "DeptID", "DeptName, DeptTelephone=Telephone") ``` ![alt_text](docs/images/ide_first_query.png "Deephaven IDE First Query") - ## Resources * [Help!](https://github.com/deephaven/deephaven-core/discussions/969) @@ -291,14 +277,17 @@ t = left.join(right, "DeptID", "DeptName,DeptTelephone=Telephone") * [Java API docs](https://deephaven.io/core/javadoc/) * [Python API docs](https://deephaven.io/core/pydoc/) -## Code Of Conduct +## Contributing + +See 
[CONTRIBUTING](./CONTRIBUTING.md) for full instructions on how to contribute to this project. + +### Code Of Conduct This project has adopted the [Contributor Covenant Code of Conduct](https://www.contributor-covenant.org/version/2/0/code_of_conduct/). For more information see the [Code of Conduct](CODE_OF_CONDUCT.md) or contact [opencode@deephaven.io](mailto:opencode@deephaven.io) with any additional questions or comments. - -## License +### License Copyright © 2016-2023 Deephaven Data Labs and Patent Pending. All rights reserved. From b7fdbcc367724279881cc01ea12321579959948a Mon Sep 17 00:00:00 2001 From: Stan Brubaker <120737309+stanbrub@users.noreply.github.com> Date: Thu, 16 Nov 2023 09:24:41 -0700 Subject: [PATCH 27/41] RELEASE.MD Added PR verification step after cherry pick (#4846) --- RELEASE.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/RELEASE.md b/RELEASE.md index 6c70ba50161..3bc643c6d78 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -111,6 +111,9 @@ $ git cherry-pick <...> # See https://github.com/deephaven/deephaven-core/issues/3466 for future improvements to this process. $ ... $ git commit -m "Bump to X.Y.1" +$ git --no-pager log --oneline vX.Y.0..release/vX.Y.1 +# +# Compare output to expected PR list for missing or extraneous PRs ``` ### 3. Push to upstream From a52c2fea4fb57ace9147e7457351b445344038dc Mon Sep 17 00:00:00 2001 From: Colin Alworth Date: Thu, 16 Nov 2023 10:50:04 -0600 Subject: [PATCH 28/41] JS API must support SKIP as an aggregation type (#4780) Fixes #4182 --- .../web/client/api/JsTotalsTableConfig.java | 8 ++++++-- .../web/client/api/tree/JsRollupConfig.java | 4 ++++ .../api/tree/enums/JsAggregationOperation.java | 12 +++++++----- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/web/client-api/src/main/java/io/deephaven/web/client/api/JsTotalsTableConfig.java b/web/client-api/src/main/java/io/deephaven/web/client/api/JsTotalsTableConfig.java index 27269548a2b..9b3e53c12ed 100644 --- a/web/client-api/src/main/java/io/deephaven/web/client/api/JsTotalsTableConfig.java +++ b/web/client-api/src/main/java/io/deephaven/web/client/api/JsTotalsTableConfig.java @@ -83,7 +83,8 @@ public class JsTotalsTableConfig { JsAggregationOperation.LAST, JsAggregationOperation.COUNT_DISTINCT, JsAggregationOperation.DISTINCT, - JsAggregationOperation.UNIQUE); + JsAggregationOperation.UNIQUE, + JsAggregationOperation.SKIP); /** * Specifies if a Totals Table should be expanded by default in the UI. Defaults to false. 
@@ -111,7 +112,6 @@ public class JsTotalsTableConfig { */ public JsArray groupBy = new JsArray<>(); - private AggregateRequest grpcRequest; private JsArray customColumns; private JsArray dropColumns; @@ -406,6 +406,10 @@ public AggregateRequest buildRequest(JsArray allColumns) { // case JsAggregationOperation.WSUM: { // // TODO #3302 support this // } + case JsAggregationOperation.SKIP: { + // cancel entirely, start the loop again + return; + } default: JsLog.warn("Aggregation " + aggregationType + " not supported, ignoring"); } diff --git a/web/client-api/src/main/java/io/deephaven/web/client/api/tree/JsRollupConfig.java b/web/client-api/src/main/java/io/deephaven/web/client/api/tree/JsRollupConfig.java index 3108c86adc8..7cf14222b66 100644 --- a/web/client-api/src/main/java/io/deephaven/web/client/api/tree/JsRollupConfig.java +++ b/web/client-api/src/main/java/io/deephaven/web/client/api/tree/JsRollupConfig.java @@ -261,6 +261,10 @@ public RollupRequest buildRequest(JsArray tableColumns) { // case JsAggregationOperation.WSUM: { // // TODO #3302 support this // } + case JsAggregationOperation.SKIP: { + // cancel entirely, start the loop again + return; + } default: JsLog.warn("Aggregation " + aggregationType + " not supported, ignoring"); } diff --git a/web/client-api/src/main/java/io/deephaven/web/client/api/tree/enums/JsAggregationOperation.java b/web/client-api/src/main/java/io/deephaven/web/client/api/tree/enums/JsAggregationOperation.java index ab0824b1a98..acd460e0859 100644 --- a/web/client-api/src/main/java/io/deephaven/web/client/api/tree/enums/JsAggregationOperation.java +++ b/web/client-api/src/main/java/io/deephaven/web/client/api/tree/enums/JsAggregationOperation.java @@ -74,9 +74,7 @@ public class JsAggregationOperation { */ LAST = "Last", UNIQUE = "Unique"; - /** - * Indicates that this column should not be aggregated. String value is "Skip". - */ + // Array operation isn't legal in all contexts, just omit it for now // ARRAY = "Array", // These need some other parameter to function, not supported yet @@ -84,7 +82,10 @@ public class JsAggregationOperation { // SORTED_FIRST="SortedFirst", // SORTED_LAST="SortedLast", // WSUM = "WeightedSum"; - @Deprecated + + /** + * Indicates that this column should not be aggregated. String value is "Skip". 
+ */ public static final String SKIP = "Skip"; @JsIgnore @@ -95,7 +96,8 @@ public static boolean canAggregateType(String aggregationType, String columnType case DISTINCT: case FIRST: case LAST: - case UNIQUE: { + case UNIQUE: + case SKIP: { // These operations are always safe return true; } From 11537f003d4c4d8c2ae244302b56667def42b0ec Mon Sep 17 00:00:00 2001 From: Colin Alworth Date: Thu, 16 Nov 2023 11:05:52 -0600 Subject: [PATCH 29/41] JS API member/documentation cleanup (#4787) --- .../io/deephaven/web/client/api/JsTable.java | 7 ++- .../web/client/api/filter/FilterValue.java | 13 +++-- .../web/client/api/tree/JsTreeTable.java | 51 ++++++++++--------- .../web/client/ide/IdeConnection.java | 1 + 4 files changed, 40 insertions(+), 32 deletions(-) diff --git a/web/client-api/src/main/java/io/deephaven/web/client/api/JsTable.java b/web/client-api/src/main/java/io/deephaven/web/client/api/JsTable.java index c31ac023a4f..f7f78fad0ea 100644 --- a/web/client-api/src/main/java/io/deephaven/web/client/api/JsTable.java +++ b/web/client-api/src/main/java/io/deephaven/web/client/api/JsTable.java @@ -386,9 +386,8 @@ public String[] getAttributes() { } /** - * null if no property exists, a string if it is an easily serializable property, or a Promise - *

- * that will either resolve with a table or error out if the object can't be passed to JS. + * null if no property exists, a string if it is an easily serializable property, or a {@code Promise + * <Table>} that will either resolve with a table or error out if the object can't be passed to JS. * * @param attributeName * @return Object @@ -877,7 +876,7 @@ public Promise getTotalsTable( * * @return dh.TotalsTableConfig */ - @JsMethod + @JsProperty public JsTotalsTableConfig getTotalsTableConfig() { // we want to communicate to the JS dev that there is no default config, so we allow // returning null here, rather than a default config. They can then easily build a diff --git a/web/client-api/src/main/java/io/deephaven/web/client/api/filter/FilterValue.java b/web/client-api/src/main/java/io/deephaven/web/client/api/filter/FilterValue.java index 7af38fb4646..a224489cd6b 100644 --- a/web/client-api/src/main/java/io/deephaven/web/client/api/filter/FilterValue.java +++ b/web/client-api/src/main/java/io/deephaven/web/client/api/filter/FilterValue.java @@ -20,6 +20,8 @@ import io.deephaven.web.client.api.Column; import io.deephaven.web.client.api.DateWrapper; import io.deephaven.web.client.api.LongWrapper; +import io.deephaven.web.client.api.TableData; +import io.deephaven.web.client.api.i18n.JsTimeZone; import javaemul.internal.annotations.DoNotAutobox; import jsinterop.annotations.JsIgnore; import jsinterop.annotations.JsMethod; @@ -101,12 +103,13 @@ public static FilterValue ofNumber(double input) { /** * Constructs a number for the filter API from the given parameter. Can also be used on the values returned from - * for DateTime values. To create a filter with a date, use dh.DateWrapper.ofJsDate or - * dh.i18n.DateTimeFormat.parse. To create a filter with a 64-bit long integer, use - * dh.LongWrapper.ofString. + * {@link io.deephaven.web.client.api.TableData.Row#get(TableData.RowPositionUnion)} for DateTime values. To create + * a filter with a date, use dh.DateWrapper.ofJsDate or + * {@link io.deephaven.web.client.api.i18n.JsDateTimeFormat#parse(String, JsTimeZone)}. To create a filter with a + * 64-bit long integer, use {@link LongWrapper#ofString(String)}. * - * @param input - * @return + * @param input the number to wrap as a FilterValue + * @return an immutable FilterValue that can be built into a filter */ public static FilterValue ofNumber(OfNumberUnionParam input) { Objects.requireNonNull(input); diff --git a/web/client-api/src/main/java/io/deephaven/web/client/api/tree/JsTreeTable.java b/web/client-api/src/main/java/io/deephaven/web/client/api/tree/JsTreeTable.java index 07153a5f0db..a1f0569955c 100644 --- a/web/client-api/src/main/java/io/deephaven/web/client/api/tree/JsTreeTable.java +++ b/web/client-api/src/main/java/io/deephaven/web/client/api/tree/JsTreeTable.java @@ -71,39 +71,44 @@ import static io.deephaven.web.client.api.subscription.ViewportData.NO_ROW_FORMAT_COLUMN; /** - * Behaves like a JsTable externally, but data, state, and viewports are managed by an entirely different mechanism, and - * so reimplemented here. - * + * Behaves like a {@link JsTable} externally, but data, state, and viewports are managed by an entirely different + * mechanism, and so reimplemented here. + *

* Any time a change is made, we build a new request and send it to the server, and wait for the updated state. - * + *

* Semantics around getting updates from the server are slightly different - we don't "unset" the viewport here after * operations are performed, but encourage the client code to re-set them to the desired position. - * + *

* The table size will be -1 until a viewport has been fetched. - * + *

* Similar to a table, a Tree Table provides access to subscribed viewport data on the current hierarchy. A different * Row type is used within that viewport, showing the depth of that node within the tree and indicating details about - * whether or not it has children or is expanded. The Tree Table itself then provides the ability to change if a row is + * whether it has children or is expanded. The Tree Table itself then provides the ability to change if a row is * expanded or not. Methods used to control or check if a row should be expanded or not can be invoked on a TreeRow * instance, or on the number of the row (thus allowing for expanding/collapsing rows which are not currently visible in * the viewport). - * - * Events and viewports are somewhat different than tables, due to the expense of computing the expanded/collapsed rows + *

+ * Events and viewports are somewhat different from tables, due to the expense of computing the expanded/collapsed rows * and count of children at each level of the hierarchy, and differences in the data that is available. - * - * - There is no totalSize property. - The viewport is not un-set when changes are made to filter or sort, but - * changes will continue to be streamed in. It is suggested that the viewport be changed to the desired position - * (usually the first N rows) after any filter/sort change is made. Likewise, getViewportData() will always - * return the most recent data, and will not wait if a new operation is pending. - Custom columns are not directly - * supported. If the TreeTable was created client-side, the original Table can have custom columns applied, and - * the TreeTable can be recreated. - The totalsTableConfig property is instead a method, and returns a - * promise so the config can be fetched asynchronously. - Totals Tables for trees vary in behavior between hierarchical - * tables and roll-up tables. This behavior is based on the original flat table used to produce the Tree Table - for a - * hierarchical table (i.e. Table.treeTable in the query config), the totals will include non-leaf nodes (since they are - * themselves actual rows in the table), but in a roll-up table, the totals only include leaf nodes (as non-leaf nodes - * are generated through grouping the contents of the original table). Roll-ups also have the - * isIncludeConstituents property, indicating that a Column in the tree may have a constituentType - * property reflecting that the type of cells where hasChildren is false will be different from usual. + *

+ *
+ * <ul>
+ * <li>There is no {@link JsTable#getTotalSize() totalSize} property.</li>
+ * <li>The viewport is not un-set when changes are made to filter or sort, but changes will continue to be streamed in.
+ * It is suggested that the viewport be changed to the desired position (usually the first N rows) after any filter/sort
+ * change is made. Likewise, {@link #getViewportData()} will always return the most recent data, and will not wait if a
+ * new operation is pending.</li>
+ * <li>Custom columns are not directly supported. If the TreeTable was created client-side, the original Table can have
+ * custom columns applied, and the TreeTable can be recreated.</li>
+ * <li>Whereas Table has a {@link JsTable#getTotalsTableConfig()} property, it is defined here as a method,
+ * {@link #getTotalsTableConfig()}. This returns a promise so the config can be fetched asynchronously.</li>
+ * <li>Totals Tables for trees vary in behavior between tree tables and roll-up tables. This behavior is based on the
+ * original flat table used to produce the Tree Table - for a hierarchical table (i.e. Table.treeTable in the query
+ * config), the totals will include non-leaf nodes (since they are themselves actual rows in the table), but in a
+ * roll-up table, the totals only include leaf nodes (as non-leaf nodes are generated through grouping the contents of
+ * the original table). Roll-ups also have the {@link JsRollupConfig#includeConstituents} property, indicating that a
+ * {@link Column} in the tree may have a {@link Column#getConstituentType()} property reflecting that the type of cells
+ * where {@link TreeRow#hasChildren()} is false will be different from usual.</li>
+ * </ul>
*/ @JsType(namespace = "dh", name = "TreeTable") public class JsTreeTable extends HasLifecycle implements ServerObject { diff --git a/web/client-api/src/main/java/io/deephaven/web/client/ide/IdeConnection.java b/web/client-api/src/main/java/io/deephaven/web/client/ide/IdeConnection.java index 657fca26b57..a32b290c613 100644 --- a/web/client-api/src/main/java/io/deephaven/web/client/ide/IdeConnection.java +++ b/web/client-api/src/main/java/io/deephaven/web/client/ide/IdeConnection.java @@ -148,6 +148,7 @@ public JsRunnable subscribeToFieldUpdates(JsConsumer callback }; } + @JsIgnore @Override public void notifyServerShutdown(TerminationNotificationResponse success) { final String details; From 8123f7caadc9f50cb5f8eedc063c2d8d4bd0ba5c Mon Sep 17 00:00:00 2001 From: Devin Smith Date: Thu, 16 Nov 2023 14:03:21 -0800 Subject: [PATCH 30/41] Add parquet read TableDefinition support (#4831) Additionally, adds explicit entry points for single, flat-partitioned, and kv-partitioned reads. Fixes #4746 Partial workaround for #871 --- .../engine/table/impl/SourceTable.java | 5 + .../impl/locations/TableLocationProvider.java | 2 +- .../impl/KnownLocationKeyFinder.java | 29 +- .../deephaven/parquet/table/ParquetTools.java | 299 ++++++-- .../table/ParquetTableReadWriteTest.java | 668 ++++++++++++++---- py/server/deephaven/parquet.py | 91 ++- py/server/tests/test_parquet.py | 166 ++++- 7 files changed, 1033 insertions(+), 227 deletions(-) diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/SourceTable.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/SourceTable.java index bc205fd2ef0..f2870f10631 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/SourceTable.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/SourceTable.java @@ -113,6 +113,11 @@ private void initialize() { initializeLocationSizes(); } + @TestUseOnly + public final TableLocationProvider tableLocationProvider() { + return locationProvider; + } + /** * This is only for unit tests, at this time. */ diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/locations/TableLocationProvider.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/locations/TableLocationProvider.java index ea8bc55dee3..d8991c7d964 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/locations/TableLocationProvider.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/locations/TableLocationProvider.java @@ -77,7 +77,7 @@ interface Listener extends BasicTableDataListener { void unsubscribe(@NotNull Listener listener); /** - * Initialize or run state information about the list of existing locations. + * Initialize or refresh state information about the list of existing locations. */ void refresh(); diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/locations/impl/KnownLocationKeyFinder.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/locations/impl/KnownLocationKeyFinder.java index 17df1e71c3c..5710f235e33 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/locations/impl/KnownLocationKeyFinder.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/locations/impl/KnownLocationKeyFinder.java @@ -9,6 +9,7 @@ import java.util.Arrays; import java.util.Comparator; import java.util.List; +import java.util.Optional; import java.util.function.Consumer; /** @@ -34,10 +35,18 @@ public static KnownLocationKeyFinder(mutableKeys); + final String comparatorString = comparator == null + ? 
null + : Comparator.naturalOrder().equals(comparator) + ? "Comparator.naturalOrder()" + : comparator.toString(); + final String toString = + String.format("%s[%s, %s]", KnownLocationKeyFinder.class.getSimpleName(), finder, comparatorString); + return new KnownLocationKeyFinder<>(mutableKeys, toString); } private final List knownKeys; + private final String toString; @SafeVarargs public KnownLocationKeyFinder(@NotNull final TLK... knownKeys) { @@ -45,7 +54,12 @@ public KnownLocationKeyFinder(@NotNull final TLK... knownKeys) { } public KnownLocationKeyFinder(List knownKeys) { + this(knownKeys, null); + } + + public KnownLocationKeyFinder(List knownKeys, String toString) { this.knownKeys = List.copyOf(knownKeys); + this.toString = toString; } /** @@ -55,8 +69,21 @@ public List getKnownKeys() { return knownKeys; } + public Optional getFirstKey() { + return knownKeys.isEmpty() ? Optional.empty() : Optional.of(knownKeys.get(0)); + } + + public Optional getLastKey() { + return knownKeys.isEmpty() ? Optional.empty() : Optional.of(knownKeys.get(knownKeys.size() - 1)); + } + @Override public void findKeys(@NotNull Consumer locationKeyObserver) { knownKeys.forEach(locationKeyObserver); } + + @Override + public String toString() { + return toString == null ? super.toString() : toString; + } } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTools.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTools.java index a7cdbc89e83..c477d65bf69 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTools.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetTools.java @@ -13,7 +13,7 @@ import io.deephaven.engine.table.Table; import io.deephaven.engine.table.TableDefinition; import io.deephaven.engine.table.impl.locations.util.TableDataRefreshService; -import io.deephaven.engine.util.TableTools; +import io.deephaven.engine.updategraph.UpdateSourceRegistrar; import io.deephaven.vector.*; import io.deephaven.stringset.StringSet; import io.deephaven.engine.util.file.TrackedFileHandleFactory; @@ -61,6 +61,8 @@ @SuppressWarnings("WeakerAccess") public class ParquetTools { + private static final int MAX_PARTITIONING_LEVELS_INFERENCE = 32; + private ParquetTools() {} private static final Logger log = LoggerFactory.getLogger(ParquetTools.class); @@ -68,6 +70,18 @@ private ParquetTools() {} /** * Reads in a table from a single parquet, metadata file, or directory with recognized layout. * + *

+ * This method attempts to "do the right thing." It examines the source to determine if it's a single parquet file, + * a metadata file, or a directory. If it's a directory, it additionally tries to guess the layout to use. Unless a + * metadata file is supplied or discovered in the directory, the highest (by {@link ParquetTableLocationKey location + * key} order) location found will be used to infer schema. + * + *
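+ * For example (an illustrative sketch; the paths are hypothetical), a single file and a partitioned directory can
+ * both be passed directly, and the layout is detected automatically:
+ *
+ * <pre>{@code
+ * Table fromFile = ParquetTools.readTable("/data/trades.parquet");
+ * Table fromDirectory = ParquetTools.readTable("/data/trades_by_date");
+ * }</pre>
+ *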

+ * Delegates to one of {@link #readSingleFileTable(File, ParquetInstructions)}, + * {@link #readPartitionedTableWithMetadata(File, ParquetInstructions)}, + * {@link #readFlatPartitionedTable(File, ParquetInstructions)}, or + * {@link #readKeyValuePartitionedTable(File, ParquetInstructions)}. + * * @param sourceFilePath The file or directory to examine * @return table * @see ParquetSingleFileLayout @@ -82,6 +96,18 @@ public static Table readTable(@NotNull final String sourceFilePath) { /** * Reads in a table from a single parquet, metadata file, or directory with recognized layout. * + *

+ * This method attempts to "do the right thing." It examines the source to determine if it's a single parquet file, + * a metadata file, or a directory. If it's a directory, it additionally tries to guess the layout to use. Unless a + * metadata file is supplied or discovered in the directory, the highest (by {@link ParquetTableLocationKey location + * key} order) location found will be used to infer schema. + * + *
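+ * For example (an illustrative sketch; the directory is hypothetical and assumed to already contain parquet files),
+ * instructions built with {@link ParquetInstructions#builder()} can request a refreshing result:
+ *
+ * <pre>{@code
+ * ParquetInstructions instructions = ParquetInstructions.builder().setIsRefreshing(true).build();
+ * Table live = ParquetTools.readTable("/data/live_trades", instructions);
+ * }</pre>
+ *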

+ * Delegates to one of {@link #readSingleFileTable(File, ParquetInstructions)}, + * {@link #readPartitionedTableWithMetadata(File, ParquetInstructions)}, + * {@link #readFlatPartitionedTable(File, ParquetInstructions)}, or + * {@link #readKeyValuePartitionedTable(File, ParquetInstructions)}. + * * @param sourceFilePath The file or directory to examine * @param readInstructions Instructions for customizations while reading * @return table @@ -99,6 +125,18 @@ public static Table readTable( /** * Reads in a table from a single parquet, metadata file, or directory with recognized layout. * + *

+ * This method attempts to "do the right thing." It examines the source to determine if it's a single parquet file, + * a metadata file, or a directory. If it's a directory, it additionally tries to guess the layout to use. Unless a + * metadata file is supplied or discovered in the directory, the highest (by {@link ParquetTableLocationKey location + * key} order) location found will be used to infer schema. + * + *

+ * Delegates to one of {@link #readSingleFileTable(File, ParquetInstructions)}, + * {@link #readPartitionedTableWithMetadata(File, ParquetInstructions)}, + * {@link #readFlatPartitionedTable(File, ParquetInstructions)}, or + * {@link #readKeyValuePartitionedTable(File, ParquetInstructions)}. + * * @param sourceFile The file or directory to examine * @return table * @see ParquetSingleFileLayout @@ -113,6 +151,18 @@ public static Table readTable(@NotNull final File sourceFile) { /** * Reads in a table from a single parquet, metadata file, or directory with recognized layout. * + *

+ * This method attempts to "do the right thing." It examines the source to determine if it's a single parquet file, + * a metadata file, or a directory. If it's a directory, it additionally tries to guess the layout to use. Unless a + * metadata file is supplied or discovered in the directory, the highest (by {@link ParquetTableLocationKey location + * key} order) location found will be used to infer schema. + * + *

+ * Delegates to one of {@link #readSingleFileTable(File, ParquetInstructions)}, + * {@link #readPartitionedTableWithMetadata(File, ParquetInstructions)}, + * {@link #readFlatPartitionedTable(File, ParquetInstructions)}, or + * {@link #readKeyValuePartitionedTable(File, ParquetInstructions)}. + * * @param sourceFile The file or directory to examine * @param readInstructions Instructions for customizations while reading * @return table @@ -551,6 +601,12 @@ public static void deleteTable(File path) { * metadata file is supplied or discovered in the directory, the highest (by {@link ParquetTableLocationKey location * key} order) location found will be used to infer schema. * + *

+ * Delegates to one of {@link #readSingleFileTable(File, ParquetInstructions)}, + * {@link #readPartitionedTableWithMetadata(File, ParquetInstructions)}, + * {@link #readFlatPartitionedTable(File, ParquetInstructions)}, or + * {@link #readKeyValuePartitionedTable(File, ParquetInstructions)}. + * * @param source The source file or directory * @param instructions Instructions for reading * @return A {@link Table} @@ -566,16 +622,7 @@ private static Table readTableInternal( final BasicFileAttributes sourceAttr = readAttributes(sourcePath); if (sourceAttr.isRegularFile()) { if (sourceFileName.endsWith(PARQUET_FILE_EXTENSION)) { - if (instructions.isRefreshing()) { - throw new IllegalArgumentException("Unable to have a refreshing single parquet file"); - } - final ParquetTableLocationKey tableLocationKey = new ParquetTableLocationKey(source, 0, null); - final Pair>, ParquetInstructions> schemaInfo = convertSchema( - tableLocationKey.getFileReader().getSchema(), - tableLocationKey.getMetadata().getFileMetaData().getKeyValueMetaData(), - instructions); - return readSingleFileTable(tableLocationKey, schemaInfo.getSecond(), - TableDefinition.of(schemaInfo.getFirst())); + return readSingleFileTable(source, instructions); } if (sourceFileName.equals(ParquetMetadataFileLayout.METADATA_FILE_NAME)) { return readPartitionedTableWithMetadata(source.getParentFile(), instructions); @@ -594,10 +641,7 @@ private static Table readTableInternal( final Path firstEntryPath; // Ignore dot files while looking for the first entry try (final DirectoryStream sourceStream = - Files.newDirectoryStream(sourcePath, (path) -> { - final String filename = path.getFileName().toString(); - return !filename.isEmpty() && filename.charAt(0) != '.'; - })) { + Files.newDirectoryStream(sourcePath, ParquetTools::ignoreDotFiles)) { final Iterator entryIterator = sourceStream.iterator(); if (!entryIterator.hasNext()) { throw new TableDataException("Source directory " + source + " is empty"); @@ -609,16 +653,21 @@ private static Table readTableInternal( final String firstEntryFileName = firstEntryPath.getFileName().toString(); final BasicFileAttributes firstEntryAttr = readAttributes(firstEntryPath); if (firstEntryAttr.isDirectory() && firstEntryFileName.contains("=")) { - return readPartitionedTableInferSchema(new ParquetKeyValuePartitionedLayout(source, 32), instructions); + return readKeyValuePartitionedTable(source, instructions); } if (firstEntryAttr.isRegularFile() && firstEntryFileName.endsWith(PARQUET_FILE_EXTENSION)) { - return readPartitionedTableInferSchema(new ParquetFlatPartitionedLayout(source), instructions); + return readFlatPartitionedTable(source, instructions); } throw new TableDataException("No recognized Parquet table layout found in " + source); } throw new TableDataException("Source " + source + " is neither a directory nor a regular file"); } + private static boolean ignoreDotFiles(Path path) { + final String filename = path.getFileName().toString(); + return !filename.isEmpty() && filename.charAt(0) != '.'; + } + private static BasicFileAttributes readAttributes(@NotNull final Path path) { try { return Files.readAttributes(path, BasicFileAttributes.class); @@ -630,6 +679,10 @@ private static BasicFileAttributes readAttributes(@NotNull final Path path) { /** * Reads in a table from a single parquet file using the provided table definition. * + *
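+ * For example (an illustrative sketch; the path and column names are hypothetical), a file can be wrapped in a
+ * location key and paired with an explicit definition:
+ *
+ * <pre>{@code
+ * Table trades = ParquetTools.readSingleFileTable(
+ *         new ParquetTableLocationKey(new File("/data/trades.parquet"), 0, null),
+ *         ParquetInstructions.EMPTY,
+ *         TableDefinition.of(ColumnDefinition.ofString("Sym"), ColumnDefinition.ofDouble("Price")));
+ * }</pre>
+ *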

+ * Callers may prefer the simpler methods {@link #readSingleFileTable(File, ParquetInstructions)} or + * {@link #readSingleFileTable(File, ParquetInstructions, TableDefinition)}. + * * @param tableLocationKey The {@link ParquetTableLocationKey location keys} to include * @param readInstructions Instructions for customizations while reading * @param tableDefinition The table's {@link TableDefinition definition} @@ -639,6 +692,9 @@ public static Table readSingleFileTable( @NotNull final ParquetTableLocationKey tableLocationKey, @NotNull final ParquetInstructions readInstructions, @NotNull final TableDefinition tableDefinition) { + if (readInstructions.isRefreshing()) { + throw new IllegalArgumentException("Unable to have a refreshing single parquet file"); + } final TableLocationProvider locationProvider = new PollingTableLocationProvider<>( StandaloneTableKey.getInstance(), new KnownLocationKeyFinder<>(tableLocationKey), @@ -649,6 +705,27 @@ public static Table readSingleFileTable( RegionedTableComponentFactoryImpl.INSTANCE, locationProvider, null); } + /** + * Reads in a table from files discovered with {@code locationKeyFinder} using a definition built from the highest + * (by {@link ParquetTableLocationKey location key} order) location found, which must have non-null partition values + * for all partition keys. + * + * @param locationKeyFinder The source of {@link ParquetTableLocationKey location keys} to include + * @param readInstructions Instructions for customizations while reading + * @return The table + */ + public static Table readPartitionedTable( + @NotNull final TableLocationKeyFinder locationKeyFinder, + @NotNull final ParquetInstructions readInstructions) { + final KnownLocationKeyFinder inferenceKeys = toKnownKeys(locationKeyFinder); + final Pair inference = infer(inferenceKeys, readInstructions); + return readPartitionedTable( + // In the case of a static output table, we can re-use the already fetched inference keys + readInstructions.isRefreshing() ? locationKeyFinder : inferenceKeys, + inference.getSecond(), + inference.getFirst()); + } + /** * Reads in a table from files discovered with {@code locationKeyFinder} using the provided table definition. * @@ -661,19 +738,31 @@ public static Table readPartitionedTable( @NotNull final TableLocationKeyFinder locationKeyFinder, @NotNull final ParquetInstructions readInstructions, @NotNull final TableDefinition tableDefinition) { - final TableLocationProvider locationProvider = new PollingTableLocationProvider<>( - StandaloneTableKey.getInstance(), - locationKeyFinder, - new ParquetTableLocationFactory(readInstructions), - readInstructions.isRefreshing() ? TableDataRefreshService.getSharedRefreshService() : null); + final String description; + final TableLocationKeyFinder keyFinder; + final TableDataRefreshService refreshService; + final UpdateSourceRegistrar updateSourceRegistrar; + if (readInstructions.isRefreshing()) { + keyFinder = locationKeyFinder; + description = "Read refreshing parquet files with " + keyFinder; + refreshService = TableDataRefreshService.getSharedRefreshService(); + updateSourceRegistrar = ExecutionContext.getContext().getUpdateGraph(); + } else { + keyFinder = toKnownKeys(locationKeyFinder); + description = "Read multiple parquet files with " + keyFinder; + refreshService = null; + updateSourceRegistrar = null; + } return new PartitionAwareSourceTable( tableDefinition, - readInstructions.isRefreshing() - ? 
"Read refreshing parquet files with " + locationKeyFinder - : "Read multiple parquet files with " + locationKeyFinder, + description, RegionedTableComponentFactoryImpl.INSTANCE, - locationProvider, - readInstructions.isRefreshing() ? ExecutionContext.getContext().getUpdateGraph() : null); + new PollingTableLocationProvider<>( + StandaloneTableKey.getInstance(), + keyFinder, + new ParquetTableLocationFactory(readInstructions), + refreshService), + updateSourceRegistrar); } /** @@ -684,22 +773,23 @@ public static Table readPartitionedTable( * @param locationKeyFinder The source of {@link ParquetTableLocationKey location keys} to include * @param readInstructions Instructions for customizations while reading * @return The table + * @deprecated use {@link #readPartitionedTable(TableLocationKeyFinder, ParquetInstructions)} */ + @Deprecated public static Table readPartitionedTableInferSchema( @NotNull final TableLocationKeyFinder locationKeyFinder, @NotNull final ParquetInstructions readInstructions) { - final KnownLocationKeyFinder sortedKeys = - KnownLocationKeyFinder.copyFrom(locationKeyFinder, Comparator.naturalOrder()); - if (sortedKeys.getKnownKeys().isEmpty()) { - if (readInstructions.isRefreshing()) { - throw new IllegalArgumentException( - "Unable to infer schema for a refreshing partitioned parquet table when there are no initial parquet files"); - } - return TableTools.emptyTable(0); + return readPartitionedTable(locationKeyFinder, readInstructions); + } + + private static Pair infer( + KnownLocationKeyFinder inferenceKeys, ParquetInstructions readInstructions) { + // TODO(deephaven-core#877): Support schema merge when discovering multiple parquet files + final ParquetTableLocationKey lastKey = inferenceKeys.getLastKey().orElse(null); + if (lastKey == null) { + throw new IllegalArgumentException( + "Unable to infer schema for a partitioned parquet table when there are no initial parquet files"); } - // TODO (https://github.com/deephaven/deephaven-core/issues/877): Support schema merge when discovering multiple - // parquet files - final ParquetTableLocationKey lastKey = sortedKeys.getKnownKeys().get(sortedKeys.getKnownKeys().size() - 1); final Pair>, ParquetInstructions> schemaInfo = convertSchema( lastKey.getFileReader().getSchema(), lastKey.getMetadata().getFileMetaData().getKeyValueMetaData(), @@ -712,19 +802,23 @@ public static Table readPartitionedTableInferSchema( throw new IllegalArgumentException(String.format( "Last location key %s has null partition value at partition key %s", lastKey, partitionKey)); } - // Primitives should be unboxed, except booleans Class dataType = partitionValue.getClass(); if (dataType != Boolean.class) { dataType = getUnboxedTypeIfBoxed(partitionValue.getClass()); } - allColumns.add(ColumnDefinition.fromGenericType(partitionKey, dataType, null, ColumnDefinition.ColumnType.Partitioning)); } allColumns.addAll(schemaInfo.getFirst()); - return readPartitionedTable(readInstructions.isRefreshing() ? locationKeyFinder : sortedKeys, - schemaInfo.getSecond(), TableDefinition.of(allColumns)); + return new Pair<>(TableDefinition.of(allColumns), schemaInfo.getSecond()); + } + + private static KnownLocationKeyFinder toKnownKeys( + TableLocationKeyFinder keyFinder) { + return keyFinder instanceof KnownLocationKeyFinder + ? 
(KnownLocationKeyFinder) keyFinder + : KnownLocationKeyFinder.copyFrom(keyFinder, Comparator.naturalOrder()); } /** @@ -741,6 +835,127 @@ public static Table readPartitionedTableWithMetadata( return readPartitionedTable(layout, layout.getInstructions(), layout.getTableDefinition()); } + /** + * Creates a partitioned table via the key-value partitioned parquet files from the root {@code directory}, + * inferring the table definition from those files. + * + *
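+ * For example (an illustrative sketch; the layout is hypothetical), given files such as
+ * {@code /data/trades/Date=2023-11-16/part0.parquet}, the partitioning column {@code Date} is inferred along with
+ * the regular columns:
+ *
+ * <pre>{@code
+ * Table trades = ParquetTools.readKeyValuePartitionedTable(new File("/data/trades"), ParquetInstructions.EMPTY);
+ * }</pre>
+ *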

+ * Callers wishing to be more explicit and skip the inference step may prefer to call + * {@link #readKeyValuePartitionedTable(File, ParquetInstructions, TableDefinition)}. + * + * @param directory the source of {@link ParquetTableLocationKey location keys} to include + * @param readInstructions the instructions for customizations while reading + * @return the table + * @see ParquetKeyValuePartitionedLayout#ParquetKeyValuePartitionedLayout(File, int) + * @see #readPartitionedTable(TableLocationKeyFinder, ParquetInstructions) + */ + public static Table readKeyValuePartitionedTable( + @NotNull final File directory, + @NotNull final ParquetInstructions readInstructions) { + return readPartitionedTable(new ParquetKeyValuePartitionedLayout(directory, MAX_PARTITIONING_LEVELS_INFERENCE), + readInstructions); + } + + /** + * Creates a partitioned table via the key-value partitioned parquet files from the root {@code directory} using the + * provided {@code tableDefinition}. + * + * @param directory the source of {@link ParquetTableLocationKey location keys} to include + * @param readInstructions the instructions for customizations while reading + * @param tableDefinition the table definition + * @return the table + * @see ParquetKeyValuePartitionedLayout#ParquetKeyValuePartitionedLayout(File, TableDefinition) + * @see #readPartitionedTable(TableLocationKeyFinder, ParquetInstructions, TableDefinition) + */ + public static Table readKeyValuePartitionedTable( + @NotNull final File directory, + @NotNull final ParquetInstructions readInstructions, + @NotNull final TableDefinition tableDefinition) { + if (tableDefinition.getColumnStream().noneMatch(ColumnDefinition::isPartitioning)) { + throw new IllegalArgumentException("No partitioning columns"); + } + return readPartitionedTable(new ParquetKeyValuePartitionedLayout(directory, tableDefinition), readInstructions, + tableDefinition); + } + + /** + * Creates a partitioned table via the flat parquet files from the root {@code directory}, inferring the table + * definition from those files. + * + *
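+ * For example (an illustrative sketch; the directory is hypothetical), a directory containing only files such as
+ * {@code part0.parquet} and {@code part1.parquet} with a common schema can be read as one table:
+ *
+ * <pre>{@code
+ * Table combined = ParquetTools.readFlatPartitionedTable(new File("/data/trades_flat"), ParquetInstructions.EMPTY);
+ * }</pre>
+ *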

+ * Callers wishing to be more explicit and skip the inference step may prefer to call + * {@link #readFlatPartitionedTable(File, ParquetInstructions, TableDefinition)}. + * + * @param directory the source of {@link ParquetTableLocationKey location keys} to include + * @param readInstructions the instructions for customizations while reading + * @return the table + * @see #readPartitionedTable(TableLocationKeyFinder, ParquetInstructions) + * @see ParquetFlatPartitionedLayout#ParquetFlatPartitionedLayout(File) + */ + public static Table readFlatPartitionedTable( + @NotNull final File directory, + @NotNull final ParquetInstructions readInstructions) { + return readPartitionedTable(new ParquetFlatPartitionedLayout(directory), readInstructions); + } + + /** + * Creates a partitioned table via the flat parquet files from the root {@code directory} using the provided + * {@code tableDefinition}. + * + * @param directory the source of {@link ParquetTableLocationKey location keys} to include + * @param readInstructions the instructions for customizations while reading + * @param tableDefinition the table definition + * @return the table + * @see #readPartitionedTable(TableLocationKeyFinder, ParquetInstructions, TableDefinition) + * @see ParquetFlatPartitionedLayout#ParquetFlatPartitionedLayout(File) + */ + public static Table readFlatPartitionedTable( + @NotNull final File directory, + @NotNull final ParquetInstructions readInstructions, + @NotNull final TableDefinition tableDefinition) { + return readPartitionedTable(new ParquetFlatPartitionedLayout(directory), readInstructions, tableDefinition); + } + + /** + * Creates a single table via the parquet {@code file} using the table definition derived from that {@code file}. + * + *
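+ * For example (an illustrative sketch; the path and column names are hypothetical), the file can be read as-is, or
+ * an explicit definition can be supplied to narrow the columns:
+ *
+ * <pre>{@code
+ * Table all = ParquetTools.readSingleFileTable(new File("/data/trades.parquet"), ParquetInstructions.EMPTY);
+ * Table narrowed = ParquetTools.readSingleFileTable(
+ *         new File("/data/trades.parquet"),
+ *         ParquetInstructions.EMPTY,
+ *         TableDefinition.of(ColumnDefinition.ofString("Sym"), ColumnDefinition.ofDouble("Price")));
+ * }</pre>
+ *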

+ * Callers wishing to be more explicit (for example, to skip some columns) may prefer to call + * {@link #readSingleFileTable(File, ParquetInstructions, TableDefinition)}. + * + * @param file the parquet file + * @param readInstructions the instructions for customizations while reading + * @return the table + * @see ParquetTableLocationKey#ParquetTableLocationKey(File, int, Map) + * @see #readSingleFileTable(ParquetTableLocationKey, ParquetInstructions, TableDefinition) + */ + public static Table readSingleFileTable( + @NotNull final File file, + @NotNull final ParquetInstructions readInstructions) { + final ParquetSingleFileLayout keyFinder = new ParquetSingleFileLayout(file); + final KnownLocationKeyFinder inferenceKeys = toKnownKeys(keyFinder); + final Pair inference = infer(inferenceKeys, readInstructions); + return readSingleFileTable(inferenceKeys.getFirstKey().orElseThrow(), inference.getSecond(), + inference.getFirst()); + } + + /** + * Creates a single table via the parquet {@code file} using the provided {@code tableDefinition}. + * + * @param file the parquet file + * @param readInstructions the instructions for customizations while reading + * @param tableDefinition the table definition + * @return the table + * @see ParquetTableLocationKey#ParquetTableLocationKey(File, int, Map) + * @see #readSingleFileTable(ParquetTableLocationKey, ParquetInstructions, TableDefinition) + */ + public static Table readSingleFileTable( + @NotNull final File file, + @NotNull final ParquetInstructions readInstructions, + @NotNull final TableDefinition tableDefinition) { + return readSingleFileTable(new ParquetTableLocationKey(file, 0, null), readInstructions, tableDefinition); + } + private static final SimpleTypeMap> VECTOR_TYPE_MAP = SimpleTypeMap.create( null, CharVector.class, ByteVector.class, ShortVector.class, IntVector.class, LongVector.class, FloatVector.class, DoubleVector.class, ObjectVector.class); diff --git a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java index 5924b9bbfa6..4727ba36205 100644 --- a/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java +++ b/extensions/parquet/table/src/test/java/io/deephaven/parquet/table/ParquetTableReadWriteTest.java @@ -6,7 +6,6 @@ import io.deephaven.UncheckedDeephavenException; import io.deephaven.api.Selectable; import io.deephaven.base.FileUtils; -import io.deephaven.datastructures.util.CollectionUtil; import io.deephaven.engine.context.ExecutionContext; import io.deephaven.engine.primitive.function.ByteConsumer; import io.deephaven.engine.primitive.function.CharConsumer; @@ -15,13 +14,15 @@ import io.deephaven.engine.primitive.iterator.CloseableIterator; import io.deephaven.engine.table.ColumnDefinition; import io.deephaven.engine.table.ColumnSource; +import io.deephaven.engine.table.impl.SourceTable; +import io.deephaven.engine.table.impl.locations.TableDataException; import io.deephaven.engine.table.impl.select.FunctionalColumn; import io.deephaven.engine.table.impl.select.SelectColumn; import io.deephaven.engine.table.impl.sources.ReinterpretUtils; import io.deephaven.engine.table.impl.util.ColumnHolder; import io.deephaven.engine.table.impl.select.FormulaEvaluationException; import io.deephaven.engine.table.iterators.*; -import io.deephaven.engine.testutil.TstUtils; +import io.deephaven.engine.testutil.ControlledUpdateGraph; import 
io.deephaven.engine.testutil.junit4.EngineCleanup; import io.deephaven.engine.util.BigDecimalUtils; import io.deephaven.engine.util.file.TrackedFileHandleFactory; @@ -36,6 +37,7 @@ import io.deephaven.engine.util.TableTools; import io.deephaven.engine.table.impl.QueryTable; import io.deephaven.test.types.OutOfBandTest; +import io.deephaven.util.QueryConstants; import io.deephaven.util.codec.SimpleByteArrayCodec; import junit.framework.TestCase; import org.apache.parquet.column.Encoding; @@ -80,6 +82,23 @@ import javax.annotation.Nullable; import static io.deephaven.engine.testutil.TstUtils.assertTableEquals; +import static io.deephaven.engine.util.TableTools.booleanCol; +import static io.deephaven.engine.util.TableTools.byteCol; +import static io.deephaven.engine.util.TableTools.charCol; +import static io.deephaven.engine.util.TableTools.doubleCol; +import static io.deephaven.engine.util.TableTools.floatCol; +import static io.deephaven.engine.util.TableTools.instantCol; +import static io.deephaven.engine.util.TableTools.intCol; +import static io.deephaven.engine.util.TableTools.longCol; +import static io.deephaven.engine.util.TableTools.merge; +import static io.deephaven.engine.util.TableTools.newTable; +import static io.deephaven.engine.util.TableTools.shortCol; +import static io.deephaven.engine.util.TableTools.stringCol; +import static io.deephaven.parquet.table.ParquetTools.readFlatPartitionedTable; +import static io.deephaven.parquet.table.ParquetTools.readKeyValuePartitionedTable; +import static io.deephaven.parquet.table.ParquetTools.readSingleFileTable; +import static io.deephaven.parquet.table.ParquetTools.readTable; +import static io.deephaven.parquet.table.ParquetTools.writeTable; import static io.deephaven.util.QueryConstants.*; import static org.junit.Assert.*; @@ -89,6 +108,9 @@ public final class ParquetTableReadWriteTest { private static final String ROOT_FILENAME = ParquetTableReadWriteTest.class.getName() + "_root"; private static final int LARGE_TABLE_SIZE = 2_000_000; + private static final ParquetInstructions EMPTY = ParquetInstructions.EMPTY; + private static final ParquetInstructions REFRESHING = ParquetInstructions.builder().setIsRefreshing(true).build(); + private static File rootFile; @Rule @@ -234,34 +256,30 @@ private static Table getGroupedTable(int size, boolean includeSerializable) { private void flatTable(String tableName, int size, boolean includeSerializable) { final Table tableToSave = getTableFlat(size, includeSerializable, true); final File dest = new File(rootFile, "ParquetTest_" + tableName + "_test.parquet"); - ParquetTools.writeTable(tableToSave, dest); - final Table fromDisk = ParquetTools.readTable(dest); - TstUtils.assertTableEquals(maybeFixBigDecimal(tableToSave), fromDisk); + writeTable(tableToSave, dest); + checkSingleTable(maybeFixBigDecimal(tableToSave), dest); } private void groupedTable(String tableName, int size, boolean includeSerializable) { final Table tableToSave = getGroupedTable(size, includeSerializable); final File dest = new File(rootFile, "ParquetTest_" + tableName + "_test.parquet"); - ParquetTools.writeTable(tableToSave, dest, tableToSave.getDefinition()); - final Table fromDisk = ParquetTools.readTable(dest); - TstUtils.assertTableEquals(tableToSave, fromDisk); + writeTable(tableToSave, dest, tableToSave.getDefinition()); + checkSingleTable(tableToSave, dest); } private void groupedOneColumnTable(String tableName, int size) { final Table tableToSave = getGroupedOneColumnTable(size); TableTools.show(tableToSave, 50); final 
File dest = new File(rootFile, "ParquetTest_" + tableName + "_test.parquet"); - ParquetTools.writeTable(tableToSave, dest, tableToSave.getDefinition()); - final Table fromDisk = ParquetTools.readTable(dest); - TstUtils.assertTableEquals(tableToSave, fromDisk); + writeTable(tableToSave, dest, tableToSave.getDefinition()); + checkSingleTable(tableToSave, dest); } private void testEmptyArrayStore(String tableName, int size) { final Table tableToSave = getEmptyArray(size); final File dest = new File(rootFile, "ParquetTest_" + tableName + "_test.parquet"); - ParquetTools.writeTable(tableToSave, dest, tableToSave.getDefinition()); - final Table fromDisk = ParquetTools.readTable(dest); - TstUtils.assertTableEquals(tableToSave, fromDisk); + writeTable(tableToSave, dest, tableToSave.getDefinition()); + checkSingleTable(tableToSave, dest); } @Test @@ -269,9 +287,8 @@ public void emptyTrivialTable() { final Table t = TableTools.emptyTable(0).select("A = i"); assertEquals(int.class, t.getDefinition().getColumn("A").getDataType()); final File dest = new File(rootFile, "ParquetTest_emptyTrivialTable.parquet"); - ParquetTools.writeTable(t, dest); - final Table fromDisk = ParquetTools.readTable(dest); - TstUtils.assertTableEquals(t, fromDisk); + writeTable(t, dest); + final Table fromDisk = checkSingleTable(t, dest); assertEquals(t.getDefinition(), fromDisk.getDefinition()); } @@ -301,9 +318,8 @@ public void groupingByLongKey() { ((QueryTable) TableTools.emptyTable(10).select("someInt = i", "someLong = ii % 3") .groupBy("someLong").ungroup("someInt")).withDefinitionUnsafe(definition); final File dest = new File(rootFile, "ParquetTest_groupByLong_test.parquet"); - ParquetTools.writeTable(testTable, dest); - final Table fromDisk = ParquetTools.readTable(dest); - TstUtils.assertTableEquals(fromDisk, testTable); + writeTable(testTable, dest); + final Table fromDisk = checkSingleTable(testTable, dest); TestCase.assertNotNull(fromDisk.getColumnSource("someLong").getGroupToRange()); } @@ -317,9 +333,8 @@ public void groupingByStringKey() { .where("i % 2 == 0").groupBy("someString").ungroup("someInt")) .withDefinitionUnsafe(definition); final File dest = new File(rootFile, "ParquetTest_groupByString_test.parquet"); - ParquetTools.writeTable(testTable, dest); - final Table fromDisk = ParquetTools.readTable(dest); - TstUtils.assertTableEquals(fromDisk, testTable); + writeTable(testTable, dest); + final Table fromDisk = checkSingleTable(testTable, dest); TestCase.assertNotNull(fromDisk.getColumnSource("someString").getGroupToRange()); } @@ -333,19 +348,17 @@ public void groupingByBigInt() { .select("someInt = i", "someBigInt = BigInteger.valueOf(i % 3)").where("i % 2 == 0") .groupBy("someBigInt").ungroup("someInt")).withDefinitionUnsafe(definition); final File dest = new File(rootFile, "ParquetTest_groupByBigInt_test.parquet"); - ParquetTools.writeTable(testTable, dest); - final Table fromDisk = ParquetTools.readTable(dest); - TstUtils.assertTableEquals(fromDisk, testTable); + writeTable(testTable, dest); + final Table fromDisk = checkSingleTable(testTable, dest); TestCase.assertNotNull(fromDisk.getColumnSource("someBigInt").getGroupToRange()); } private void compressionCodecTestHelper(final ParquetInstructions codec) { File dest = new File(rootFile + File.separator + "Table1.parquet"); final Table table1 = getTableFlat(10000, false, true); - ParquetTools.writeTable(table1, dest, codec); + writeTable(table1, dest, codec); assertTrue(dest.length() > 0L); - final Table table2 = ParquetTools.readTable(dest); - 
TstUtils.assertTableEquals(maybeFixBigDecimal(table1), table2); + checkSingleTable(maybeFixBigDecimal(table1), dest); } @Test @@ -368,24 +381,23 @@ public void test_lz4_compressed() { // Write and read a LZ4 compressed file File dest = new File(rootFile + File.separator + "Table.parquet"); final Table table = getTableFlat(100, false, false); - ParquetTools.writeTable(table, dest, ParquetTools.LZ4); - Table fromDisk = ParquetTools.readTable(dest).select(); - TstUtils.assertTableEquals(fromDisk, table); + writeTable(table, dest, ParquetTools.LZ4); + + final Table fromDisk = checkSingleTable(table, dest).select(); try { // The following file is tagged as LZ4 compressed based on its metadata, but is actually compressed with // LZ4_RAW. We should be able to read it anyway with no exceptions. String path = TestParquetTools.class.getResource("/sample_lz4_compressed.parquet").getFile(); - fromDisk = ParquetTools.readTable(path).select(); + readSingleFileTable(new File(path), EMPTY).select(); } catch (RuntimeException e) { TestCase.fail("Failed to read parquet file sample_lz4_compressed.parquet"); } File randomDest = new File(rootFile, "random.parquet"); - ParquetTools.writeTable(fromDisk, randomDest, ParquetTools.LZ4_RAW); + writeTable(fromDisk, randomDest, ParquetTools.LZ4_RAW); // Read the LZ4 compressed file again, to make sure we use a new adapter - fromDisk = ParquetTools.readTable(dest).select(); - TstUtils.assertTableEquals(fromDisk, table); + checkSingleTable(table, randomDest); } @Test @@ -422,11 +434,10 @@ public void testBigDecimalPrecisionScale() { final BigDecimal myBigDecimal = new BigDecimal(".0005"); assertEquals(1, myBigDecimal.precision()); assertEquals(4, myBigDecimal.scale()); - final Table table = TableTools - .newTable(new ColumnHolder<>("MyBigDecimal", BigDecimal.class, null, false, myBigDecimal)); + final Table table = newTable(new ColumnHolder<>("MyBigDecimal", BigDecimal.class, null, false, myBigDecimal)); final File dest = new File(rootFile, "ParquetTest_testBigDecimalPrecisionScale.parquet"); - ParquetTools.writeTable(table, dest); - final Table fromDisk = ParquetTools.readTable(dest); + writeTable(table, dest); + final Table fromDisk = readSingleFileTable(dest, EMPTY); try (final CloseableIterator it = fromDisk.objectColumnIterator("MyBigDecimal")) { assertTrue(it.hasNext()); final BigDecimal item = it.next(); @@ -436,14 +447,13 @@ public void testBigDecimalPrecisionScale() { } private static void writeReadTableTest(final Table table, final File dest) { - writeReadTableTest(table, dest, ParquetInstructions.EMPTY); + writeReadTableTest(table, dest, EMPTY); } private static void writeReadTableTest(final Table table, final File dest, final ParquetInstructions writeInstructions) { - ParquetTools.writeTable(table, dest, writeInstructions); - final Table fromDisk = ParquetTools.readTable(dest); - TstUtils.assertTableEquals(table, fromDisk); + writeTable(table, dest, writeInstructions); + checkSingleTable(table, dest); } @Test @@ -623,9 +633,9 @@ private interface TestParquetTableWriter { void writeTable(final Table table, final File destFile); } - TestParquetTableWriter singleWriter = (table, destFile) -> ParquetTools.writeTable(table, destFile); - TestParquetTableWriter multiWriter = (table, destFile) -> ParquetTools.writeTables(new Table[] {table}, - table.getDefinition(), new File[] {destFile}); + private static final TestParquetTableWriter SINGLE_WRITER = ParquetTools::writeTable; + private static final TestParquetTableWriter MULTI_WRITER = (table, destFile) -> 
ParquetTools + .writeTables(new Table[] {table}, table.getDefinition(), new File[] {destFile}); /** * Verify that the parent directory contains the expected parquet files and index files in the right directory @@ -668,8 +678,8 @@ private static void verifyFilesInDir(final File parentDir, final String[] expect */ @Test public void basicWriteTests() { - basicWriteTestsImpl(singleWriter); - basicWriteTestsImpl(multiWriter); + basicWriteTestsImpl(SINGLE_WRITER); + basicWriteTestsImpl(MULTI_WRITER); } private static void basicWriteTestsImpl(TestParquetTableWriter writer) { @@ -684,8 +694,8 @@ private static void basicWriteTestsImpl(TestParquetTableWriter writer) { final File destFile = new File(parentDir, filename); writer.writeTable(tableToSave, destFile); verifyFilesInDir(parentDir, new String[] {filename}, null); - Table fromDisk = ParquetTools.readTable(destFile); - TstUtils.assertTableEquals(fromDisk, tableToSave); + + checkSingleTable(tableToSave, destFile); // This write should fail final Table badTable = TableTools.emptyTable(5) @@ -699,15 +709,13 @@ private static void basicWriteTestsImpl(TestParquetTableWriter writer) { // Make sure that original file is preserved and no temporary files verifyFilesInDir(parentDir, new String[] {filename}, null); - fromDisk = ParquetTools.readTable(destFile); - TstUtils.assertTableEquals(fromDisk, tableToSave); + checkSingleTable(tableToSave, destFile); // Write a new table successfully at the same path final Table newTableToSave = TableTools.emptyTable(5).update("A=(int)i"); writer.writeTable(newTableToSave, destFile); verifyFilesInDir(parentDir, new String[] {filename}, null); - fromDisk = ParquetTools.readTable(destFile); - TstUtils.assertTableEquals(fromDisk, newTableToSave); + checkSingleTable(newTableToSave, destFile); FileUtils.deleteRecursively(parentDir); } @@ -737,8 +745,8 @@ public void writeMultiTableBasicTest() { ParquetTools.writeTables(tablesToSave, firstTable.getDefinition(), destFiles); verifyFilesInDir(parentDir, new String[] {firstFilename, secondFilename}, null); - TstUtils.assertTableEquals(ParquetTools.readTable(firstDestFile), firstTable); - TstUtils.assertTableEquals(ParquetTools.readTable(secondDestFile), secondTable); + checkSingleTable(firstTable, firstDestFile); + checkSingleTable(secondTable, secondDestFile); } /** @@ -780,8 +788,8 @@ public void writeMultiTableExceptionTest() { */ @Test public void groupingColumnsBasicWriteTests() { - groupingColumnsBasicWriteTestsImpl(singleWriter); - groupingColumnsBasicWriteTestsImpl(multiWriter); + groupingColumnsBasicWriteTestsImpl(SINGLE_WRITER); + groupingColumnsBasicWriteTestsImpl(MULTI_WRITER); } public void groupingColumnsBasicWriteTestsImpl(TestParquetTableWriter writer) { @@ -795,7 +803,7 @@ public void groupingColumnsBasicWriteTestsImpl(TestParquetTableWriter writer) { data[i] = i / 4; } final TableDefinition tableDefinition = TableDefinition.of(ColumnDefinition.ofInt("vvv").withGrouping()); - final Table tableToSave = TableTools.newTable(tableDefinition, TableTools.col("vvv", data)); + final Table tableToSave = newTable(tableDefinition, TableTools.col("vvv", data)); final String destFilename = "groupingColumnsWriteTests.parquet"; final File destFile = new File(parentDir, destFilename); @@ -803,8 +811,7 @@ public void groupingColumnsBasicWriteTestsImpl(TestParquetTableWriter writer) { String vvvIndexFilePath = ".dh_metadata/indexes/vvv/index_vvv_groupingColumnsWriteTests.parquet"; verifyFilesInDir(parentDir, new String[] {destFilename}, Map.of("vvv", new String[] 
{vvvIndexFilePath})); - Table fromDisk = ParquetTools.readTable(destFile); - TstUtils.assertTableEquals(fromDisk, tableToSave); + checkSingleTable(tableToSave, destFile); // Verify that the key-value metadata in the file has the correct name ParquetTableLocationKey tableLocationKey = new ParquetTableLocationKey(destFile, 0, null); @@ -813,7 +820,7 @@ public void groupingColumnsBasicWriteTestsImpl(TestParquetTableWriter writer) { // Write another table but this write should fail final TableDefinition badTableDefinition = TableDefinition.of(ColumnDefinition.ofInt("www").withGrouping()); - final Table badTable = TableTools.newTable(badTableDefinition, TableTools.col("www", data)) + final Table badTable = newTable(badTableDefinition, TableTools.col("www", data)) .updateView("InputString = ii % 2 == 0 ? Long.toString(ii) : null", "A=InputString.charAt(0)"); try { writer.writeTable(badTable, destFile); @@ -824,8 +831,7 @@ public void groupingColumnsBasicWriteTestsImpl(TestParquetTableWriter writer) { // Make sure that original file is preserved and no temporary files verifyFilesInDir(parentDir, new String[] {destFilename}, Map.of("vvv", new String[] {vvvIndexFilePath})); - fromDisk = ParquetTools.readTable(destFile); - TstUtils.assertTableEquals(fromDisk, tableToSave); + checkSingleTable(tableToSave, destFile); FileUtils.deleteRecursively(parentDir); } @@ -838,7 +844,7 @@ public void legacyGroupingFileReadTest() { // Read the legacy file and verify that grouping column is read correctly final Table fromDisk; try { - fromDisk = ParquetTools.readTable(destFile); + fromDisk = readSingleFileTable(destFile, EMPTY); } catch (RuntimeException e) { if (e.getCause() instanceof InvalidParquetFileException) { final String InvalidParquetFileErrorMsgString = "Invalid parquet file detected, please ensure the " + @@ -864,8 +870,8 @@ public void legacyGroupingFileReadTest() { } final TableDefinition tableDefinition = TableDefinition.of(ColumnDefinition.ofInt(groupingColName).withGrouping()); - final Table table = TableTools.newTable(tableDefinition, TableTools.col(groupingColName, data)); - TstUtils.assertTableEquals(fromDisk, table); + final Table table = newTable(tableDefinition, TableTools.col(groupingColName, data)); + assertTableEquals(fromDisk, table); } @Test @@ -880,17 +886,17 @@ public void parquetDirectoryWithDotFilesTest() throws IOException { data[i] = i / 4; } final TableDefinition tableDefinition = TableDefinition.of(ColumnDefinition.ofInt("vvv").withGrouping()); - final Table tableToSave = TableTools.newTable(tableDefinition, TableTools.col("vvv", data)); + final Table tableToSave = newTable(tableDefinition, TableTools.col("vvv", data)); final String destFilename = "data.parquet"; final File destFile = new File(parentDir, destFilename); - ParquetTools.writeTable(tableToSave, destFile); + writeTable(tableToSave, destFile); String vvvIndexFilePath = ".dh_metadata/indexes/vvv/index_vvv_data.parquet"; verifyFilesInDir(parentDir, new String[] {destFilename}, Map.of("vvv", new String[] {vvvIndexFilePath})); // Call readTable on parent directory - Table fromDisk = ParquetTools.readTable(parentDir); - TstUtils.assertTableEquals(fromDisk, tableToSave); + Table fromDisk = readFlatPartitionedTable(parentDir, EMPTY); + assertTableEquals(fromDisk, tableToSave); // Add an empty dot file and dot directory (with valid parquet files) in the parent directory final File dotFile = new File(parentDir, ".dotFile"); @@ -898,16 +904,16 @@ public void parquetDirectoryWithDotFilesTest() throws IOException { final File 
dotDir = new File(parentDir, ".dotDir"); assertTrue(dotDir.mkdir()); final Table someTable = TableTools.emptyTable(5).update("A=(int)i"); - ParquetTools.writeTable(someTable, new File(dotDir, "data.parquet")); - fromDisk = ParquetTools.readTable(parentDir); - TstUtils.assertTableEquals(fromDisk, tableToSave); + writeTable(someTable, new File(dotDir, "data.parquet")); + fromDisk = readFlatPartitionedTable(parentDir, EMPTY); + assertTableEquals(fromDisk, tableToSave); // Add a dot parquet in parent directory final Table anotherTable = TableTools.emptyTable(5).update("A=(int)i"); final File pqDotFile = new File(parentDir, ".dotFile.parquet"); - ParquetTools.writeTable(anotherTable, pqDotFile); - fromDisk = ParquetTools.readTable(parentDir); - TstUtils.assertTableEquals(fromDisk, tableToSave); + writeTable(anotherTable, pqDotFile); + fromDisk = readFlatPartitionedTable(parentDir, EMPTY); + assertTableEquals(fromDisk, tableToSave); } @Test @@ -923,10 +929,10 @@ public void partitionedParquetWithDotFilesTest() throws IOException { final File secondPartition = new File(parentDir, "X=B"); final File secondDataFile = new File(secondPartition, "data.parquet"); - ParquetTools.writeTable(someTable, firstDataFile); - ParquetTools.writeTable(someTable, secondDataFile); + writeTable(someTable, firstDataFile); + writeTable(someTable, secondDataFile); - Table partitionedTable = ParquetTools.readTable(parentDir).select(); + Table partitionedTable = readKeyValuePartitionedTable(parentDir, EMPTY).select(); final Set columnsSet = partitionedTable.getDefinition().getColumnNameSet(); assertTrue(columnsSet.size() == 2 && columnsSet.contains("A") && columnsSet.contains("X")); @@ -935,16 +941,16 @@ public void partitionedParquetWithDotFilesTest() throws IOException { assertTrue(dotFile.createNewFile()); final File dotDir = new File(firstPartition, ".dotDir"); assertTrue(dotDir.mkdir()); - ParquetTools.writeTable(someTable, new File(dotDir, "data.parquet")); - Table fromDisk = ParquetTools.readTable(parentDir); - TstUtils.assertTableEquals(fromDisk, partitionedTable); + writeTable(someTable, new File(dotDir, "data.parquet")); + Table fromDisk = readKeyValuePartitionedTable(parentDir, EMPTY); + assertTableEquals(fromDisk, partitionedTable); // Add a dot parquet file in one of the partitions directory final Table anotherTable = TableTools.emptyTable(5).update("B=(int)i"); final File pqDotFile = new File(secondPartition, ".dotFile.parquet"); - ParquetTools.writeTable(anotherTable, pqDotFile); - fromDisk = ParquetTools.readTable(parentDir); - TstUtils.assertTableEquals(fromDisk, partitionedTable); + writeTable(anotherTable, pqDotFile); + fromDisk = readKeyValuePartitionedTable(parentDir, EMPTY); + assertTableEquals(fromDisk, partitionedTable); } /** @@ -961,11 +967,11 @@ public void writeMultiTableGroupingColumnTest() { data[i] = i / 4; } final TableDefinition tableDefinition = TableDefinition.of(ColumnDefinition.ofInt("vvv").withGrouping()); - final Table firstTable = TableTools.newTable(tableDefinition, TableTools.col("vvv", data)); + final Table firstTable = newTable(tableDefinition, TableTools.col("vvv", data)); final String firstFilename = "firstTable.parquet"; final File firstDestFile = new File(parentDir, firstFilename); - final Table secondTable = TableTools.newTable(tableDefinition, TableTools.col("vvv", data)); + final Table secondTable = newTable(tableDefinition, TableTools.col("vvv", data)); final String secondFilename = "secondTable.parquet"; final File secondDestFile = new File(parentDir, secondFilename); 
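The rewritten assertions in the hunks above lean on a checkSingleTable helper whose definition is not visible in this excerpt. A minimal sketch of what such a helper plausibly does, assuming it simply reads the destination back with readSingleFileTable under empty instructions and asserts equality before returning the on-disk table (the helper actually introduced by this patch may differ):

private static Table checkSingleTable(final Table expected, final File source) {
    // Read the single parquet file back with default (EMPTY) instructions...
    final Table fromDisk = readSingleFileTable(source, EMPTY);
    // ...and verify that it matches the expected in-memory table.
    assertTableEquals(expected, fromDisk);
    return fromDisk;
}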
@@ -988,14 +994,14 @@ public void writeMultiTableGroupingColumnTest() { assertTrue(metadataString.contains(secondIndexFilePath)); // Read back the files and verify contents match - TstUtils.assertTableEquals(ParquetTools.readTable(firstDestFile), firstTable); - TstUtils.assertTableEquals(ParquetTools.readTable(secondDestFile), secondTable); + checkSingleTable(firstTable, firstDestFile); + checkSingleTable(secondTable, secondDestFile); } @Test public void groupingColumnsOverwritingTests() { - groupingColumnsOverwritingTestsImpl(singleWriter); - groupingColumnsOverwritingTestsImpl(multiWriter); + groupingColumnsOverwritingTestsImpl(SINGLE_WRITER); + groupingColumnsOverwritingTestsImpl(MULTI_WRITER); } public void groupingColumnsOverwritingTestsImpl(TestParquetTableWriter writer) { @@ -1009,7 +1015,7 @@ public void groupingColumnsOverwritingTestsImpl(TestParquetTableWriter writer) { data[i] = i / 4; } final TableDefinition tableDefinition = TableDefinition.of(ColumnDefinition.ofInt("vvv").withGrouping()); - final Table tableToSave = TableTools.newTable(tableDefinition, TableTools.col("vvv", data)); + final Table tableToSave = newTable(tableDefinition, TableTools.col("vvv", data)); final String destFilename = "groupingColumnsWriteTests.parquet"; final File destFile = new File(parentDir, destFilename); @@ -1018,7 +1024,7 @@ public void groupingColumnsOverwritingTestsImpl(TestParquetTableWriter writer) { // Write a new table successfully at the same position with different grouping columns final TableDefinition anotherTableDefinition = TableDefinition.of(ColumnDefinition.ofInt("xxx").withGrouping()); - Table anotherTableToSave = TableTools.newTable(anotherTableDefinition, TableTools.col("xxx", data)); + Table anotherTableToSave = newTable(anotherTableDefinition, TableTools.col("xxx", data)); writer.writeTable(anotherTableToSave, destFile); final String xxxIndexFilePath = ".dh_metadata/indexes/xxx/index_xxx_groupingColumnsWriteTests.parquet"; @@ -1028,8 +1034,7 @@ public void groupingColumnsOverwritingTestsImpl(TestParquetTableWriter writer) { Map.of("vvv", new String[] {vvvIndexFilePath}, "xxx", new String[] {xxxIndexFilePath})); - Table fromDisk = ParquetTools.readTable(destFile); - TstUtils.assertTableEquals(fromDisk, anotherTableToSave); + checkSingleTable(anotherTableToSave, destFile); ParquetTableLocationKey tableLocationKey = new ParquetTableLocationKey(destFile, 0, null); String metadataString = tableLocationKey.getMetadata().getFileMetaData().toString(); @@ -1056,8 +1061,8 @@ public void groupingColumnsOverwritingTestsImpl(TestParquetTableWriter writer) { @Test public void readChangedUnderlyingFileTests() { - readChangedUnderlyingFileTestsImpl(singleWriter); - readChangedUnderlyingFileTestsImpl(multiWriter); + readChangedUnderlyingFileTestsImpl(SINGLE_WRITER); + readChangedUnderlyingFileTestsImpl(MULTI_WRITER); } public void readChangedUnderlyingFileTestsImpl(TestParquetTableWriter writer) { @@ -1066,31 +1071,31 @@ public void readChangedUnderlyingFileTestsImpl(TestParquetTableWriter writer) { final String filename = "readChangedUnderlyingFileTests.parquet"; final File destFile = new File(rootFile, filename); writer.writeTable(tableToSave, destFile); - Table fromDisk = ParquetTools.readTable(destFile); + Table fromDisk = readSingleFileTable(destFile, EMPTY); // At this point, fromDisk is not fully materialized in the memory and would be read from the file on demand // Change the underlying file final Table stringTable = TableTools.emptyTable(5).update("InputString = Long.toString(ii)"); 
writer.writeTable(stringTable, destFile); - Table stringFromDisk = ParquetTools.readTable(destFile).select(); - TstUtils.assertTableEquals(stringTable, stringFromDisk); + Table stringFromDisk = readSingleFileTable(destFile, EMPTY).select(); + assertTableEquals(stringTable, stringFromDisk); // Close all the file handles so that next time when fromDisk is accessed, we need to reopen the file handle TrackedFileHandleFactory.getInstance().closeAll(); - // Read back fromDisk and compare it with original table. Since the underlying file has changed, - // assertTableEquals will try to read the file and would crash + // Read back fromDisk. Since the underlying file has changed, we expect this to fail. try { - TstUtils.assertTableEquals(tableToSave, fromDisk); - TestCase.fail(); - } catch (Exception ignored) { + fromDisk.coalesce(); + TestCase.fail("Expected TableDataException"); + } catch (TableDataException ignored) { + // expected } } @Test public void readModifyWriteTests() { - readModifyWriteTestsImpl(singleWriter); - readModifyWriteTestsImpl(multiWriter); + readModifyWriteTestsImpl(SINGLE_WRITER); + readModifyWriteTestsImpl(MULTI_WRITER); } public void readModifyWriteTestsImpl(TestParquetTableWriter writer) { @@ -1099,7 +1104,7 @@ public void readModifyWriteTestsImpl(TestParquetTableWriter writer) { final String filename = "readModifyWriteTests.parquet"; final File destFile = new File(rootFile, filename); writer.writeTable(tableToSave, destFile); - Table fromDisk = ParquetTools.readTable(destFile); + Table fromDisk = readSingleFileTable(destFile, EMPTY); // At this point, fromDisk is not fully materialized in the memory and would be read from the file on demand // Create a view table on fromDisk which should fail on writing, and try to write at the same location @@ -1119,7 +1124,7 @@ public void readModifyWriteTestsImpl(TestParquetTableWriter writer) { // Read back fromDisk and compare it with original table. 
If the underlying file has not been corrupted or // swapped out, then we would not be able to read from the file - TstUtils.assertTableEquals(tableToSave, fromDisk); + assertTableEquals(tableToSave, fromDisk); } @Test @@ -1134,9 +1139,8 @@ public void dictionaryEncodingTest() { .build(); final Table stringTable = TableTools.emptyTable(numRows).select(Selectable.from(columns)); final File dest = new File(rootFile + File.separator + "dictEncoding.parquet"); - ParquetTools.writeTable(stringTable, dest, writeInstructions); - Table fromDisk = ParquetTools.readTable(dest); - assertTableEquals(stringTable, fromDisk); + writeTable(stringTable, dest, writeInstructions); + checkSingleTable(stringTable, dest); // Verify that string columns are properly dictionary encoded final ParquetMetadata metadata = new ParquetTableLocationKey(dest, 0, null).getMetadata(); @@ -1190,9 +1194,8 @@ private static ColumnChunkMetaData overflowingStringsTestHelper(final Collection .build(); Table stringTable = TableTools.emptyTable(numRows).select(Selectable.from(columns)); final File dest = new File(rootFile + File.separator + "overflowingStringsTest.parquet"); - ParquetTools.writeTable(stringTable, dest, writeInstructions); - Table fromDisk = ParquetTools.readTable(dest).select(); - assertTableEquals(stringTable, fromDisk); + writeTable(stringTable, dest, writeInstructions); + checkSingleTable(stringTable, dest); ParquetMetadata metadata = new ParquetTableLocationKey(dest, 0, null).getMetadata(); ColumnChunkMetaData columnMetadata = metadata.getBlocks().get(0).getColumns().get(0); @@ -1211,13 +1214,12 @@ public void overflowingCodecsTest() { ColumnDefinition.fromGenericType("VariableWidthByteArrayColumn", byte[].class, byte.class); final TableDefinition tableDefinition = TableDefinition.of(columnDefinition); final byte[] byteArray = new byte[pageSize / 2]; - final Table table = TableTools.newTable(tableDefinition, + final Table table = newTable(tableDefinition, TableTools.col("VariableWidthByteArrayColumn", byteArray, byteArray, byteArray)); final File dest = new File(rootFile + File.separator + "overflowingCodecsTest.parquet"); - ParquetTools.writeTable(table, dest, writeInstructions); - Table fromDisk = ParquetTools.readTable(dest).select(); - assertTableEquals(table, fromDisk); + writeTable(table, dest, writeInstructions); + checkSingleTable(table, dest); final ParquetMetadata metadata = new ParquetTableLocationKey(dest, 0, null).getMetadata(); final String metadataStr = metadata.getFileMetaData().getKeyValueMetaData().get("deephaven"); @@ -1238,34 +1240,31 @@ public void readWriteStatisticsTest() { ColumnDefinition.fromGenericType("VariableWidthByteArrayColumn", byte[].class, byte.class); final TableDefinition tableDefinition = TableDefinition.of(columnDefinition); final byte[] byteArray = new byte[] {1, 2, 3, 4, NULL_BYTE, 6, 7, 8, 9, NULL_BYTE, 11, 12, 13}; - final Table simpleTable = TableTools.newTable(tableDefinition, + final Table simpleTable = newTable(tableDefinition, TableTools.col("VariableWidthByteArrayColumn", null, byteArray, byteArray, byteArray, byteArray, byteArray)); final File simpleTableDest = new File(rootFile, "ParquetTest_simple_statistics_test.parquet"); - ParquetTools.writeTable(simpleTable, simpleTableDest); + writeTable(simpleTable, simpleTableDest); - final Table simpleFromDisk = ParquetTools.readTable(simpleTableDest); - TstUtils.assertTableEquals(simpleTable, simpleFromDisk); + checkSingleTable(simpleTable, simpleTableDest); assertTableStatistics(simpleTable, simpleTableDest); // Test 
flat columns. final Table flatTableToSave = getTableFlat(10_000, true, true); final File flatTableDest = new File(rootFile, "ParquetTest_flat_statistics_test.parquet"); - ParquetTools.writeTable(flatTableToSave, flatTableDest); + writeTable(flatTableToSave, flatTableDest); - final Table flatFromDisk = ParquetTools.readTable(flatTableDest); - TstUtils.assertTableEquals(maybeFixBigDecimal(flatTableToSave), flatFromDisk); + checkSingleTable(maybeFixBigDecimal(flatTableToSave), flatTableDest); assertTableStatistics(flatTableToSave, flatTableDest); // Test nested columns. final Table groupedTableToSave = getGroupedTable(10_000, true); final File groupedTableDest = new File(rootFile, "ParquetTest_grouped_statistics_test.parquet"); - ParquetTools.writeTable(groupedTableToSave, groupedTableDest, groupedTableToSave.getDefinition()); + writeTable(groupedTableToSave, groupedTableDest, groupedTableToSave.getDefinition()); - final Table groupedFromDisk = ParquetTools.readTable(groupedTableDest); - TstUtils.assertTableEquals(groupedTableToSave, groupedFromDisk); + checkSingleTable(groupedTableToSave, groupedTableDest); assertTableStatistics(groupedTableToSave, groupedTableDest); } @@ -1349,7 +1348,7 @@ public void verifyPyArrowStatistics() { final File pyarrowDest = new File(path); final Table pyarrowFromDisk; try { - pyarrowFromDisk = ParquetTools.readTable(pyarrowDest); + pyarrowFromDisk = readSingleFileTable(pyarrowDest, EMPTY); } catch (RuntimeException e) { if (e.getCause() instanceof InvalidParquetFileException) { final String InvalidParquetFileErrorMsgString = "Invalid parquet file detected, please ensure the " + @@ -1364,13 +1363,9 @@ public void verifyPyArrowStatistics() { // Write the table to disk using our code. final File dhDest = new File(rootFile, "ParquetTest_statistics_test.parquet"); - ParquetTools.writeTable(pyarrowFromDisk, dhDest); + writeTable(pyarrowFromDisk, dhDest); - // Read the table back in using our code. - final Table dhFromDisk = ParquetTools.readTable(dhDest); - - // Verify the two tables loaded from disk are equal. - TstUtils.assertTableEquals(pyarrowFromDisk, dhFromDisk); + final Table dhFromDisk = checkSingleTable(pyarrowFromDisk, dhDest); // Run the verification code against DHC writer stats. 
assertTableStatistics(pyarrowFromDisk, dhDest); @@ -1378,17 +1373,380 @@ public void verifyPyArrowStatistics() { } @Test - public void inferParquetOrderLastKey() { + public void singleTable() { + final File fooSource = new File(rootFile, "singleTable/foo.parquet"); + final File fooBarSource = new File(rootFile, "singleTable/fooBar.parquet"); + final File barSource = new File(rootFile, "singleTable/bar.parquet"); + + final Table foo; + final Table fooBar; + final Table bar; + final Table fooBarNullFoo; + final Table fooBarNullBar; + + final TableDefinition fooDefinition; + final TableDefinition fooBarDefinition; + final TableDefinition barDefinition; + { + fooSource.mkdirs(); + fooBarSource.mkdirs(); + barSource.mkdirs(); + + final ColumnHolder fooCol = intCol("Foo", 1, 2, 3); + final ColumnHolder barCol = stringCol("Bar", "Zip", "Zap", "Zoom"); + + final ColumnHolder nullFooCol = + intCol("Foo", QueryConstants.NULL_INT, QueryConstants.NULL_INT, QueryConstants.NULL_INT); + final ColumnHolder nullBarCol = stringCol("Bar", null, null, null); + + final ColumnDefinition fooColDef = ColumnDefinition.ofInt("Foo"); + final ColumnDefinition barColDef = ColumnDefinition.ofString("Bar"); + + fooDefinition = TableDefinition.of(fooColDef); + fooBarDefinition = TableDefinition.of(fooColDef, barColDef); + barDefinition = TableDefinition.of(barColDef); + + foo = newTable(fooDefinition, fooCol); + fooBar = newTable(fooBarDefinition, fooCol, barCol); + bar = newTable(barDefinition, barCol); + + fooBarNullFoo = newTable(fooBarDefinition, nullFooCol, barCol); + fooBarNullBar = newTable(fooBarDefinition, fooCol, nullBarCol); + + writeTable(foo, fooSource); + writeTable(fooBar, fooBarSource); + writeTable(bar, barSource); + } + + // Infer + { + checkSingleTable(foo, fooSource); + checkSingleTable(fooBar, fooBarSource); + checkSingleTable(bar, barSource); + } + + // readTable inference to readSingleTable + { + assertTableEquals(foo, readTable(fooSource)); + assertTableEquals(fooBar, readTable(fooBarSource)); + assertTableEquals(bar, readTable(barSource)); + } + + // Explicit + { + assertTableEquals(foo, readSingleFileTable(fooSource, EMPTY, fooDefinition)); + assertTableEquals(fooBar, readSingleFileTable(fooBarSource, EMPTY, fooBarDefinition)); + assertTableEquals(bar, readSingleFileTable(barSource, EMPTY, barDefinition)); + } + + // Explicit subset + { + // fooBar as foo + assertTableEquals(foo, readSingleFileTable(fooBarSource, EMPTY, fooDefinition)); + // fooBar as bar + assertTableEquals(bar, readSingleFileTable(fooBarSource, EMPTY, barDefinition)); + } + + // Explicit superset + { + // foo as fooBar + assertTableEquals(fooBarNullBar, readSingleFileTable(fooSource, EMPTY, fooBarDefinition)); + // bar as fooBar + assertTableEquals(fooBarNullFoo, readSingleFileTable(barSource, EMPTY, fooBarDefinition)); + } + + // No refreshing single table support + { + try { + readSingleFileTable(fooSource, REFRESHING); + fail("Expected IllegalArgumentException"); + } catch (IllegalArgumentException e) { + assertEquals("Unable to have a refreshing single parquet file", e.getMessage()); + } + + try { + readSingleFileTable(fooSource, REFRESHING, fooDefinition); + fail("Expected IllegalArgumentException"); + } catch (IllegalArgumentException e) { + assertEquals("Unable to have a refreshing single parquet file", e.getMessage()); + } + } + } + + @Test + public void flatPartitionedTable() { // Create an empty parent directory - final File parentDir = new File(rootFile, "inferParquetOrder"); - parentDir.mkdir(); - final 
TableDefinition td1 = TableDefinition.of(ColumnDefinition.ofInt("Foo")); - final TableDefinition td2 = - TableDefinition.of(ColumnDefinition.ofInt("Foo"), ColumnDefinition.ofString("Bar")); - ParquetTools.writeTable(TableTools.newTable(td1), new File(parentDir, "01.parquet")); - ParquetTools.writeTable(TableTools.newTable(td2), new File(parentDir, "02.parquet")); - final Table table = ParquetTools.readTable(parentDir); - assertEquals(td2, table.getDefinition()); + final File source = new File(rootFile, "flatPartitionedTable/source"); + final File emptySource = new File(rootFile, "flatPartitionedTable/emptySource"); + + final Table formerData; + final Table latterData; + final TableDefinition formerDefinition; + final TableDefinition latterDefinition; + final Runnable writeIntoEmptySource; + { + final File p1File = new File(source, "01.parquet"); + final File p2File = new File(source, "02.parquet"); + + final File p1FileEmpty = new File(emptySource, "01.parquet"); + final File p2FileEmpty = new File(emptySource, "02.parquet"); + + p1File.mkdirs(); + p2File.mkdirs(); + emptySource.mkdirs(); + + final ColumnHolder foo1 = intCol("Foo", 1, 2, 3); + final ColumnHolder foo2 = intCol("Foo", 4, 5); + + final ColumnHolder bar1 = stringCol("Bar", null, null, null); + final ColumnHolder bar2 = stringCol("Bar", "Zip", "Zap"); + + final Table p1 = newTable(foo1); + final Table p2 = newTable(foo2, bar2); + writeTable(p1, p1File); + writeTable(p2, p2File); + writeIntoEmptySource = () -> { + p1FileEmpty.mkdirs(); + p2FileEmpty.mkdirs(); + writeTable(p1, p1FileEmpty); + writeTable(p2, p2FileEmpty); + }; + + final ColumnDefinition foo = ColumnDefinition.ofInt("Foo"); + final ColumnDefinition bar = ColumnDefinition.ofString("Bar"); + + formerDefinition = TableDefinition.of(foo); + latterDefinition = TableDefinition.of(foo, bar); + + formerData = merge( + newTable(formerDefinition, foo1), + newTable(formerDefinition, foo2)); + latterData = merge( + newTable(latterDefinition, foo1, bar1), + newTable(latterDefinition, foo2, bar2)); + } + + // Infer from last key + { + final Table table = readFlatPartitionedTable(source, EMPTY); + assertTableEquals(latterData, table); + } + // Infer from last key, refreshing + { + final Table table = readFlatPartitionedTable(source, REFRESHING); + assertTableEquals(latterData, table); + } + // readTable inference to readFlatPartitionedTable + { + assertTableEquals(latterData, readTable(source)); + } + + // Explicit latter definition + { + final Table table = readFlatPartitionedTable(source, EMPTY, latterDefinition); + assertTableEquals(latterData, table); + } + // Explicit latter definition, refreshing + { + final Table table = readFlatPartitionedTable(source, REFRESHING, latterDefinition); + assertTableEquals(latterData, table); + } + + // Explicit former definition + { + final Table table = readFlatPartitionedTable(source, EMPTY, formerDefinition); + assertTableEquals(formerData, table); + } + // Explicit former definition, refreshing + { + final Table table = readFlatPartitionedTable(source, REFRESHING, formerDefinition); + assertTableEquals(formerData, table); + } + + // Explicit definition, empty directory + { + final Table table = readFlatPartitionedTable(emptySource, EMPTY, latterDefinition); + assertTableEquals(TableTools.newTable(latterDefinition), table); + } + // Explicit definition, empty directory, refreshing with new data added + { + final Table table = readFlatPartitionedTable(emptySource, REFRESHING, latterDefinition); + 
assertTableEquals(TableTools.newTable(latterDefinition), table); + + writeIntoEmptySource.run(); + ExecutionContext.getContext().getUpdateGraph().cast().runWithinUnitTestCycle(() -> { + // This is not generally a good way to do this sort of testing. Ideally, we'd be a bit smarter and use + // a test-driven TableDataRefreshService.getSharedRefreshService. + ((SourceTable) table).tableLocationProvider().refresh(); + ((SourceTable) table).refresh(); + assertTableEquals(latterData, table); + }); + } + } + + @Test + public void keyValuePartitionedTable() { + final File source = new File(rootFile, "keyValuePartitionedTable/source"); + final File emptySource = new File(rootFile, "keyValuePartitionedTable/emptySource"); + + final Table formerData; + final Table latterData; + final TableDefinition formerDefinition; + final TableDefinition latterDefinition; + final Runnable writeIntoEmptySource; + { + final File p1File = new File(source, "Partition=1/z.parquet"); + final File p2File = new File(source, "Partition=2/a.parquet"); + + final File p1FileEmpty = new File(emptySource, "Partition=1/z.parquet"); + final File p2FileEmpty = new File(emptySource, "Partition=2/a.parquet"); + + p1File.mkdirs(); + p2File.mkdirs(); + emptySource.mkdirs(); + + final ColumnHolder part1 = intCol("Partition", 1, 1, 1); + final ColumnHolder part2 = intCol("Partition", 2, 2); + + final ColumnHolder foo1 = intCol("Foo", 1, 2, 3); + final ColumnHolder foo2 = intCol("Foo", 4, 5); + + final ColumnHolder bar1 = stringCol("Bar", null, null, null); + final ColumnHolder bar2 = stringCol("Bar", "Zip", "Zap"); + + final Table p1 = newTable(foo1); + final Table p2 = newTable(foo2, bar2); + writeTable(p1, p1File); + writeTable(p2, p2File); + writeIntoEmptySource = () -> { + p1FileEmpty.mkdirs(); + p2FileEmpty.mkdirs(); + writeTable(p1, p1FileEmpty); + writeTable(p2, p2FileEmpty); + }; + + // Need to be explicit w/ definition so partitioning column applied to expected tables + final ColumnDefinition partition = ColumnDefinition.ofInt("Partition").withPartitioning(); + final ColumnDefinition foo = ColumnDefinition.ofInt("Foo"); + final ColumnDefinition bar = ColumnDefinition.ofString("Bar"); + + // Note: merge does not preserve partition column designation, so we need to explicitly create them + formerDefinition = TableDefinition.of(partition, foo); + latterDefinition = TableDefinition.of(partition, foo, bar); + + formerData = merge( + newTable(formerDefinition, part1, foo1), + newTable(formerDefinition, part2, foo2)); + latterData = merge( + newTable(latterDefinition, part1, foo1, bar1), + newTable(latterDefinition, part2, foo2, bar2)); + } + + // Infer from last key + { + final Table table = readKeyValuePartitionedTable(source, EMPTY); + assertTableEquals(latterData, table); + } + // Infer from last key, refreshing + { + final Table table = readKeyValuePartitionedTable(source, REFRESHING); + assertTableEquals(latterData, table); + } + // readTable inference readKeyValuePartitionedTable + { + assertTableEquals(latterData, readTable(source)); + } + + // Explicit latter definition + { + final Table table = readKeyValuePartitionedTable(source, EMPTY, latterDefinition); + assertTableEquals(latterData, table); + } + // Explicit latter definition, refreshing + { + final Table table = readKeyValuePartitionedTable(source, REFRESHING, latterDefinition); + assertTableEquals(latterData, table); + } + + // Explicit former definition + { + final Table table = readKeyValuePartitionedTable(source, EMPTY, formerDefinition); + 
assertTableEquals(formerData, table); + } + // Explicit former definition, refreshing + { + final Table table = readKeyValuePartitionedTable(source, REFRESHING, formerDefinition); + assertTableEquals(formerData, table); + } + + // Explicit definition, empty directory + { + final Table table = readKeyValuePartitionedTable(emptySource, EMPTY, latterDefinition); + assertTableEquals(TableTools.newTable(latterDefinition), table); + } + // Explicit definition, empty directory, refreshing with new data added + { + final Table table = readKeyValuePartitionedTable(emptySource, REFRESHING, latterDefinition); + assertTableEquals(TableTools.newTable(latterDefinition), table); + + writeIntoEmptySource.run(); + ExecutionContext.getContext().getUpdateGraph().cast().runWithinUnitTestCycle(() -> { + // This is not generally a good way to do this sort of testing. Ideally, we'd be a bit smarter and use + // a test-driven TableDataRefreshService.getSharedRefreshService. + ((SourceTable) table).tableLocationProvider().refresh(); + ((SourceTable) table).refresh(); + assertTableEquals(latterData, table); + }); + } + } + + @Test + public void readSingleColumn() { + final File file = new File(rootFile, "readSingleColumn.parquet"); + final Table primitives = newTable( + booleanCol("Bool", null, true), + charCol("Char", NULL_CHAR, (char) 42), + byteCol("Byte", NULL_BYTE, (byte) 42), + shortCol("Short", NULL_SHORT, (short) 42), + intCol("Int", NULL_INT, 42), + longCol("Long", NULL_LONG, 42L), + floatCol("Float", NULL_FLOAT, 42.0f), + doubleCol("Double", NULL_DOUBLE, 42.0), + stringCol("String", null, "42"), + instantCol("Instant", null, Instant.ofEpochMilli(42))); + { + writeTable(primitives, file); + } + assertTableEquals( + primitives.view("Bool"), + readSingleFileTable(file, EMPTY, TableDefinition.of(ColumnDefinition.ofBoolean("Bool")))); + assertTableEquals( + primitives.view("Char"), + readSingleFileTable(file, EMPTY, TableDefinition.of(ColumnDefinition.ofChar("Char")))); + assertTableEquals( + primitives.view("Byte"), + readSingleFileTable(file, EMPTY, TableDefinition.of(ColumnDefinition.ofByte("Byte")))); + assertTableEquals( + primitives.view("Short"), + readSingleFileTable(file, EMPTY, TableDefinition.of(ColumnDefinition.ofShort("Short")))); + assertTableEquals( + primitives.view("Int"), + readSingleFileTable(file, EMPTY, TableDefinition.of(ColumnDefinition.ofInt("Int")))); + assertTableEquals( + primitives.view("Long"), + readSingleFileTable(file, EMPTY, TableDefinition.of(ColumnDefinition.ofLong("Long")))); + assertTableEquals( + primitives.view("Float"), + readSingleFileTable(file, EMPTY, TableDefinition.of(ColumnDefinition.ofFloat("Float")))); + assertTableEquals( + primitives.view("Double"), + readSingleFileTable(file, EMPTY, TableDefinition.of(ColumnDefinition.ofDouble("Double")))); + assertTableEquals( + primitives.view("String"), + readSingleFileTable(file, EMPTY, TableDefinition.of(ColumnDefinition.ofString("String")))); + assertTableEquals( + primitives.view("Instant"), + readSingleFileTable(file, EMPTY, TableDefinition.of(ColumnDefinition.ofTime("Instant")))); } private void assertTableStatistics(Table inputTable, File dest) { @@ -2717,4 +3075,18 @@ private void assertBigIntegerColumnStatistics(SerialObjectColumnIterator Optional[jpy.JType]: + if table_definition is None: + return None + elif isinstance(table_definition, Dict): + return _JTableDefinition.of( + [ + Column(name=name, data_type=dtype).j_column_definition + for name, dtype in table_definition.items() + ] + ) + elif 
isinstance(table_definition, List): + return _JTableDefinition.of( + [col.j_column_definition for col in table_definition] + ) + else: + raise DHError(f"Unexpected table_definition type: {type(table_definition)}") + +class ParquetFileLayout(Enum): + """ The parquet file layout. """ + + SINGLE_FILE = 1 + """ A single parquet file. """ + + FLAT_PARTITIONED = 2 + """ A single directory of parquet files. """ + + KV_PARTITIONED = 3 + """ A key-value directory partitioning of parquet files. """ + + METADATA_PARTITIONED = 4 + """ A directory containing a _metadata parquet file and an optional _common_metadata parquet file. """ + def read( path: str, - col_instructions: List[ColumnInstruction] = None, + col_instructions: Optional[List[ColumnInstruction]] = None, is_legacy_parquet: bool = False, is_refreshing: bool = False, + file_layout: Optional[ParquetFileLayout] = None, + table_definition: Union[Dict[str, DType], List[Column], None] = None, ) -> Table: """ Reads in a table from a single parquet, metadata file, or directory with recognized layout. Args: path (str): the file or directory to examine - col_instructions (List[ColumnInstruction]): instructions for customizations while reading + col_instructions (Optional[List[ColumnInstruction]]): instructions for customizations while reading, None by + default. is_legacy_parquet (bool): if the parquet data is legacy is_refreshing (bool): if the parquet data represents a refreshing source - + file_layout (Optional[ParquetFileLayout]): the parquet file layout, by default None. When None, the layout is + inferred. + table_definition (Union[Dict[str, DType], List[Column], None]): the table definition, by default None. When None, + the definition is inferred from the parquet file(s). Setting a definition guarantees the returned table will + have that definition. This is useful for bootstrapping purposes when the initial partitioned directory is + empty and is_refreshing=True. It is also useful for specifying a subset of the parquet definition. When set, + file_layout must also be set. 
Returns: a table @@ -113,12 +160,36 @@ def read( is_legacy_parquet=is_legacy_parquet, is_refreshing=is_refreshing, for_read=True, + force_build=True, ) - - if read_instructions: - return Table(j_table=_JParquetTools.readTable(path, read_instructions)) + j_table_definition = _j_table_definition(table_definition) + if j_table_definition is not None: + if not file_layout: + raise DHError("Must provide file_layout when table_definition is set") + if file_layout == ParquetFileLayout.SINGLE_FILE: + j_table = _JParquetTools.readSingleFileTable(_JFile(path), read_instructions, j_table_definition) + elif file_layout == ParquetFileLayout.FLAT_PARTITIONED: + j_table = _JParquetTools.readFlatPartitionedTable(_JFile(path), read_instructions, j_table_definition) + elif file_layout == ParquetFileLayout.KV_PARTITIONED: + j_table = _JParquetTools.readKeyValuePartitionedTable(_JFile(path), read_instructions, j_table_definition) + elif file_layout == ParquetFileLayout.METADATA_PARTITIONED: + raise DHError(f"file_layout={ParquetFileLayout.METADATA_PARTITIONED} with table_definition not currently supported") + else: + raise DHError(f"Invalid parquet file_layout '{file_layout}'") else: - return Table(j_table=_JParquetTools.readTable(path)) + if not file_layout: + j_table = _JParquetTools.readTable(path, read_instructions) + elif file_layout == ParquetFileLayout.SINGLE_FILE: + j_table = _JParquetTools.readSingleFileTable(_JFile(path), read_instructions) + elif file_layout == ParquetFileLayout.FLAT_PARTITIONED: + j_table = _JParquetTools.readFlatPartitionedTable(_JFile(path), read_instructions) + elif file_layout == ParquetFileLayout.KV_PARTITIONED: + j_table = _JParquetTools.readKeyValuePartitionedTable(_JFile(path), read_instructions) + elif file_layout == ParquetFileLayout.METADATA_PARTITIONED: + j_table = _JParquetTools.readPartitionedTableWithMetadata(_JFile(path), read_instructions) + else: + raise DHError(f"Invalid parquet file_layout '{file_layout}'") + return Table(j_table=j_table) except Exception as e: raise DHError(e, "failed to read parquet data.") from e diff --git a/py/server/tests/test_parquet.py b/py/server/tests/test_parquet.py index 36c70515000..56cce45957a 100644 --- a/py/server/tests/test_parquet.py +++ b/py/server/tests/test_parquet.py @@ -12,25 +12,23 @@ from deephaven import DHError, empty_table, dtypes, new_table from deephaven import arrow as dharrow -from deephaven.column import InputColumn +from deephaven.column import InputColumn, Column, ColumnType from deephaven.pandas import to_pandas, to_table -from deephaven.parquet import write, batch_write, read, delete, ColumnInstruction +from deephaven.parquet import write, batch_write, read, delete, ColumnInstruction, ParquetFileLayout from tests.testbase import BaseTestCase class ParquetTestCase(BaseTestCase): """ Test cases for the deephaven.ParquetTools module (performed locally) """ - @classmethod - def setUpClass(cls): - super().setUpClass() + def setUp(self): + super().setUp() # define a junk table workspace directory - cls.temp_dir = tempfile.TemporaryDirectory() + self.temp_dir = tempfile.TemporaryDirectory() - @classmethod - def tearDownClass(cls): - cls.temp_dir.cleanup() - super().tearDownClass() + def tearDown(self): + self.temp_dir.cleanup() + super().tearDown() def test_crd(self): """ Test suite for reading, writing, and deleting a table to disk """ @@ -51,7 +49,7 @@ def test_crd(self): with self.subTest(msg="write_table(Table, str)"): write(table, file_location) self.assertTrue(os.path.exists(file_location)) - table2 = 
read(file_location) + table2 = read(file_location, file_layout=ParquetFileLayout.SINGLE_FILE) self.assert_table_equals(table, table2) shutil.rmtree(base_dir) @@ -59,7 +57,7 @@ def test_crd(self): batch_write([table, table], [file_location, file_location2], definition) self.assertTrue(os.path.exists(file_location)) self.assertTrue(os.path.exists(file_location2)) - table2 = read(file_location) + table2 = read(file_location, file_layout=ParquetFileLayout.SINGLE_FILE) self.assert_table_equals(table, table2) # Delete @@ -114,7 +112,7 @@ def test_crd_with_instructions(self): # Reading with self.subTest(msg="read_table(str)"): - table2 = read(path=file_location, col_instructions=[col_inst, col_inst1]) + table2 = read(path=file_location, col_instructions=[col_inst, col_inst1], file_layout=ParquetFileLayout.SINGLE_FILE) self.assert_table_equals(table, table2) # Delete @@ -141,7 +139,7 @@ def test_big_decimal(self): shutil.rmtree(file_location) write(table, file_location) - table2 = read(file_location) + table2 = read(file_location, file_layout=ParquetFileLayout.SINGLE_FILE) self.assertEqual(table.size, table2.size) self.assert_table_equals(table, table2) @@ -158,7 +156,7 @@ def test_int96_timestamps(self): dataframe = to_pandas(dh_table) table = pyarrow.Table.from_pandas(dataframe) pyarrow.parquet.write_table(table, 'data_from_pa.parquet', use_deprecated_int96_timestamps=True) - from_disk_int96 = read('data_from_pa.parquet') + from_disk_int96 = read('data_from_pa.parquet', file_layout=ParquetFileLayout.SINGLE_FILE) self.assert_table_equals(dh_table, from_disk_int96) # Read the parquet file as a pandas dataframe, and ensure all values are written as null @@ -168,7 +166,7 @@ def test_int96_timestamps(self): # Write the timestamps as int64 using deephaven writing code and compare with int96 table write(dh_table, "data_from_dh.parquet") - from_disk_int64 = read('data_from_dh.parquet') + from_disk_int64 = read('data_from_dh.parquet', file_layout=ParquetFileLayout.SINGLE_FILE) self.assert_table_equals(from_disk_int64, from_disk_int96) def get_table_data(self): @@ -261,7 +259,7 @@ def round_trip_with_compression(self, compression_codec_name, dh_table, vector_c write(dh_table, "data_from_dh.parquet", compression_codec_name=compression_codec_name) # Read the parquet file using deephaven.parquet and compare - result_table = read('data_from_dh.parquet') + result_table = read('data_from_dh.parquet', file_layout=ParquetFileLayout.SINGLE_FILE) self.assert_table_equals(dh_table, result_table) # LZO is not fully supported in pyarrow, so we can't do the rest of the tests @@ -296,14 +294,14 @@ def round_trip_with_compression(self, compression_codec_name, dh_table, vector_c compression=None if compression_codec_name == 'UNCOMPRESSED' else "LZ4" if compression_codec_name == 'LZ4_RAW' or compression_codec_name == 'LZ4RAW' else compression_codec_name) - result_table = read('data_from_pandas.parquet') + result_table = read('data_from_pandas.parquet', file_layout=ParquetFileLayout.SINGLE_FILE) self.assert_table_equals(dh_table, result_table) # dh->dataframe (via pyarrow)->parquet->dh # TODO(deephaven-core#3149) disable for now, since to_pandas results in "None" strings instead of None values # dataframe = to_pandas(dh_table) # dataframe.to_parquet('data_from_pandas.parquet', compression=None if compression_codec_name is 'UNCOMPRESSED' else compression_codec_name) - # result_table = read('data_from_pandas.parquet') + # result_table = read('data_from_pandas.parquet', file_layout=ParquetFileLayout.SINGLE_FILE) # 
self.assert_table_equals(dh_table, result_table) def test_writing_lists_via_pyarrow(self): @@ -312,7 +310,7 @@ def test_writing_lists_via_pyarrow(self): pa_table = pyarrow.table({'numList': [[2, 2, 4]], 'stringList': [["Flamingo", "Parrot", "Dog"]]}) pyarrow.parquet.write_table(pa_table, 'data_from_pa.parquet') - from_disk = read('data_from_pa.parquet').select() + from_disk = read('data_from_pa.parquet', file_layout=ParquetFileLayout.SINGLE_FILE).select() pa_table_from_disk = dharrow.to_arrow(from_disk) self.assertTrue(pa_table.equals(pa_table_from_disk)) @@ -324,7 +322,7 @@ def test_dictionary_encoding(self): ]) # Force "longStringColumn" to use non-dictionary encoding write(dh_table, "data_from_dh.parquet", max_dictionary_size=100) - from_disk = read('data_from_dh.parquet') + from_disk = read('data_from_dh.parquet', file_layout=ParquetFileLayout.SINGLE_FILE) self.assert_table_equals(dh_table, from_disk) metadata = pyarrow.parquet.read_metadata("data_from_dh.parquet") @@ -344,7 +342,7 @@ def test_dates_and_time(self): ]) write(dh_table, "data_from_dh.parquet", compression_codec_name="SNAPPY") - from_disk = read('data_from_dh.parquet') + from_disk = read('data_from_dh.parquet', file_layout=ParquetFileLayout.SINGLE_FILE) self.assert_table_equals(dh_table, from_disk) # TODO dtype_backend=None is a workaround until https://github.com/deephaven/deephaven-core/issues/4823 is fixed @@ -366,7 +364,7 @@ def test_dates_and_time(self): # Rewrite the dataframe back to parquet using pyarrow and read it back using deephaven.parquet to compare df_from_pandas.to_parquet('data_from_pandas.parquet', compression='SNAPPY') - from_disk_pandas = read('data_from_pandas.parquet') + from_disk_pandas = read('data_from_pandas.parquet', file_layout=ParquetFileLayout.SINGLE_FILE) # Compare only the non-null columns because null columns are written as different logical types by pandas and # deephaven @@ -384,7 +382,7 @@ def test_time_with_different_units(self): def time_test_helper(pa_table, new_schema, dest): # Write the provided pyarrow table type-casted to the new schema pyarrow.parquet.write_table(pa_table.cast(new_schema), dest) - from_disk = read(dest) + from_disk = read(dest, file_layout=ParquetFileLayout.SINGLE_FILE) # TODO dtype_backend=None is a workaround until https://github.com/deephaven/deephaven-core/issues/4823 is fixed df_from_disk = to_pandas(from_disk, dtype_backend=None) @@ -420,7 +418,7 @@ def timestamp_test_helper(pa_table, new_schema, dest): if "isAdjustedToUTC=false" not in str(metadata.row_group(0).column(0)): self.fail("isAdjustedToUTC is not set to false") # Read the parquet file back using deephaven and write it back - dh_table_from_disk = read(dest) + dh_table_from_disk = read(dest, file_layout=ParquetFileLayout.SINGLE_FILE) dh_dest = "dh_" + dest write(dh_table_from_disk, dh_dest) # Read the new parquet file using pyarrow and compare against original table @@ -436,6 +434,124 @@ def timestamp_test_helper(pa_table, new_schema, dest): schema_msec = table.schema.set(0, pyarrow.field('f', pyarrow.timestamp('ms'))) timestamp_test_helper(table, schema_msec, 'timestamp_test_msec.parquet') + def test_read_single_file(self): + table = empty_table(3).update( + formulas=["x=i", "y=(double)(i/10.0)", "z=(double)(i*i)"] + ) + single_parquet = os.path.join(self.temp_dir.name, "single.parquet") + write(table, single_parquet) + + with self.subTest(msg="read infer single file infer definition"): + actual = read(single_parquet) + self.assert_table_equals(actual, table) + + with self.subTest(msg="read single 
file infer definition"): + actual = read(single_parquet, file_layout=ParquetFileLayout.SINGLE_FILE) + self.assert_table_equals(actual, table) + + with self.subTest(msg="read single file"): + actual = read( + single_parquet, + table_definition={ + "x": dtypes.int32, + "y": dtypes.double, + "z": dtypes.double, + }, + file_layout=ParquetFileLayout.SINGLE_FILE, + ) + self.assert_table_equals(actual, table) + + def test_read_flat_partitioned(self): + table = empty_table(6).update( + formulas=["x=i", "y=(double)(i/10.0)", "z=(double)(i*i)"] + ) + flat_dir = self.temp_dir.name + f1_parquet = os.path.join(flat_dir, "f1.parquet") + f2_parquet = os.path.join(flat_dir, "f2.parquet") + + write(table.head(3), f1_parquet) + write(table.tail(3), f2_parquet) + + with self.subTest(msg="read infer flat infer definition"): + actual = read(flat_dir) + self.assert_table_equals(actual, table) + + with self.subTest(msg="read flat infer definition"): + actual = read(flat_dir, file_layout=ParquetFileLayout.FLAT_PARTITIONED) + self.assert_table_equals(actual, table) + + with self.subTest(msg="read flat"): + actual = read( + flat_dir, + table_definition={ + "x": dtypes.int32, + "y": dtypes.double, + "z": dtypes.double, + }, + file_layout=ParquetFileLayout.FLAT_PARTITIONED, + ) + self.assert_table_equals(actual, table) + + def test_read_kv_partitioned(self): + table = empty_table(6).update( + formulas=[ + "Partition=(int)(i/3)", + "x=i", + "y=(double)(i/10.0)", + "z=(double)(i*i)", + ] + ) + kv_dir = self.temp_dir.name + p0_dir = os.path.join(kv_dir, "Partition=0") + p1_dir = os.path.join(kv_dir, "Partition=1") + os.mkdir(p0_dir) + os.mkdir(p1_dir) + f1_parquet = os.path.join(p0_dir, "f1.parquet") + f2_parquet = os.path.join(p1_dir, "f2.parquet") + + write(table.head(3).drop_columns(["Partition"]), f1_parquet) + write(table.tail(3).drop_columns(["Partition"]), f2_parquet) + + with self.subTest(msg="read infer kv infer definition"): + actual = read(kv_dir) + self.assert_table_equals(actual, table) + + with self.subTest(msg="read kv infer definition"): + actual = read(kv_dir, file_layout=ParquetFileLayout.KV_PARTITIONED) + self.assert_table_equals(actual, table) + + with self.subTest(msg="read kv"): + actual = read( + kv_dir, + table_definition=[ + Column( + "Partition", dtypes.int32, column_type=ColumnType.PARTITIONING + ), + Column("x", dtypes.int32), + Column("y", dtypes.double), + Column("z", dtypes.double), + ], + file_layout=ParquetFileLayout.KV_PARTITIONED, + ) + self.assert_table_equals(actual, table) + + def test_read_with_table_definition_no_type(self): + # no need to write actual file, shouldn't be reading it + fake_parquet = os.path.join(self.temp_dir.name, "fake.parquet") + with self.subTest(msg="read definition no type"): + with self.assertRaises(DHError) as cm: + read( + fake_parquet, + table_definition={ + "x": dtypes.int32, + "y": dtypes.double, + "z": dtypes.double, + }, + ) + self.assertIn( + "Must provide file_layout when table_definition is set", str(cm.exception) + ) + if __name__ == '__main__': unittest.main() From cd416bd5e09d27eebfc3d6ad6acb1d18336ac0bf Mon Sep 17 00:00:00 2001 From: Larry Booker Date: Thu, 16 Nov 2023 14:06:01 -0800 Subject: [PATCH 31/41] Fix testing tools `NaN` comparison and correct new failing tests. (#4794) * Initial commit of emstd bug fix and test. * Initial commit of NaN test tooling bug-fix. * Correct post-merge test failure. * Some PR comments addressed * Numeric class modified to better handle NaN and Inf values, new tests to verify standard behavior. 
* Expanded the NaN and Inf test code to document our expectations. * Added new tests to cover some short circuit behavior. * Update engine/function/src/templates/Numeric.ftl Co-authored-by: Chip Kent <5250374+chipkent@users.noreply.github.com> * Update engine/function/src/templates/Numeric.ftl Co-authored-by: Chip Kent <5250374+chipkent@users.noreply.github.com> --------- Co-authored-by: Chip Kent <5250374+chipkent@users.noreply.github.com> --- engine/function/src/templates/Numeric.ftl | 169 +++++++++--------- engine/function/src/templates/TestNumeric.ftl | 150 +++++++++++++++- .../impl/by/DoubleChunkedVarOperator.java | 28 ++- .../impl/by/FloatChunkedVarOperator.java | 28 ++- .../rollingavg/ByteRollingAvgOperator.java | 6 +- .../rollingavg/CharRollingAvgOperator.java | 6 +- .../rollingavg/DoubleRollingAvgOperator.java | 6 +- .../rollingavg/FloatRollingAvgOperator.java | 6 +- .../rollingavg/IntRollingAvgOperator.java | 6 +- .../rollingavg/LongRollingAvgOperator.java | 6 +- .../rollingavg/ShortRollingAvgOperator.java | 6 +- .../DoubleRollingProductOperator.java | 25 ++- .../FloatRollingProductOperator.java | 28 ++- .../rollingstd/ByteRollingStdOperator.java | 5 +- .../rollingstd/CharRollingStdOperator.java | 5 +- .../rollingstd/DoubleRollingStdOperator.java | 5 +- .../rollingstd/FloatRollingStdOperator.java | 5 +- .../rollingstd/IntRollingStdOperator.java | 5 +- .../rollingstd/LongRollingStdOperator.java | 5 +- .../rollingstd/ShortRollingStdOperator.java | 5 +- .../io/deephaven/engine/util/TableDiff.java | 27 ++- .../table/impl/updateby/BaseUpdateByTest.java | 17 +- .../impl/updateby/TestRollingProduct.java | 80 +++++++-- .../replicators/ReplicateUpdateBy.java | 10 +- 24 files changed, 495 insertions(+), 144 deletions(-) diff --git a/engine/function/src/templates/Numeric.ftl b/engine/function/src/templates/Numeric.ftl index 6270b603bea..d9d5404d3ca 100644 --- a/engine/function/src/templates/Numeric.ftl +++ b/engine/function/src/templates/Numeric.ftl @@ -365,6 +365,9 @@ public class Numeric { try ( final ${pt.vectorIterator} vi = values.iterator() ) { while ( vi.hasNext() ) { final ${pt.primitive} c = vi.${pt.iteratorNext}(); + if (isNaN(c)) { + return Double.NaN; + } if (!isNull(c)) { sum += c; count++; @@ -416,6 +419,12 @@ public class Numeric { try ( final ${pt.vectorIterator} vi = values.iterator() ) { while ( vi.hasNext() ) { final ${pt.primitive} c = vi.${pt.iteratorNext}(); + if (isNaN(c)) { + return Double.NaN; + } + if (isInf(c)) { + return Double.POSITIVE_INFINITY; + } if (!isNull(c)) { sum += Math.abs(c); count++; @@ -472,11 +481,13 @@ public class Numeric { double sum = 0; double sum2 = 0; - double count = 0; - + long count = 0; try ( final ${pt.vectorIterator} vi = values.iterator() ) { while ( vi.hasNext() ) { final ${pt.primitive} c = vi.${pt.iteratorNext}(); + if (isNaN(c) || isInf(c)) { + return Double.NaN; + } if (!isNull(c)) { sum += (double)c; sum2 += (double)c * (double)c; @@ -485,19 +496,19 @@ public class Numeric { } } - // Return NaN if poisoned or too few values to compute sample variance. - if (count <= 1 || Double.isNaN(sum) || Double.isNaN(sum2)) { + // Return NaN if overflow or too few values to compute variance. + if (count <= 1 || Double.isInfinite(sum) || Double.isInfinite(sum2)) { return Double.NaN; } // Perform the calculation in a way that minimizes the impact of floating point error. 
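// In other words: var = (sum2 - sum*sum/count) / (count - 1). The numerator delta computed below is
// compared against one ulp of sum2, so a residue at or below the rounding error of the inputs
// (which may even come out slightly negative) is reported as exactly 0.0 rather than as noise.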
final double eps = Math.ulp(sum2); - final double vs2bar = sum * (sum / count); + final double vs2bar = sum * (sum / (double)count); final double delta = sum2 - vs2bar; final double rel_eps = delta / eps; // Return zero when the sample variance is leq the floating point error. - return Math.abs(rel_eps) > 1.0 ? delta / (count - 1) : 0.0; + return Math.abs(rel_eps) > 1.0 ? delta / ((double)count - 1) : 0.0; } <#list primitiveTypes as pt2> @@ -590,7 +601,12 @@ public class Numeric { while (vi.hasNext()) { final ${pt.primitive} c = vi.${pt.iteratorNext}(); final ${pt2.primitive} w = wi.${pt2.iteratorNext}(); - + if (isNaN(c) || isInf(c)) { + return Double.NaN; + } + if (isNaN(w) || isInf(w)) { + return Double.NaN; + } if (!isNull(c) && !isNull(w)) { sum += w * c; sum2 += w * c * c; @@ -600,8 +616,8 @@ public class Numeric { } } - // Return NaN if poisoned or too few values to compute sample variance. - if (count <= 1 || Double.isNaN(sum) || Double.isNaN(sum2) || Double.isNaN(count) || Double.isNaN(count2)) { + // Return NaN if overflow or too few values to compute variance. + if (count <= 1 || Double.isInfinite(sum) || Double.isInfinite(sum2)) { return Double.NaN; } @@ -1333,6 +1349,12 @@ public class Numeric { while (v0i.hasNext()) { final ${pt.primitive} v0 = v0i.${pt.iteratorNext}(); final ${pt2.primitive} v1 = v1i.${pt2.iteratorNext}(); + if (isNaN(v0) || isInf(v0)) { + return Double.NaN; + } + if (isNaN(v1) || isInf(v1)) { + return Double.NaN; + } if (!isNull(v0) && !isNull(v1)) { sum0 += v0; @@ -1421,6 +1443,12 @@ public class Numeric { while (v0i.hasNext()) { final ${pt.primitive} v0 = v0i.${pt.iteratorNext}(); final ${pt2.primitive} v1 = v1i.${pt2.iteratorNext}(); + if (isNaN(v0) || isInf(v0)) { + return Double.NaN; + } + if (isNaN(v1) || isInf(v1)) { + return Double.NaN; + } if (!isNull(v0) && !isNull(v1)) { sum0 += v0; @@ -1460,6 +1488,11 @@ public class Numeric { try ( final ${pt.vectorIterator} vi = values.iterator() ) { while ( vi.hasNext() ) { final ${pt.primitive} c = vi.${pt.iteratorNext}(); + <#if pt.valueType.isFloat > + if (isNaN(c)) { + return ${pt.boxed}.NaN; + } + if (!isNull(c)) { sum += c; } @@ -1496,10 +1529,33 @@ public class Numeric { ${pt.primitive} prod = 1; int count = 0; + <#if pt.valueType.isFloat > + long zeroCount = 0; + long infCount = 0; + try ( final ${pt.vectorIterator} vi = values.iterator() ) { while ( vi.hasNext() ) { final ${pt.primitive} c = vi.${pt.iteratorNext}(); + <#if pt.valueType.isFloat > + if (isNaN(c)) { + return ${pt.boxed}.NaN; + } else if (Double.isInfinite(c)) { + if (zeroCount > 0) { + return ${pt.boxed}.NaN; + } + infCount++; + } else if (c == 0) { + if (infCount > 0) { + return ${pt.boxed}.NaN; + } + zeroCount++; + } + <#else> + if (c == 0) { + return 0; + } + if (!isNull(c)) { count++; prod *= c; @@ -1511,7 +1567,11 @@ public class Numeric { return ${pt.null}; } + <#if pt.valueType.isFloat > + return zeroCount > 0 ? 
0 : (${pt.primitive}) (prod); + <#else> return (${pt.primitive}) (prod); + } /** @@ -1549,24 +1609,7 @@ public class Numeric { return null; } - if (values.length == 0) { - return new ${pt.primitive}[0]; - } - - ${pt.primitive}[] result = new ${pt.primitive}[values.length]; - result[0] = values[0]; - - for (int i = 1; i < values.length; i++) { - if (isNull(result[i - 1])) { - result[i] = values[i]; - } else if (isNull(values[i])) { - result[i] = result[i - 1]; - } else { - result[i] = (${pt.primitive})Math.min(result[i - 1], values[i]); - } - } - - return result; + return cummin(new ${pt.vectorDirect}(values)); } /** @@ -1630,24 +1673,7 @@ public class Numeric { return null; } - if (values.length == 0) { - return new ${pt.primitive}[0]; - } - - ${pt.primitive}[] result = new ${pt.primitive}[values.length]; - result[0] = values[0]; - - for (int i = 1; i < values.length; i++) { - if (isNull(result[i - 1])) { - result[i] = values[i]; - } else if (isNull(values[i])) { - result[i] = result[i - 1]; - } else { - result[i] = (${pt.primitive})Math.max(result[i - 1], values[i]); - } - } - - return result; + return cummax(new ${pt.vectorDirect}(values)); } /** @@ -1711,24 +1737,7 @@ public class Numeric { return null; } - if (values.length == 0) { - return new ${pt.primitive}[0]; - } - - ${pt.primitive}[] result = new ${pt.primitive}[values.length]; - result[0] = values[0]; - - for (int i = 1; i < values.length; i++) { - if (isNull(result[i - 1])) { - result[i] = values[i]; - } else if (isNull(values[i])) { - result[i] = result[i - 1]; - } else { - result[i] = (${pt.primitive}) (result[i - 1] + values[i]); - } - } - - return result; + return cumsum(new ${pt.vectorDirect}(values)); } /** @@ -1792,24 +1801,7 @@ public class Numeric { return null; } - if (values.length == 0) { - return new ${pt.primitive}[0]; - } - - ${pt.primitive}[] result = new ${pt.primitive}[values.length]; - result[0] = values[0]; - - for (int i = 1; i < values.length; i++) { - if (isNull(result[i - 1])) { - result[i] = values[i]; - } else if (isNull(values[i])) { - result[i] = result[i - 1]; - } else { - result[i] = (${pt.primitive}) (result[i - 1] * values[i]); - } - } - - return result; + return cumprod(new ${pt.vectorDirect}(values)); } /** @@ -2322,7 +2314,13 @@ public class Numeric { while (vi.hasNext()) { final ${pt.primitive} c = vi.${pt.iteratorNext}(); final ${pt2.primitive} w = wi.${pt2.iteratorNext}(); - + if (isNaN(c)) { + return Double.NaN; + } + if (isNaN(w)) { + return Double.NaN; + } + if (!isNull(c) && !isNull(w)) { vsum += c * w; } @@ -2405,7 +2403,12 @@ public class Numeric { while (vi.hasNext()) { final ${pt.primitive} c = vi.${pt.iteratorNext}(); final ${pt2.primitive} w = wi.${pt2.iteratorNext}(); - + if (isNaN(c)) { + return Double.NaN; + } + if (isNaN(w)) { + return Double.NaN; + } if (!isNull(c) && !isNull(w)) { vsum += c * w; wsum += w; diff --git a/engine/function/src/templates/TestNumeric.ftl b/engine/function/src/templates/TestNumeric.ftl index 24226383f07..e8dd31f5fe8 100644 --- a/engine/function/src/templates/TestNumeric.ftl +++ b/engine/function/src/templates/TestNumeric.ftl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2021 Deephaven Data Labs and Patent Pending + * Copyright (c) 2016-2023 Deephaven Data Labs and Patent Pending */ package io.deephaven.function; @@ -8,9 +8,6 @@ import io.deephaven.base.testing.BaseArrayTestCase; import io.deephaven.vector.*; import java.math.BigInteger; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; import static 
io.deephaven.util.QueryConstants.*; import static io.deephaven.function.Basic.count; @@ -367,6 +364,118 @@ public class TestNumeric extends BaseArrayTestCase { assertEquals(tstat(v), tstat((${pt.primitive})0, (${pt.primitive})40, ${pt.null}, (${pt.primitive})50, (${pt.primitive})60, (${pt.primitive}) -1, (${pt.primitive})0)); } +<#if pt.valueType.isFloat > + public void test${pt.boxed}NaNAndInfHandling() { + double result; + + final ${pt.primitive}[] normal = new ${pt.primitive}[]{1, 2, 3, 4, 5, 6}; + + final ${pt.primitive}[] normalWithNaN = new ${pt.primitive}[]{1, 2, 3, ${pt.boxed}.NaN, 4, 5}; + assertTrue(Double.isNaN(avg(normalWithNaN))); + assertTrue(Double.isNaN(absAvg(normalWithNaN))); + assertTrue(Double.isNaN(var(normalWithNaN))); + assertTrue(Double.isNaN(std(normalWithNaN))); + assertTrue(Double.isNaN(ste(normalWithNaN))); + assertTrue(Double.isNaN(tstat(normalWithNaN))); + + assertTrue(Double.isNaN(cov(normalWithNaN, normal))); + assertTrue(Double.isNaN(cor(normalWithNaN, normal))); + assertTrue(Double.isNaN(wavg(normalWithNaN, normal))); + assertTrue(Double.isNaN(wvar(normalWithNaN, normal))); + assertTrue(Double.isNaN(wstd(normalWithNaN, normal))); + assertTrue(Double.isNaN(wste(normalWithNaN, normal))); + assertTrue(Double.isNaN(wtstat(normalWithNaN, normal))); + + assertTrue(Double.isNaN(cov(normal, normalWithNaN))); + assertTrue(Double.isNaN(cor(normal, normalWithNaN))); + assertTrue(Double.isNaN(wavg(normal, normalWithNaN))); + assertTrue(Double.isNaN(wvar(normal, normalWithNaN))); + assertTrue(Double.isNaN(wstd(normal, normalWithNaN))); + assertTrue(Double.isNaN(wste(normal, normalWithNaN))); + assertTrue(Double.isNaN(wtstat(normal, normalWithNaN))); + + final ${pt.primitive}[] normalWithInf = new ${pt.primitive}[]{1, 2, 3, ${pt.boxed}.POSITIVE_INFINITY, 4, 5}; + result = avg(normalWithInf); + assertTrue(Double.isInfinite(result) && result > 0); // positive infinity + result = absAvg(normalWithInf); + assertTrue(Double.isInfinite(result) && result > 0); // positive infinity + + assertTrue(Double.isNaN(var(normalWithInf))); + assertTrue(Double.isNaN(std(normalWithInf))); + assertTrue(Double.isNaN(ste(normalWithInf))); + assertTrue(Double.isNaN(tstat(normalWithInf))); + + assertTrue(Double.isNaN(cov(normalWithInf, normal))); + assertTrue(Double.isNaN(cor(normalWithInf, normal))); + result = wavg(normalWithInf, normal); + assertTrue(Double.isInfinite(result) && result > 0); // positive infinity + assertTrue(Double.isNaN(wvar(normalWithInf, normal))); + assertTrue(Double.isNaN(wstd(normalWithInf, normal))); + assertTrue(Double.isNaN(wste(normalWithInf, normal))); + assertTrue(Double.isNaN(wtstat(normalWithInf, normal))); + + assertTrue(Double.isNaN(cov(normal, normalWithInf))); + assertTrue(Double.isNaN(cor(normal, normalWithInf))); + assertTrue(Double.isNaN(wavg(normal, normalWithInf))); // is NaN because of inf/inf division + assertTrue(Double.isNaN(wvar(normal, normalWithInf))); + assertTrue(Double.isNaN(wstd(normal, normalWithInf))); + assertTrue(Double.isNaN(wste(normal, normalWithInf))); + assertTrue(Double.isNaN(wtstat(normal, normalWithInf))); + + final ${pt.primitive}[] normalWithNegInf = new ${pt.primitive}[]{1, 2, 3, ${pt.boxed}.NEGATIVE_INFINITY, 4, 5}; + result = avg(normalWithNegInf); + assertTrue(Double.isInfinite(result) && result < 0); // negative infinity + result = absAvg(normalWithNegInf); + assertTrue(Double.isInfinite(result) && result > 0); // positive infinity + + assertTrue(Double.isNaN(var(normalWithNegInf))); + 
assertTrue(Double.isNaN(std(normalWithNegInf))); + assertTrue(Double.isNaN(ste(normalWithNegInf))); + assertTrue(Double.isNaN(tstat(normalWithNegInf))); + + assertTrue(Double.isNaN(cov(normalWithNegInf, normal))); + assertTrue(Double.isNaN(cor(normalWithNegInf, normal))); + result = wavg(normalWithNegInf, normal); + assertTrue(Double.isInfinite(result) && result < 0); // negative infinity + assertTrue(Double.isNaN(wvar(normalWithNegInf, normal))); + assertTrue(Double.isNaN(wstd(normalWithNegInf, normal))); + assertTrue(Double.isNaN(wste(normalWithNegInf, normal))); + assertTrue(Double.isNaN(wtstat(normalWithNegInf, normal))); + + assertTrue(Double.isNaN(cov(normal, normalWithNegInf))); + assertTrue(Double.isNaN(cor(normal, normalWithNegInf))); + assertTrue(Double.isNaN(wavg(normal, normalWithNegInf))); // is NaN because of -inf/-inf division + assertTrue(Double.isNaN(wvar(normal, normalWithNegInf))); + assertTrue(Double.isNaN(wstd(normal, normalWithNegInf))); + assertTrue(Double.isNaN(wste(normal, normalWithNegInf))); + assertTrue(Double.isNaN(wtstat(normal, normalWithNegInf))); + + <#if pt.primitive == "double" > + // testing normal value overflow. NOTE: this is testing for doubles only, since overflowing a double using + // smaller types is quite difficult + final double LARGE_VALUE = Math.nextDown(Double.MAX_VALUE); + + final double[] overflow = new double[]{1, LARGE_VALUE, LARGE_VALUE}; + assertTrue(Double.isInfinite(avg(overflow))); + + assertTrue(Double.isNaN(var(overflow))); + assertTrue(Double.isNaN(std(overflow))); + assertTrue(Double.isNaN(ste(overflow))); + assertTrue(Double.isNaN(tstat(overflow))); + + final double[] negOverflow = new double[]{1, LARGE_VALUE, -LARGE_VALUE}; + assertTrue(Double.isNaN(var(negOverflow))); + assertTrue(Double.isNaN(std(negOverflow))); + assertTrue(Double.isNaN(ste(negOverflow))); + assertTrue(Double.isNaN(tstat(negOverflow))); + + final double[] negAdditionOverflow = new double[]{1, -LARGE_VALUE, -LARGE_VALUE}; + result = avg(negAdditionOverflow); + assertTrue(Double.isInfinite(result) && result < 0); // negative infinity + + } + + <#list primitiveTypes as pt2> <#if pt2.valueType.isNumber > @@ -512,6 +621,39 @@ public class TestNumeric extends BaseArrayTestCase { assertEquals(${pt.null}, product((${pt.vector}) null)); } +<#if pt.valueType.isFloat > + public void test${pt.boxed}ProductOverflowAndNaN() { + final ${pt.primitive} LARGE_VALUE = Math.nextDown(${pt.boxed}.MAX_VALUE); + + final ${pt.primitive}[] overflow = new ${pt.primitive}[]{1, LARGE_VALUE, LARGE_VALUE}; + final ${pt.primitive} overflowProduct = product(overflow); + assertTrue(${pt.boxed}.isInfinite(overflowProduct) && overflowProduct > 0); + + final ${pt.primitive}[] negOverflow = new ${pt.primitive}[]{1, LARGE_VALUE, -LARGE_VALUE}; + final ${pt.primitive} negOverflowProduct = product(negOverflow); + assertTrue(${pt.boxed}.isInfinite(negOverflowProduct) && negOverflowProduct < 0); + + final ${pt.primitive}[] overflowWithZero = new ${pt.primitive}[]{1, LARGE_VALUE, LARGE_VALUE, 0}; + assertTrue(Math.abs(product(overflowWithZero)) == 0.0); + + final ${pt.primitive}[] normalWithNaN = new ${pt.primitive}[]{1, 2, 3, ${pt.boxed}.NaN, 4, 5}; + assertTrue(${pt.boxed}.isNaN(product(normalWithNaN))); + + final ${pt.primitive}[] posInfAndZero = new ${pt.primitive}[]{1, ${pt.boxed}.POSITIVE_INFINITY, 0}; + assertTrue(${pt.boxed}.isNaN(product(posInfAndZero))); + + final ${pt.primitive}[] negInfAndZero = new ${pt.primitive}[]{1, ${pt.boxed}.NEGATIVE_INFINITY, 0}; + 
assertTrue(${pt.boxed}.isNaN(product(negInfAndZero))); + + final ${pt.primitive}[] zeroAndPosInf = new ${pt.primitive}[]{1, 0, ${pt.boxed}.POSITIVE_INFINITY}; + assertTrue(${pt.boxed}.isNaN(product(zeroAndPosInf))); + + final ${pt.primitive}[] zeroAndNegInf = new ${pt.primitive}[]{1, 0, ${pt.boxed}.NEGATIVE_INFINITY}; + assertTrue(${pt.boxed}.isNaN(product(zeroAndNegInf))); + + } + + // public void test${pt.boxed}ProdObjectVector() { // assertEquals(new ${pt.primitive}[]{-30, 120}, product(new ObjectVectorDirect<>(new ${pt.primitive}[][]{{5, 4}, {-3, 5}, {2, 6}}))); // assertEquals(new ${pt.primitive}[]{-30, ${pt.null}}, product(new ObjectVectorDirect<>(new ${pt.primitive}[][]{{5, ${pt.null}}, {-3, 5}, {2, 6}}))); diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/by/DoubleChunkedVarOperator.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/by/DoubleChunkedVarOperator.java index 711ba70ab0a..19b4ef31f6f 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/by/DoubleChunkedVarOperator.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/by/DoubleChunkedVarOperator.java @@ -97,7 +97,12 @@ private boolean addChunk(DoubleChunk values, long destination, if (forceNanResult || nonNullCount <= 1) { resultColumn.set(destination, Double.NaN); } else { - final double variance = (newSum2 - newSum * newSum / nonNullCount) / (nonNullCount - 1); + // If the sum or sumSquared has reached +/-Infinity, we are stuck with NaN forever. + if (Double.isInfinite(newSum) || Double.isInfinite(newSum2)) { + resultColumn.set(destination, Double.NaN); + return true; + } + final double variance = computeVariance(nonNullCount, newSum, newSum2); resultColumn.set(destination, std ? Math.sqrt(variance) : variance); } return true; @@ -109,6 +114,17 @@ private boolean addChunk(DoubleChunk values, long destination, } } + private static double computeVariance(long nonNullCount, double newSum, double newSum2) { + // Perform the calculation in a way that minimizes the impact of FP error. + final double eps = Math.ulp(newSum2); + final double vs2bar = newSum * (newSum / nonNullCount); + final double delta = newSum2 - vs2bar; + final double rel_eps = delta / eps; + + // Return zero when the variance is leq the FP error or when variance becomes negative + final double variance = Math.abs(rel_eps) > 1.0 ? delta / (nonNullCount - 1) : 0.0; + return Math.max(variance, 0.0); + } private boolean removeChunk(DoubleChunk values, long destination, int chunkStart, int chunkSize) { final MutableDouble sum2 = new MutableDouble(); @@ -150,7 +166,15 @@ private boolean removeChunk(DoubleChunk values, long destinati resultColumn.set(destination, Double.NaN); return true; } - final double variance = (newSum2 - newSum * newSum / totalNormalCount) / (totalNormalCount - 1); + + // If the sum has reach +/-Infinity, we are stuck with NaN forever. + if (Double.isInfinite(newSum) || Double.isInfinite(newSum2)) { + resultColumn.set(destination, Double.NaN); + return true; + } + + // Perform the calculation in a way that minimizes the impact of FP error. + final double variance = computeVariance(totalNormalCount, newSum, newSum2); resultColumn.set(destination, std ? 
Math.sqrt(variance) : variance); return true; } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/by/FloatChunkedVarOperator.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/by/FloatChunkedVarOperator.java index 5e525b07e2a..641e5e95ec7 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/by/FloatChunkedVarOperator.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/by/FloatChunkedVarOperator.java @@ -92,7 +92,12 @@ private boolean addChunk(FloatChunk values, long destination, if (forceNanResult || nonNullCount <= 1) { resultColumn.set(destination, Double.NaN); } else { - final double variance = (newSum2 - newSum * newSum / nonNullCount) / (nonNullCount - 1); + // If the sum or sumSquared has reached +/-Infinity, we are stuck with NaN forever. + if (Double.isInfinite(newSum) || Double.isInfinite(newSum2)) { + resultColumn.set(destination, Double.NaN); + return true; + } + final double variance = computeVariance(nonNullCount, newSum, newSum2); resultColumn.set(destination, std ? Math.sqrt(variance) : variance); } return true; @@ -104,6 +109,17 @@ private boolean addChunk(FloatChunk values, long destination, } } + private static double computeVariance(long nonNullCount, double newSum, double newSum2) { + // Perform the calculation in a way that minimizes the impact of FP error. + final double eps = Math.ulp(newSum2); + final double vs2bar = newSum * (newSum / nonNullCount); + final double delta = newSum2 - vs2bar; + final double rel_eps = delta / eps; + + // Return zero when the variance is leq the FP error or when variance becomes negative + final double variance = Math.abs(rel_eps) > 1.0 ? delta / (nonNullCount - 1) : 0.0; + return Math.max(variance, 0.0); + } private boolean removeChunk(FloatChunk values, long destination, int chunkStart, int chunkSize) { final MutableDouble sum2 = new MutableDouble(); @@ -145,7 +161,15 @@ private boolean removeChunk(FloatChunk values, long destinatio resultColumn.set(destination, Double.NaN); return true; } - final double variance = (newSum2 - newSum * newSum / totalNormalCount) / (totalNormalCount - 1); + + // If the sum has reach +/-Infinity, we are stuck with NaN forever. + if (Double.isInfinite(newSum) || Double.isInfinite(newSum2)) { + resultColumn.set(destination, Double.NaN); + return true; + } + + // Perform the calculation in a way that minimizes the impact of FP error. + final double variance = computeVariance(totalNormalCount, newSum, newSum2); resultColumn.set(destination, std ? 
Math.sqrt(variance) : variance); return true; } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingavg/ByteRollingAvgOperator.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingavg/ByteRollingAvgOperator.java index 91d128411cb..dc354836853 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingavg/ByteRollingAvgOperator.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingavg/ByteRollingAvgOperator.java @@ -88,7 +88,11 @@ public void writeToOutputChunk(int outIdx) { outputValues.set(outIdx, NULL_DOUBLE); } else { final int count = byteWindowValues.size() - nullCount; - outputValues.set(outIdx, curVal / (double)count); + if (count == 0) { + outputValues.set(outIdx, Double.NaN); + } else { + outputValues.set(outIdx, curVal / (double)count); + } } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingavg/CharRollingAvgOperator.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingavg/CharRollingAvgOperator.java index babdae78e29..6f5c7610c74 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingavg/CharRollingAvgOperator.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingavg/CharRollingAvgOperator.java @@ -82,7 +82,11 @@ public void writeToOutputChunk(int outIdx) { outputValues.set(outIdx, NULL_DOUBLE); } else { final int count = charWindowValues.size() - nullCount; - outputValues.set(outIdx, curVal / (double)count); + if (count == 0) { + outputValues.set(outIdx, Double.NaN); + } else { + outputValues.set(outIdx, curVal / (double)count); + } } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingavg/DoubleRollingAvgOperator.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingavg/DoubleRollingAvgOperator.java index 3748d4cd90c..3c3e405368d 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingavg/DoubleRollingAvgOperator.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingavg/DoubleRollingAvgOperator.java @@ -84,7 +84,11 @@ public void writeToOutputChunk(int outIdx) { outputValues.set(outIdx, NULL_DOUBLE); } else { final int count = aggSum.size() - nullCount; - outputValues.set(outIdx, aggSum.evaluate() / (double)count); + if (count == 0) { + outputValues.set(outIdx, Double.NaN); + } else { + outputValues.set(outIdx, aggSum.evaluate() / (double)count); + } } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingavg/FloatRollingAvgOperator.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingavg/FloatRollingAvgOperator.java index d8e7431c071..220f3df01e9 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingavg/FloatRollingAvgOperator.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingavg/FloatRollingAvgOperator.java @@ -79,7 +79,11 @@ public void writeToOutputChunk(int outIdx) { outputValues.set(outIdx, NULL_DOUBLE); } else { final int count = aggSum.size() - nullCount; - outputValues.set(outIdx, aggSum.evaluate() / (double)count); + if (count == 0) { + outputValues.set(outIdx, Double.NaN); + } else { + outputValues.set(outIdx, aggSum.evaluate() / (double)count); + } } } diff --git 
a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingavg/IntRollingAvgOperator.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingavg/IntRollingAvgOperator.java index 57b8ef4ff5e..3fc6e88bf80 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingavg/IntRollingAvgOperator.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingavg/IntRollingAvgOperator.java @@ -87,7 +87,11 @@ public void writeToOutputChunk(int outIdx) { outputValues.set(outIdx, NULL_DOUBLE); } else { final int count = intWindowValues.size() - nullCount; - outputValues.set(outIdx, curVal / (double)count); + if (count == 0) { + outputValues.set(outIdx, Double.NaN); + } else { + outputValues.set(outIdx, curVal / (double)count); + } } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingavg/LongRollingAvgOperator.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingavg/LongRollingAvgOperator.java index 7fae62c0f5d..3fcc9ff5006 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingavg/LongRollingAvgOperator.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingavg/LongRollingAvgOperator.java @@ -87,7 +87,11 @@ public void writeToOutputChunk(int outIdx) { outputValues.set(outIdx, NULL_DOUBLE); } else { final int count = longWindowValues.size() - nullCount; - outputValues.set(outIdx, curVal / (double)count); + if (count == 0) { + outputValues.set(outIdx, Double.NaN); + } else { + outputValues.set(outIdx, curVal / (double)count); + } } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingavg/ShortRollingAvgOperator.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingavg/ShortRollingAvgOperator.java index 7c81f008a00..70920192b0c 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingavg/ShortRollingAvgOperator.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingavg/ShortRollingAvgOperator.java @@ -87,7 +87,11 @@ public void writeToOutputChunk(int outIdx) { outputValues.set(outIdx, NULL_DOUBLE); } else { final int count = shortWindowValues.size() - nullCount; - outputValues.set(outIdx, curVal / (double)count); + if (count == 0) { + outputValues.set(outIdx, Double.NaN); + } else { + outputValues.set(outIdx, curVal / (double)count); + } } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingproduct/DoubleRollingProductOperator.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingproduct/DoubleRollingProductOperator.java index 23385c171c0..6d0d3cef925 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingproduct/DoubleRollingProductOperator.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingproduct/DoubleRollingProductOperator.java @@ -1,6 +1,6 @@ /* * --------------------------------------------------------------------------------------------------------------------- - * AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY - for any changes edit CharRollingProductOperator and regenerate + * AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY - for any changes edit FloatRollingProductOperator and regenerate * --------------------------------------------------------------------------------------------------------------------- */ package 
io.deephaven.engine.table.impl.updateby.rollingproduct; @@ -30,6 +30,8 @@ protected class Context extends BaseDoubleUpdateByOperator.Context { protected AggregatingDoubleRingBuffer buffer; private int zeroCount; + private int nanCount; + private int infCount; protected Context(final int affectedChunkSize, final int influencerChunkSize) { super(affectedChunkSize); @@ -48,6 +50,8 @@ protected Context(final int affectedChunkSize, final int influencerChunkSize) { }, true); zeroCount = 0; + nanCount = 0; + infCount = 0; } @Override @@ -76,6 +80,10 @@ public void push(int pos, int count) { buffer.addUnsafe(val); if (val == 0) { zeroCount++; + } else if (Double.isNaN(val)) { + nanCount++; + } else if (Double.isInfinite(val)) { + infCount++; } } } @@ -90,8 +98,12 @@ public void pop(int count) { if (val == NULL_DOUBLE) { nullCount--; + } else if (Double.isNaN(val)) { + --nanCount; } else if (val == 0) { --zeroCount; + } else if (Double.isInfinite(val)) { + --infCount; } } } @@ -101,7 +113,14 @@ public void writeToOutputChunk(int outIdx) { if (buffer.size() == nullCount) { outputValues.set(outIdx, NULL_DOUBLE); } else { - outputValues.set(outIdx, zeroCount > 0 ? 0.0 : buffer.evaluate()); + if (nanCount > 0 || (infCount > 0 && zeroCount > 0)) { + // Output NaN without evaluating the buffer when the buffer is poisoned with NaNs or when we + // have an Inf * 0 case + outputValues.set(outIdx, Double.NaN); + } else { + // When zeros are present, we can skip evaluating the buffer. + outputValues.set(outIdx, zeroCount > 0 ? 0.0 : buffer.evaluate()); + } } } @@ -109,6 +128,8 @@ public void writeToOutputChunk(int outIdx) { public void reset() { super.reset(); zeroCount = 0; + nanCount = 0; + infCount = 0; buffer.clear(); } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingproduct/FloatRollingProductOperator.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingproduct/FloatRollingProductOperator.java index 44e4d737d14..f7868c2c0ae 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingproduct/FloatRollingProductOperator.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingproduct/FloatRollingProductOperator.java @@ -1,8 +1,3 @@ -/* - * --------------------------------------------------------------------------------------------------------------------- - * AUTO-GENERATED CLASS - DO NOT EDIT MANUALLY - for any changes edit CharRollingProductOperator and regenerate - * --------------------------------------------------------------------------------------------------------------------- - */ package io.deephaven.engine.table.impl.updateby.rollingproduct; import io.deephaven.base.ringbuffer.AggregatingDoubleRingBuffer; @@ -30,6 +25,8 @@ protected class Context extends BaseDoubleUpdateByOperator.Context { protected AggregatingDoubleRingBuffer buffer; private int zeroCount; + private int nanCount; + private int infCount; protected Context(final int affectedChunkSize, final int influencerChunkSize) { super(affectedChunkSize); @@ -48,6 +45,8 @@ protected Context(final int affectedChunkSize, final int influencerChunkSize) { }, true); zeroCount = 0; + nanCount = 0; + infCount = 0; } @Override @@ -76,6 +75,10 @@ public void push(int pos, int count) { buffer.addUnsafe(val); if (val == 0) { zeroCount++; + } else if (Double.isNaN(val)) { + nanCount++; + } else if (Double.isInfinite(val)) { + infCount++; } } } @@ -90,8 +93,12 @@ public void pop(int count) { if (val == NULL_DOUBLE) { nullCount--; + 
} else if (Double.isNaN(val)) { + --nanCount; } else if (val == 0) { --zeroCount; + } else if (Double.isInfinite(val)) { + --infCount; } } } @@ -101,7 +108,14 @@ public void writeToOutputChunk(int outIdx) { if (buffer.size() == nullCount) { outputValues.set(outIdx, NULL_DOUBLE); } else { - outputValues.set(outIdx, zeroCount > 0 ? 0.0 : buffer.evaluate()); + if (nanCount > 0 || (infCount > 0 && zeroCount > 0)) { + // Output NaN without evaluating the buffer when the buffer is poisoned with NaNs or when we + // have an Inf * 0 case + outputValues.set(outIdx, Double.NaN); + } else { + // When zeros are present, we can skip evaluating the buffer. + outputValues.set(outIdx, zeroCount > 0 ? 0.0 : buffer.evaluate()); + } } } @@ -109,6 +123,8 @@ public void writeToOutputChunk(int outIdx) { public void reset() { super.reset(); zeroCount = 0; + nanCount = 0; + infCount = 0; buffer.clear(); } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingstd/ByteRollingStdOperator.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingstd/ByteRollingStdOperator.java index 1a5c3b42770..1707f555603 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingstd/ByteRollingStdOperator.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingstd/ByteRollingStdOperator.java @@ -121,7 +121,10 @@ public void writeToOutputChunk(int outIdx) { final double valueSquareSum = valueSquareBuffer.evaluate(); final double valueSum = valueBuffer.evaluate(); - if (Double.isNaN(valueSquareSum) || Double.isNaN(valueSum)) { + if (Double.isNaN(valueSquareSum) + || Double.isNaN(valueSum) + || Double.isInfinite(valueSquareSum) + || Double.isInfinite(valueSum)) { outputValues.set(outIdx, Double.NaN); return; } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingstd/CharRollingStdOperator.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingstd/CharRollingStdOperator.java index b965e373639..4e2893a8120 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingstd/CharRollingStdOperator.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingstd/CharRollingStdOperator.java @@ -115,7 +115,10 @@ public void writeToOutputChunk(int outIdx) { final double valueSquareSum = valueSquareBuffer.evaluate(); final double valueSum = valueBuffer.evaluate(); - if (Double.isNaN(valueSquareSum) || Double.isNaN(valueSum)) { + if (Double.isNaN(valueSquareSum) + || Double.isNaN(valueSum) + || Double.isInfinite(valueSquareSum) + || Double.isInfinite(valueSum)) { outputValues.set(outIdx, Double.NaN); return; } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingstd/DoubleRollingStdOperator.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingstd/DoubleRollingStdOperator.java index b9320231ba0..3adbc7e04d3 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingstd/DoubleRollingStdOperator.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingstd/DoubleRollingStdOperator.java @@ -120,7 +120,10 @@ public void writeToOutputChunk(int outIdx) { final double valueSquareSum = valueSquareBuffer.evaluate(); final double valueSum = valueBuffer.evaluate(); - if (Double.isNaN(valueSquareSum) || Double.isNaN(valueSum)) { + if (Double.isNaN(valueSquareSum) + || Double.isNaN(valueSum) + || 
Double.isInfinite(valueSquareSum) + || Double.isInfinite(valueSum)) { outputValues.set(outIdx, Double.NaN); return; } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingstd/FloatRollingStdOperator.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingstd/FloatRollingStdOperator.java index e1c90523f14..082fa4f46e7 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingstd/FloatRollingStdOperator.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingstd/FloatRollingStdOperator.java @@ -120,7 +120,10 @@ public void writeToOutputChunk(int outIdx) { final double valueSquareSum = valueSquareBuffer.evaluate(); final double valueSum = valueBuffer.evaluate(); - if (Double.isNaN(valueSquareSum) || Double.isNaN(valueSum)) { + if (Double.isNaN(valueSquareSum) + || Double.isNaN(valueSum) + || Double.isInfinite(valueSquareSum) + || Double.isInfinite(valueSum)) { outputValues.set(outIdx, Double.NaN); return; } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingstd/IntRollingStdOperator.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingstd/IntRollingStdOperator.java index d35df8d2423..3aa840cbf17 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingstd/IntRollingStdOperator.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingstd/IntRollingStdOperator.java @@ -120,7 +120,10 @@ public void writeToOutputChunk(int outIdx) { final double valueSquareSum = valueSquareBuffer.evaluate(); final double valueSum = valueBuffer.evaluate(); - if (Double.isNaN(valueSquareSum) || Double.isNaN(valueSum)) { + if (Double.isNaN(valueSquareSum) + || Double.isNaN(valueSum) + || Double.isInfinite(valueSquareSum) + || Double.isInfinite(valueSum)) { outputValues.set(outIdx, Double.NaN); return; } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingstd/LongRollingStdOperator.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingstd/LongRollingStdOperator.java index 87cb06edca2..793d441e497 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingstd/LongRollingStdOperator.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingstd/LongRollingStdOperator.java @@ -120,7 +120,10 @@ public void writeToOutputChunk(int outIdx) { final double valueSquareSum = valueSquareBuffer.evaluate(); final double valueSum = valueBuffer.evaluate(); - if (Double.isNaN(valueSquareSum) || Double.isNaN(valueSum)) { + if (Double.isNaN(valueSquareSum) + || Double.isNaN(valueSum) + || Double.isInfinite(valueSquareSum) + || Double.isInfinite(valueSum)) { outputValues.set(outIdx, Double.NaN); return; } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingstd/ShortRollingStdOperator.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingstd/ShortRollingStdOperator.java index e4335953489..84ca25415a8 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingstd/ShortRollingStdOperator.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingstd/ShortRollingStdOperator.java @@ -120,7 +120,10 @@ public void writeToOutputChunk(int outIdx) { final double valueSquareSum = valueSquareBuffer.evaluate(); final double valueSum = valueBuffer.evaluate(); - if 
(Double.isNaN(valueSquareSum) || Double.isNaN(valueSum)) { + if (Double.isNaN(valueSquareSum) + || Double.isNaN(valueSum) + || Double.isInfinite(valueSquareSum) + || Double.isInfinite(valueSum)) { outputValues.set(outIdx, Double.NaN); return; } diff --git a/engine/table/src/main/java/io/deephaven/engine/util/TableDiff.java b/engine/table/src/main/java/io/deephaven/engine/util/TableDiff.java index aa1f18a75c1..cdc01bb862a 100644 --- a/engine/table/src/main/java/io/deephaven/engine/util/TableDiff.java +++ b/engine/table/src/main/java/io/deephaven/engine/util/TableDiff.java @@ -59,8 +59,8 @@ static Pair diffInternal(Table actualResult, Table expectedResult, } } - final Map actualNameToColumnSource = actualResult.getColumnSourceMap(); - final Map expectedNameToColumnSource = expectedResult.getColumnSourceMap(); + final Map> actualNameToColumnSource = actualResult.getColumnSourceMap(); + final Map> expectedNameToColumnSource = expectedResult.getColumnSourceMap(); final String[] actualColumnNames = actualResult.getDefinition().getColumnNames().toArray(CollectionUtil.ZERO_LENGTH_STRING_ARRAY); final String[] expectedColumnNames = @@ -78,8 +78,8 @@ static Pair diffInternal(Table actualResult, Table expectedResult, final Set columnNamesForDiff = new LinkedHashSet<>(); for (int ci = 0; ci < expectedColumnNames.length; ci++) { final String expectedColumnName = expectedColumnNames[ci]; - final ColumnSource expectedColumnSource = expectedNameToColumnSource.get(expectedColumnName); - final ColumnSource actualColumnSource = actualNameToColumnSource.get(expectedColumnName); + final ColumnSource expectedColumnSource = expectedNameToColumnSource.get(expectedColumnName); + final ColumnSource actualColumnSource = actualNameToColumnSource.get(expectedColumnName); if (actualColumnSource == null) { issues.add("Expected column " + expectedColumnName + " not found"); } else { @@ -114,7 +114,7 @@ static Pair diffInternal(Table actualResult, Table expectedResult, try (final SafeCloseableList safeCloseables = new SafeCloseableList(); final SharedContext expectedSharedContext = SharedContext.makeSharedContext(); final SharedContext actualSharedContext = SharedContext.makeSharedContext(); - final WritableBooleanChunk equalValues = WritableBooleanChunk.makeWritableChunk(chunkSize)) { + final WritableBooleanChunk equalValues = WritableBooleanChunk.makeWritableChunk(chunkSize)) { final ColumnDiffContext[] columnContexts = columnNamesForDiff.stream() .map(name -> safeCloseables.add(new ColumnDiffContext(name, expectedNameToColumnSource.get(name), @@ -231,7 +231,7 @@ private ColumnDiffContext(@NotNull final String name, */ private long diffChunk(@NotNull final RowSequence expectedChunkOk, @NotNull final RowSequence actualChunkOk, - @NotNull final WritableBooleanChunk equalValues, + @NotNull final WritableBooleanChunk equalValues, @NotNull final Set itemsToSkip, @NotNull final List issues, long position) { @@ -267,6 +267,13 @@ private long diffChunk(@NotNull final RowSequence expectedChunkOk, } else if (chunkType == ChunkType.Float) { final float expectedValue = expectedValues.asFloatChunk().get(ii); final float actualValue = actualValues.asFloatChunk().get(ii); + if (Float.isNaN(expectedValue) || Float.isNaN(actualValue)) { + final String actualString = Float.isNaN(actualValue) ? "NaN" : Float.toString(actualValue); + final String expectString = Float.isNaN(expectedValue) ? 
"NaN" : Float.toString(expectedValue); + issues.add("Column " + name + " different from the expected set, first difference at row " + + position + " encountered " + actualString + " expected " + expectString); + return position; + } if (expectedValue == io.deephaven.util.QueryConstants.NULL_FLOAT || actualValue == io.deephaven.util.QueryConstants.NULL_FLOAT) { final String actualString = actualValue == io.deephaven.util.QueryConstants.NULL_FLOAT ? "null" @@ -297,6 +304,14 @@ private long diffChunk(@NotNull final RowSequence expectedChunkOk, } else if (chunkType == ChunkType.Double) { final double expectedValue = expectedValues.asDoubleChunk().get(ii); final double actualValue = actualValues.asDoubleChunk().get(ii); + if (Double.isNaN(expectedValue) || Double.isNaN(actualValue)) { + final String actualString = Double.isNaN(actualValue) ? "NaN" : Double.toString(actualValue); + final String expectString = + Double.isNaN(expectedValue) ? "NaN" : Double.toString(expectedValue); + issues.add("Column " + name + " different from the expected set, first difference at row " + + position + " encountered " + actualString + " expected " + expectString); + return position; + } if (expectedValue == io.deephaven.util.QueryConstants.NULL_DOUBLE || actualValue == io.deephaven.util.QueryConstants.NULL_DOUBLE) { final String actualString = actualValue == io.deephaven.util.QueryConstants.NULL_DOUBLE ? "null" diff --git a/engine/table/src/test/java/io/deephaven/engine/table/impl/updateby/BaseUpdateByTest.java b/engine/table/src/test/java/io/deephaven/engine/table/impl/updateby/BaseUpdateByTest.java index 9dd2cb2b4cc..fcb3daec101 100644 --- a/engine/table/src/test/java/io/deephaven/engine/table/impl/updateby/BaseUpdateByTest.java +++ b/engine/table/src/test/java/io/deephaven/engine/table/impl/updateby/BaseUpdateByTest.java @@ -1,6 +1,8 @@ package io.deephaven.engine.table.impl.updateby; import io.deephaven.datastructures.util.CollectionUtil; +import io.deephaven.engine.rowset.RowSet; +import io.deephaven.engine.table.impl.AbstractColumnSource; import io.deephaven.engine.table.impl.QueryTable; import io.deephaven.engine.testutil.ColumnInfo; import io.deephaven.engine.testutil.generator.*; @@ -9,10 +11,7 @@ import org.junit.Rule; import java.math.BigInteger; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Random; +import java.util.*; import static io.deephaven.engine.testutil.TstUtils.getTable; import static io.deephaven.engine.testutil.TstUtils.initColumnInfos; @@ -81,11 +80,11 @@ static CreateResult createTestTable(int tableSize, generators.toArray(new TestDataGenerator[0])); final QueryTable t = getTable(tableSize, random, columnInfos); - - // if (!isRefreshing && includeGroups) { - // final ColumnSource groupingSource = t.getColumnSource("Sym"); - // groupingSource.setGroupingProvider(StaticGroupingProvider.buildFrom(groupingSource, t.getRowSet())); - // } + if (!isRefreshing && includeGroups) { + final AbstractColumnSource groupingSource = (AbstractColumnSource) t.getColumnSource("Sym"); + final Map gtr = groupingSource.getValuesMapping(t.getRowSet()); + groupingSource.setGroupToRange(gtr); + } t.setRefreshing(isRefreshing); diff --git a/engine/table/src/test/java/io/deephaven/engine/table/impl/updateby/TestRollingProduct.java b/engine/table/src/test/java/io/deephaven/engine/table/impl/updateby/TestRollingProduct.java index e3b24089b7a..d01a1749111 100644 --- a/engine/table/src/test/java/io/deephaven/engine/table/impl/updateby/TestRollingProduct.java +++ 
b/engine/table/src/test/java/io/deephaven/engine/table/impl/updateby/TestRollingProduct.java @@ -4,18 +4,17 @@ import io.deephaven.api.updateby.UpdateByControl; import io.deephaven.api.updateby.UpdateByOperation; import io.deephaven.base.verify.Assert; +import io.deephaven.datastructures.util.CollectionUtil; import io.deephaven.engine.context.ExecutionContext; import io.deephaven.engine.context.QueryScope; +import io.deephaven.engine.rowset.RowSet; +import io.deephaven.engine.table.ColumnSource; import io.deephaven.engine.table.Table; +import io.deephaven.engine.table.impl.AbstractColumnSource; import io.deephaven.engine.table.impl.DataAccessHelpers; import io.deephaven.engine.table.impl.QueryTable; -import io.deephaven.engine.testutil.ControlledUpdateGraph; -import io.deephaven.engine.testutil.EvalNugget; -import io.deephaven.engine.testutil.GenerateTableUpdates; -import io.deephaven.engine.testutil.TstUtils; -import io.deephaven.engine.testutil.generator.CharGenerator; -import io.deephaven.engine.testutil.generator.SortedInstantGenerator; -import io.deephaven.engine.testutil.generator.TestDataGenerator; +import io.deephaven.engine.testutil.*; +import io.deephaven.engine.testutil.generator.*; import io.deephaven.engine.util.TableDiff; import io.deephaven.test.types.OutOfBandTest; import io.deephaven.time.DateTimeUtils; @@ -28,13 +27,12 @@ import java.math.BigInteger; import java.math.MathContext; import java.time.Duration; -import java.util.Arrays; -import java.util.EnumSet; -import java.util.List; -import java.util.Random; +import java.util.*; import java.util.function.Function; import static io.deephaven.engine.testutil.GenerateTableUpdates.generateAppends; +import static io.deephaven.engine.testutil.TstUtils.getTable; +import static io.deephaven.engine.testutil.TstUtils.initColumnInfos; import static io.deephaven.engine.testutil.testcase.RefreshingTableTestCase.simulateShiftAwareStep; import static io.deephaven.function.Basic.isNull; @@ -102,6 +100,62 @@ private String[] getCastingFormulas(String[] columns) { .toArray(String[]::new); } + /** + * Create a custom test table where the values are small enough they won't overflow Double.MAX_VALUE when + * multiplied. This will allow the results to be verified with the Numeric#product() function. 
+ */ + @SuppressWarnings({"rawtypes"}) + static CreateResult createSmallTestTable(int tableSize, + boolean includeSym, + boolean includeGroups, + boolean isRefreshing, + int seed, + String[] extraNames, + TestDataGenerator[] extraGenerators) { + if (includeGroups && !includeSym) { + throw new IllegalArgumentException(); + } + + final List colsList = new ArrayList<>(); + final List generators = new ArrayList<>(); + if (includeSym) { + colsList.add("Sym"); + generators.add(new SetGenerator<>("a", "b", "c", "d", null)); + } + + if (extraNames.length > 0) { + colsList.addAll(Arrays.asList(extraNames)); + generators.addAll(Arrays.asList(extraGenerators)); + } + + colsList.addAll(Arrays.asList("byteCol", "shortCol", "intCol", "longCol", "floatCol", "doubleCol", "boolCol", + "bigIntCol", "bigDecimalCol")); + generators.addAll(Arrays.asList(new ByteGenerator((byte) -1, (byte) 5, .1), + new ShortGenerator((short) -1, (short) 5, .1), + new IntGenerator(-1, 5, .1), + new LongGenerator(-1, 5, .1), + new FloatGenerator(-1, 5, .1), + new DoubleGenerator(-1, 5, .1), + new BooleanGenerator(.5, .1), + new BigIntegerGenerator(new BigInteger("-1"), new BigInteger("5"), .1), + new BigDecimalGenerator(new BigInteger("1"), new BigInteger("2"), 5, .1))); + + final Random random = new Random(seed); + final ColumnInfo[] columnInfos = initColumnInfos(colsList.toArray(CollectionUtil.ZERO_LENGTH_STRING_ARRAY), + generators.toArray(new TestDataGenerator[0])); + final QueryTable t = getTable(tableSize, random, columnInfos); + + if (!isRefreshing && includeGroups) { + final AbstractColumnSource groupingSource = (AbstractColumnSource) t.getColumnSource("Sym"); + final Map gtr = groupingSource.getValuesMapping(t.getRowSet()); + groupingSource.setGroupToRange(gtr); + } + + t.setRefreshing(isRefreshing); + + return new CreateResult(t, columnInfos, random); + } + // region Object Helper functions final Function, BigInteger> prodBigInt = bigIntegerObjectVector -> { @@ -396,7 +450,7 @@ public void testStaticZeroKeyTimedFwdRev() { } private void doTestStaticZeroKey(final int prevTicks, final int postTicks) { - final QueryTable t = createTestTable(STATIC_TABLE_SIZE, true, false, false, 0x31313131, + final QueryTable t = createSmallTestTable(STATIC_TABLE_SIZE, true, false, false, 0x31313131, new String[] {"charCol"}, new TestDataGenerator[] {new CharGenerator('A', 'z', 0.1)}).t; @@ -411,7 +465,7 @@ private void doTestStaticZeroKey(final int prevTicks, final int postTicks) { } private void doTestStaticZeroKeyTimed(final Duration prevTime, final Duration postTime) { - final QueryTable t = createTestTable(STATIC_TABLE_SIZE, false, false, false, 0xFFFABBBC, + final QueryTable t = createSmallTestTable(STATIC_TABLE_SIZE, false, false, false, 0xFFFABBBC, new String[] {"ts", "charCol"}, new TestDataGenerator[] {new SortedInstantGenerator( DateTimeUtils.parseInstant("2022-03-09T09:00:00.000 NY"), DateTimeUtils.parseInstant("2022-03-09T16:30:00.000 NY")), diff --git a/replication/static/src/main/java/io/deephaven/replicators/ReplicateUpdateBy.java b/replication/static/src/main/java/io/deephaven/replicators/ReplicateUpdateBy.java index c5fb4853029..da86b6cb406 100644 --- a/replication/static/src/main/java/io/deephaven/replicators/ReplicateUpdateBy.java +++ b/replication/static/src/main/java/io/deephaven/replicators/ReplicateUpdateBy.java @@ -134,13 +134,17 @@ public static void main(String[] args) throws IOException { } } - files = ReplicatePrimitiveCode.charToAllButBoolean( - 
"engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingproduct/CharRollingProductOperator.java"); + files = ReplicatePrimitiveCode.charToIntegers( + "engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingproduct/CharRollingProductOperator.java", + exemptions); for (final String f : files) { - if (f.contains("Integer")) { + if (f.contains("Int")) { fixupInteger(f); } } + ReplicatePrimitiveCode.floatToAllFloatingPoints( + "engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/rollingproduct/FloatRollingProductOperator.java"); + files = ReplicatePrimitiveCode.charToAllButBoolean( "engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/delta/CharDeltaOperator.java", From 6b1df15068c5b009ce4f76890650ace77323c9a7 Mon Sep 17 00:00:00 2001 From: Alex Peters <80283343+alexpeters1208@users.noreply.github.com> Date: Thu, 16 Nov 2023 16:58:23 -0600 Subject: [PATCH 32/41] R HTML docs source code (#4782) * Start docsite * More docsite, vignettes do not work in R studio * More docsite, need to fix vignettes * Done with docsite? * Use R CMD to install in r-build.sh * Change readme, not totally happy with this * Use R CMD build * Update cpp-clients-multi image SHA * Gradle tasks and script for building docsite * Update gradle.properties * Update readme * Update files * Remove mistaken comment --- R/build.gradle | 40 + R/r-build.sh | 20 +- R/r-site.sh | 25 + R/rdeephaven/.Rbuildignore | 5 + R/rdeephaven/.gitignore | 4 + R/rdeephaven/DESCRIPTION | 8 +- R/rdeephaven/R/agg_ops_wrapper.R | 292 ++- R/rdeephaven/R/client_wrapper.R | 34 +- R/rdeephaven/R/exports.R | 106 -- R/rdeephaven/R/operation_control.R | 3 +- R/rdeephaven/R/table_handle_wrapper.R | 4 +- R/rdeephaven/R/update_by_ops_wrapper.R | 542 +++--- R/rdeephaven/R/utility_functions.R | 18 +- R/rdeephaven/README.md | 23 +- R/rdeephaven/inst/tests/testthat/helper.R | 14 +- .../inst/tests/testthat/test_agg_by.R | 18 +- .../inst/tests/testthat/test_client_wrapper.R | 14 +- .../testthat/test_table_handle_wrapper.R | 8 +- .../inst/tests/testthat/test_table_ops.R | 30 +- .../inst/tests/testthat/test_update_by.R | 1610 ++++++++++------- .../testthat/test_update_by_ops_wrappers.R | 2 +- R/rdeephaven/man/AggBy.Rd | 101 -- R/rdeephaven/man/AggOp.Rd | 10 +- R/rdeephaven/man/Client.Rd | 2 +- R/rdeephaven/man/TableHandle.Rd | 4 +- R/rdeephaven/man/UpdateBy.Rd | 110 -- R/rdeephaven/man/UpdateByOp.Rd | 3 +- R/rdeephaven/man/agg_abs_sum.Rd | 11 +- R/rdeephaven/man/agg_avg.Rd | 11 +- R/rdeephaven/man/agg_count.Rd | 11 +- R/rdeephaven/man/agg_first.Rd | 11 +- R/rdeephaven/man/agg_last.Rd | 11 +- R/rdeephaven/man/agg_max.Rd | 11 +- R/rdeephaven/man/agg_median.Rd | 11 +- R/rdeephaven/man/agg_min.Rd | 11 +- R/rdeephaven/man/agg_percentile.Rd | 13 +- R/rdeephaven/man/agg_std.Rd | 11 +- R/rdeephaven/man/agg_sum.Rd | 11 +- R/rdeephaven/man/agg_var.Rd | 11 +- R/rdeephaven/man/agg_w_avg.Rd | 13 +- R/rdeephaven/man/rdeephaven.Rd | 101 -- R/rdeephaven/man/uby_cum_max.Rd | 11 +- R/rdeephaven/man/uby_cum_min.Rd | 11 +- R/rdeephaven/man/uby_cum_prod.Rd | 11 +- R/rdeephaven/man/uby_cum_sum.Rd | 11 +- R/rdeephaven/man/uby_delta.Rd | 11 +- R/rdeephaven/man/uby_ema_tick.Rd | 13 +- R/rdeephaven/man/uby_ema_time.Rd | 13 +- R/rdeephaven/man/uby_emmax_tick.Rd | 13 +- R/rdeephaven/man/uby_emmax_time.Rd | 13 +- R/rdeephaven/man/uby_emmin_tick.Rd | 13 +- R/rdeephaven/man/uby_emmin_time.Rd | 13 +- R/rdeephaven/man/uby_ems_tick.Rd | 13 +- R/rdeephaven/man/uby_ems_time.Rd | 13 +- R/rdeephaven/man/uby_emstd_tick.Rd | 13 +- 
R/rdeephaven/man/uby_emstd_time.Rd | 13 +- R/rdeephaven/man/uby_forward_fill.Rd | 11 +- R/rdeephaven/man/uby_rolling_avg_tick.Rd | 13 +- R/rdeephaven/man/uby_rolling_avg_time.Rd | 13 +- R/rdeephaven/man/uby_rolling_count_tick.Rd | 13 +- R/rdeephaven/man/uby_rolling_count_time.Rd | 13 +- R/rdeephaven/man/uby_rolling_group_tick.Rd | 11 +- R/rdeephaven/man/uby_rolling_group_time.Rd | 11 +- R/rdeephaven/man/uby_rolling_max_tick.Rd | 13 +- R/rdeephaven/man/uby_rolling_max_time.Rd | 13 +- R/rdeephaven/man/uby_rolling_min_tick.Rd | 13 +- R/rdeephaven/man/uby_rolling_min_time.Rd | 13 +- R/rdeephaven/man/uby_rolling_prod_tick.Rd | 13 +- R/rdeephaven/man/uby_rolling_prod_time.Rd | 13 +- R/rdeephaven/man/uby_rolling_std_tick.Rd | 13 +- R/rdeephaven/man/uby_rolling_std_time.Rd | 13 +- R/rdeephaven/man/uby_rolling_sum_tick.Rd | 13 +- R/rdeephaven/man/uby_rolling_sum_time.Rd | 13 +- R/rdeephaven/man/uby_rolling_wavg_tick.Rd | 13 +- R/rdeephaven/man/uby_rolling_wavg_time.Rd | 13 +- R/rdeephaven/pkgdown/_pkgdown.yml | 47 + R/rdeephaven/vignettes/.gitignore | 2 + R/rdeephaven/vignettes/agg_by.Rmd | 120 ++ R/rdeephaven/vignettes/rdeephaven.Rmd | 130 ++ R/rdeephaven/vignettes/update_by.Rmd | 133 ++ .../cpp-clients-multi-base/gradle.properties | 2 +- 81 files changed, 2268 insertions(+), 1890 deletions(-) create mode 100644 R/r-site.sh create mode 100644 R/rdeephaven/.Rbuildignore delete mode 100644 R/rdeephaven/man/AggBy.Rd delete mode 100644 R/rdeephaven/man/UpdateBy.Rd delete mode 100644 R/rdeephaven/man/rdeephaven.Rd create mode 100644 R/rdeephaven/pkgdown/_pkgdown.yml create mode 100644 R/rdeephaven/vignettes/.gitignore create mode 100644 R/rdeephaven/vignettes/agg_by.Rmd create mode 100644 R/rdeephaven/vignettes/rdeephaven.Rmd create mode 100644 R/rdeephaven/vignettes/update_by.Rmd diff --git a/R/build.gradle b/R/build.gradle index 96c00db6d86..83f26e670e7 100644 --- a/R/build.gradle +++ b/R/build.gradle @@ -42,6 +42,7 @@ def buildRClient = Docker.registerDockerTask(project, 'rClient') { include 'rdeephaven/R/**' include 'rdeephaven/src/*.cpp' include 'rdeephaven/src/Makevars' + include 'rdeephaven/vignettes/*.Rmd' } } dockerfile { @@ -64,6 +65,7 @@ def buildRClient = Docker.registerDockerTask(project, 'rClient') { copyFile('rdeephaven/R/', "${prefix}/src/rdeephaven/R/") copyFile('rdeephaven/src/*.cpp', "${prefix}/src/rdeephaven/src/") copyFile('rdeephaven/src/Makevars', "${prefix}/src/rdeephaven/src/") + copyFile('rdeephaven/vignettes/*.Rmd', "${prefix}/src/rdeephaven/vignettes/") copyFile('r-build.sh', "${prefix}/bin/rdeephaven") runCommand("PREFIX=${prefix}; " + '''set -eux ; \ @@ -155,5 +157,43 @@ def rClientDoc = Docker.registerDockerTask(project, 'rClientDoc') { containerOutPath = "${prefix}/src/rdeephaven/man" } +def rClientSite = Docker.registerDockerTask(project, 'rClientSite') { + // Only tested on x86-64, and we only build dependencies for x86-64 + platform = 'linux/amd64' + copyIn { + from(layout.projectDirectory) { + include 'r-site.sh' + include 'rdeephaven/man/**' + } + } + copyOut { + into layout.projectDirectory.dir('rdeephaven/docs') + } + dockerfile { + from('deephaven/r-client-doc:local-build') + // We need the contents of 'man' to build the docsite + copyFile('rdeephaven/man/**', "${prefix}/src/rdeephaven/man/") + runCommand("mkdir -p ${prefix}/src/rdeephaven/docs") + runCommand('''echo "status = tryCatch(" \ + " {" \ + " install.packages('pkgdown', repos='http://cran.us.r-project.org'); " \ + " 0" \ + " }," \ + " error=function(e) 1," \ + " warning=function(w) 2" \ + ");" \ + 
"print(paste0('status=', status));" \ + "quit(save='no', status=status)" | \ + MAKE="make -j`getconf _NPROCESSORS_ONLN`" R --no-save --no-restore + ''') + // Keep this after the package installs above; + // it is likely it changes more frequently. + copyFile('r-site.sh', "${prefix}/bin/rdeephaven") + } + parentContainers = [ project.tasks.getByName('rClientDoc') ] + entrypoint = ["${prefix}/bin/rdeephaven/r-site.sh"] + containerOutPath = "${prefix}/src/rdeephaven/docs" +} + deephavenDocker.shouldLogIfTaskFails testRClient tasks.check.dependsOn(testRClient) diff --git a/R/r-build.sh b/R/r-build.sh index 910e3bb43f5..273f67e37ad 100755 --- a/R/r-build.sh +++ b/R/r-build.sh @@ -18,18 +18,14 @@ fi trap 'rm -f src/*.o src/*.so' 1 2 15 rm -f src/*.o src/*.so -MAKE="make -j${NCPUS}" R --no-save --no-restore <&2 + exit 1 +fi + +source $DH_PREFIX/env.sh + +cd $DH_PREFIX/src/rdeephaven + +R --no-save --no-restore <= 3.5.3) Imports: Rcpp (>= 1.0.10), arrow (>= 12.0.0), R6 (>= 2.5.0), dplyr (>= 1.1.0), utils (>= 3.5.3) LinkingTo: Rcpp -Suggests: testthat (>= 3.0.0), lubridate (>= 1.9.0), zoo (>= 1.8-0) +Suggests: + testthat (>= 3.0.0), + lubridate (>= 1.9.0), + zoo (>= 1.8-0), + knitr, + rmarkdown Config/testthat/edition: 3 RoxygenNote: 7.2.3 +VignetteBuilder: knitr diff --git a/R/rdeephaven/R/agg_ops_wrapper.R b/R/rdeephaven/R/agg_ops_wrapper.R index 744fe57ac60..373e3292826 100644 --- a/R/rdeephaven/R/agg_ops_wrapper.R +++ b/R/rdeephaven/R/agg_ops_wrapper.R @@ -1,118 +1,11 @@ -# An AggOp represents an aggregation operator that can be passed to agg_by() or agg_all_by(). This is the return type -# of all of the agg functions. It is a wrapper around an Rcpp_INTERNAL_AggOp, which itself is a wrapper around a -# C++ AggregateWrapper, which is finally a wrapper around a C++ Aggregate. See rdeephaven/src/client.cpp for details. -# Note that AggOps should not be instantiated directly by user code, but rather by provided agg functions. - - -#' @name -#' AggBy -#' @title -#' Aggregations in Deephaven -#' @md -#' @usage NULL -#' @format NULL -#' @docType class -#' -#' @description -#' Table aggregations are a quintessential feature of Deephaven. You can apply as many aggregations as -#' needed to static tables _or_ streaming tables, and if the parent tables are streaming, the resulting aggregated -#' tables will update alongside their parent tables. It is also very easy to perform _grouped_ aggregations, which -#' allow you to aggregate tables on a per-group basis. -#' -#' @section -#' Apply aggregations to a table: -#' There are two methods for performing aggregations on a table, `agg_by()` and `agg_all_by()`. `agg_by()` allows you to -#' perform many aggregations on specified columns, while `agg_all_by()` allows you to perform a single aggregation to -#' every non-grouping column in the table. Both methods have an optional `by` parameter that is used to specify grouping columns. -#' Here are some details on each method: -#' -#' - `TableHandle$agg_by(aggs, by)`: Creates a new table containing grouping columns and grouped data. -#' The resulting grouped data is defined by the aggregation(s) specified. -#' - `TableHandle$agg_all_by(agg, by)`: Creates a new table containing grouping columns and grouped data. -#' The resulting grouped data is defined by the aggregation specified. This method applies the aggregation to all -#' non-grouping columns of the table, so it can only accept one aggregation at a time. 
-#'
-#' The `agg_by()` and `agg_all_by()` methods themselves do not know anything about the columns on which you want to
-#' perform aggregations. Rather, the desired columns are passed to individual `agg` functions, enabling you to apply
-#' various kinds of aggregations to different columns or groups of columns as needed.
-#'
-#' @section
-#' `agg` functions:
-#' `agg` functions are used to perform aggregation calculations on grouped data by passing them to `agg_by()` or
-#' `agg_all_by()`. These functions are _generators_, meaning they return _functions_ that the Deephaven engine knows
-#' how to interpret. We call the functions that they return [`AggOp`][AggOp]s. These `AggOp`s are not R-level functions,
-#' but Deephaven-specific data types that perform all of the intensive calculations. Here is a list of all `agg` functions
-#' available in Deephaven:
-#'
-#' - [`agg_first()`][agg_first]
-#' - [`agg_last()`][agg_last]
-#' - [`agg_min()`][agg_min]
-#' - [`agg_max()`][agg_max]
-#' - [`agg_sum()`][agg_sum]
-#' - [`agg_abs_sum()`][agg_abs_sum]
-#' - [`agg_avg()`][agg_avg]
-#' - [`agg_w_avg()`][agg_w_avg]
-#' - [`agg_median()`][agg_median]
-#' - [`agg_var()`][agg_var]
-#' - [`agg_std()`][agg_std]
-#' - [`agg_percentile()`][agg_percentile]
-#' - [`agg_count()`][agg_count]
-#'
-#' For more details on each aggregation function, click on one of the methods above or see the reference documentation
-#' by running `?agg_first`, `?agg_last`, etc.
-#'
-#' @examples
-#' \dontrun{
-#' library(rdeephaven)
-#'
-#' # connecting to Deephaven server
-#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token")
-#'
-#' # create data frame, push to server, retrieve TableHandle
-#' df <- data.frame(
-#'   X = c("A", "B", "A", "C", "B", "A", "B", "B", "C"),
-#'   Y = c("M", "N", "O", "N", "P", "M", "O", "P", "M"),
-#'   Number1 = c(100, -44, 49, 11, -66, 50, 29, 18, -70),
-#'   Number2 = c(-55, 76, 20, 130, 230, -50, 73, 137, 214)
-#' )
-#' th <- client$import_table(df)
-#'
-#' # get first and last elements of each column
-#' th1 <- th$
-#'   agg_by(agg_first(c("XFirst = X", "YFirst = Y", "Number1First = Number1", "Number2First = Number2")),
-#'          agg_last(c("XLast = X", "YLast = Y", "Number1Last = Number1", "Number2Last = Number2")))
-#'
-#' # compute mean and standard deviation of Number1 and Number2, grouped by X
-#' th2 <- th$
-#'   agg_by(
-#'     c(agg_avg(c("Number1Avg = Number1", "Number2Avg = Number2")),
-#'       agg_std(c("Number1Std = Number1", "Number2Std = Number2"))),
-#'     by="X")
-#'
-#' # compute maximum of all non-grouping columns, grouped by X and Y
-#' th3 <- th$
-#'   agg_all_by(agg_max(), by=c("X", "Y"))
-#'
-#' # compute minimum and maximum of Number1 and Number2 respectively grouped by Y
-#' th4 <- th$
-#'   agg_by(
-#'     c(agg_min("Number1Min = Number1"),
-#'       agg_max("Number2Max = Number2")),
-#'     by="Y")
-#'
-#' client$close()
-#' }
-#'
-NULL
-
-
-#' Name AggOp
+#' @name AggOp
 #' @title Deephaven AggOps
 #' @md
 #' @description
-#' An `AggOp` is the return type of one of Deephaven's [`agg`][AggBy] functions. It is a function that performs the
+#' An `AggOp` is the return type of one of Deephaven's `agg` functions. It is a function that performs the
 #' computation specified by the `agg` function. These are intended to be passed directly to `agg_by()` or `agg_all_by()`,
-#' and should never be instantiated directly be user code.
+#' and should never be instantiated directly by user code. For more information, see the
+#' vignette on `agg` functions with `vignette("agg_by")`. 
#' #' If multiple tables have the same schema and the same aggregations need to be applied to each table, saving these #' objects directly in a variable may be useful to avoid having to re-create them each time: @@ -123,7 +16,7 @@ NULL #' result1 <- th1$agg_by(aggregations, by="Group") #' result2 <- th2$agg_by(aggregations, by="Group") #' ``` -#' In this example, `aggregations` would be a vector of two `AggOp`s that can be reused in multiple calls to `agg_by()`. +#' In this example, `aggregations` would be a vector of two AggOps that can be reused in multiple calls to `agg_by()`. #' #' @usage NULL #' @format NULL @@ -166,17 +59,20 @@ AggOp <- R6Class("AggOp", #' function called an [`AggOp`][AggOp] intended to be used in a call to `agg_by()` or `agg_all_by()`. This detail is #' typically hidden from the user. However, it is important to understand this detail for debugging purposes, #' as the output of an `agg` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `agg` functions by running +#' `vignette("agg_by")`. #' #' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. #' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. -#' @return `AggOp` to be used in a call to `agg_by()` or `agg_all_by()`. +#' @return [`AggOp`][AggOp] to be used in a call to `agg_by()` or `agg_all_by()`. #' #' @examples #' \dontrun{ #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( @@ -193,11 +89,11 @@ AggOp <- R6Class("AggOp", #' #' # get first elements of Y, Number1, and Number2 grouped by X #' th2 <- th$ -#' agg_by(agg_first(c("Y", "Number1", "Number2")), by="X") +#' agg_by(agg_first(c("Y", "Number1", "Number2")), by = "X") #' #' # get first elements of Number1 and Number2 grouped by X and Y #' th3 <- th -#' agg_by(agg_first(c("Number1", "Number2")), by=c("X", "Y")) +#' agg_by(agg_first(c("Number1", "Number2")), by = c("X", "Y")) #' #' client$close() #' } @@ -205,7 +101,7 @@ AggOp <- R6Class("AggOp", #' @export agg_first <- function(cols = character()) { verify_string("cols", cols, FALSE) - return(AggOp$new(INTERNAL_agg_first, "agg_first", cols=cols)) + return(AggOp$new(INTERNAL_agg_first, "agg_first", cols = cols)) } #' @name @@ -227,17 +123,20 @@ agg_first <- function(cols = character()) { #' function called an [`AggOp`][AggOp] intended to be used in a call to `agg_by()` or `agg_all_by()`. This detail is #' typically hidden from the user. However, it is important to understand this detail for debugging purposes, #' as the output of an `agg` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `agg` functions by running +#' `vignette("agg_by")`. #' #' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. #' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. -#' @return `AggOp` to be used in a call to `agg_by()` or `agg_all_by()`. +#' @return [`AggOp`][AggOp] to be used in a call to `agg_by()` or `agg_all_by()`. 
#' #' @examples #' \dontrun{ #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( @@ -254,11 +153,11 @@ agg_first <- function(cols = character()) { #' #' # get last elements of Y, Number1, and Number2 grouped by X #' th2 <- th$ -#' agg_by(agg_last(c("Y", "Number1", "Number2")), by="X") +#' agg_by(agg_last(c("Y", "Number1", "Number2")), by = "X") #' #' # get last elements of Number1 and Number2 grouped by X and Y #' th3 <- th$ -#' agg_by(agg_last(c("Number1", "Number2")), by=c("X", "Y")) +#' agg_by(agg_last(c("Number1", "Number2")), by = c("X", "Y")) #' #' client$close() #' } @@ -266,7 +165,7 @@ agg_first <- function(cols = character()) { #' @export agg_last <- function(cols = character()) { verify_string("cols", cols, FALSE) - return(AggOp$new(INTERNAL_agg_last, "agg_last", cols=cols)) + return(AggOp$new(INTERNAL_agg_last, "agg_last", cols = cols)) } #' @name @@ -288,17 +187,20 @@ agg_last <- function(cols = character()) { #' function called an [`AggOp`][AggOp] intended to be used in a call to `agg_by()` or `agg_all_by()`. This detail is #' typically hidden from the user. However, it is important to understand this detail for debugging purposes, #' as the output of an `agg` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `agg` functions by running +#' `vignette("agg_by")`. #' #' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. #' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. -#' @return `AggOp` to be used in a call to `agg_by()` or `agg_all_by()`. +#' @return [`AggOp`][AggOp] to be used in a call to `agg_by()` or `agg_all_by()`. #' #' @examples #' \dontrun{ #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( @@ -315,11 +217,11 @@ agg_last <- function(cols = character()) { #' #' # get minimum elements of Number1 and Number2 grouped by X #' th2 <- th$ -#' agg_by(agg_min(c("Number1", "Number2")), by="X") +#' agg_by(agg_min(c("Number1", "Number2")), by = "X") #' #' # get minimum elements of Number1 and Number2 grouped by X and Y #' th3 <- th$ -#' agg_by(agg_min(c("Number1", "Number2")), by=c("X", "Y")) +#' agg_by(agg_min(c("Number1", "Number2")), by = c("X", "Y")) #' #' client$close() #' } @@ -327,7 +229,7 @@ agg_last <- function(cols = character()) { #' @export agg_min <- function(cols = character()) { verify_string("cols", cols, FALSE) - return(AggOp$new(INTERNAL_agg_min, "agg_min", cols=cols)) + return(AggOp$new(INTERNAL_agg_min, "agg_min", cols = cols)) } #' @name @@ -349,17 +251,20 @@ agg_min <- function(cols = character()) { #' function called an [`AggOp`][AggOp] intended to be used in a call to `agg_by()` or `agg_all_by()`. This detail is #' typically hidden from the user. However, it is important to understand this detail for debugging purposes, #' as the output of an `agg` function can otherwise seem unexpected. 
+#' +#' For more information, see the vignette on `agg` functions by running +#' `vignette("agg_by")`. #' #' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. #' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. -#' @return `AggOp` to be used in a call to `agg_by()` or `agg_all_by()`. +#' @return [`AggOp`][AggOp] to be used in a call to `agg_by()` or `agg_all_by()`. #' #' @examples #' \dontrun{ #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( @@ -376,11 +281,11 @@ agg_min <- function(cols = character()) { #' #' # get maximum elements of Number1 and Number2 grouped by X #' th2 <- th$ -#' agg_by(agg_max(c("Number1", "Number2")), by="X") +#' agg_by(agg_max(c("Number1", "Number2")), by = "X") #' #' # get maximum elements of Number1 and Number2 grouped by X and Y #' th3 <- th$ -#' agg_by(agg_max(c("Number1", "Number2")), by=c("X", "Y")) +#' agg_by(agg_max(c("Number1", "Number2")), by = c("X", "Y")) #' #' client$close() #' } @@ -388,7 +293,7 @@ agg_min <- function(cols = character()) { #' @export agg_max <- function(cols = character()) { verify_string("cols", cols, FALSE) - return(AggOp$new(INTERNAL_agg_max, "agg_max", cols=cols)) + return(AggOp$new(INTERNAL_agg_max, "agg_max", cols = cols)) } #' @name @@ -411,16 +316,19 @@ agg_max <- function(cols = character()) { #' typically hidden from the user. However, it is important to understand this detail for debugging purposes, #' as the output of an `agg` function can otherwise seem unexpected. #' +#' For more information, see the vignette on `agg` functions by running +#' `vignette("agg_by")`. +#' #' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. #' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. -#' @return `AggOp` to be used in a call to `agg_by()` or `agg_all_by()`. +#' @return [`AggOp`][AggOp] to be used in a call to `agg_by()` or `agg_all_by()`. 
#' #' @examples #' \dontrun{ #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( @@ -437,11 +345,11 @@ agg_max <- function(cols = character()) { #' #' # compute sum of Number1 and Number2 grouped by X #' th2 <- th$ -#' agg_by(agg_sum(c("Number1", "Number2")), by="X") +#' agg_by(agg_sum(c("Number1", "Number2")), by = "X") #' #' # compute sum of Number1 and Number2 grouped by X and Y #' th3 <- th$ -#' agg_by(agg_sum(c("Number1", "Number2")), by=c("X", "Y")) +#' agg_by(agg_sum(c("Number1", "Number2")), by = c("X", "Y")) #' #' client$close() #' } @@ -449,7 +357,7 @@ agg_max <- function(cols = character()) { #' @export agg_sum <- function(cols = character()) { verify_string("cols", cols, FALSE) - return(AggOp$new(INTERNAL_agg_sum, "agg_sum", cols=cols)) + return(AggOp$new(INTERNAL_agg_sum, "agg_sum", cols = cols)) } #' @name @@ -471,17 +379,20 @@ agg_sum <- function(cols = character()) { #' function called an [`AggOp`][AggOp] intended to be used in a call to `agg_by()` or `agg_all_by()`. This detail is #' typically hidden from the user. However, it is important to understand this detail for debugging purposes, #' as the output of an `agg` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `agg` functions by running +#' `vignette("agg_by")`. #' #' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. #' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. -#' @return `AggOp` to be used in a call to `agg_by()` or `agg_all_by()`. +#' @return [`AggOp`][AggOp] to be used in a call to `agg_by()` or `agg_all_by()`. #' #' @examples #' \dontrun{ #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( @@ -498,11 +409,11 @@ agg_sum <- function(cols = character()) { #' #' # compute absolute sum of Number1 and Number2 grouped by X #' th2 <- th$ -#' agg_by(agg_abs_sum(c("Number1", "Number2")), by="X") +#' agg_by(agg_abs_sum(c("Number1", "Number2")), by = "X") #' #' # compute absolute sum of Number1 and Number2 grouped by X and Y #' th3 <- th$ -#' agg_by(agg_abs_sum(c("Number1", "Number2")), by=c("X", "Y")) +#' agg_by(agg_abs_sum(c("Number1", "Number2")), by = c("X", "Y")) #' #' client$close() #' } @@ -510,7 +421,7 @@ agg_sum <- function(cols = character()) { #' @export agg_abs_sum <- function(cols = character()) { verify_string("cols", cols, FALSE) - return(AggOp$new(INTERNAL_agg_abs_sum, "agg_abs_sum", cols=cols)) + return(AggOp$new(INTERNAL_agg_abs_sum, "agg_abs_sum", cols = cols)) } #' @name @@ -532,17 +443,20 @@ agg_abs_sum <- function(cols = character()) { #' function called an [`AggOp`][AggOp] intended to be used in a call to `agg_by()` or `agg_all_by()`. This detail is #' typically hidden from the user. However, it is important to understand this detail for debugging purposes, #' as the output of an `agg` function can otherwise seem unexpected. 
+#' +#' For more information, see the vignette on `agg` functions by running +#' `vignette("agg_by")`. #' #' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. #' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. -#' @return `AggOp` to be used in a call to `agg_by()` or `agg_all_by()`. +#' @return [`AggOp`][AggOp] to be used in a call to `agg_by()` or `agg_all_by()`. #' #' @examples #' \dontrun{ #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( @@ -559,11 +473,11 @@ agg_abs_sum <- function(cols = character()) { #' #' # compute average of Number1 and Number2 grouped by X #' th2 <- th$ -#' agg_by(agg_avg(c("Number1", "Number2")), by="X") +#' agg_by(agg_avg(c("Number1", "Number2")), by = "X") #' #' # compute average of Number1 and Number2 grouped by X and Y #' th3 <- th$ -#' agg_by(agg_avg(c("Number1", "Number2")), by=c("X", "Y")) +#' agg_by(agg_avg(c("Number1", "Number2")), by = c("X", "Y")) #' #' client$close() #' } @@ -571,7 +485,7 @@ agg_abs_sum <- function(cols = character()) { #' @export agg_avg <- function(cols = character()) { verify_string("cols", cols, FALSE) - return(AggOp$new(INTERNAL_agg_avg, "agg_avg", cols=cols)) + return(AggOp$new(INTERNAL_agg_avg, "agg_avg", cols = cols)) } #' @name @@ -593,18 +507,21 @@ agg_avg <- function(cols = character()) { #' function called an [`AggOp`][AggOp] intended to be used in a call to `agg_by()` or `agg_all_by()`. This detail is #' typically hidden from the user. However, it is important to understand this detail for debugging purposes, #' as the output of an `agg` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `agg` functions by running +#' `vignette("agg_by")`. #' #' @param wcol String denoting the column to use for weights. This must be a numeric column. #' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. #' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. -#' @return `AggOp` to be used in a call to `agg_by()` or `agg_all_by()`. +#' @return [`AggOp`][AggOp] to be used in a call to `agg_by()` or `agg_all_by()`. 
#' #' @examples #' \dontrun{ #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( @@ -617,15 +534,15 @@ agg_avg <- function(cols = character()) { #' #' # compute weighted average of Number1, weighted by Number2 #' th1 <- th$ -#' agg_by(agg_w_avg(wcol="Number2", cols="Number1")) +#' agg_by(agg_w_avg(wcol = "Number2", cols = "Number1")) #' #' # compute weighted average of Number1, weighted by Number2, grouped by X #' th2 <- th$ -#' agg_by(agg_w_avg(wcol="Number2", cols="Number1", by="X")) +#' agg_by(agg_w_avg(wcol = "Number2", cols = "Number1", by = "X")) #' #' # compute weighted average of Number1, weighted by Number2, grouped by X and Y #' th3 <- th$ -#' agg_by(agg_w_avg(wcol="Number2", cols="Number1", by=c("X", "Y"))) +#' agg_by(agg_w_avg(wcol = "Number2", cols = "Number1", by = c("X", "Y"))) #' #' client$close() #' } @@ -634,7 +551,7 @@ agg_avg <- function(cols = character()) { agg_w_avg <- function(wcol, cols = character()) { verify_string("wcol", wcol, TRUE) verify_string("cols", cols, FALSE) - return(AggOp$new(INTERNAL_agg_w_avg, "agg_w_avg", wcol=wcol, cols=cols)) + return(AggOp$new(INTERNAL_agg_w_avg, "agg_w_avg", wcol = wcol, cols = cols)) } #' @name @@ -656,17 +573,20 @@ agg_w_avg <- function(wcol, cols = character()) { #' function called an [`AggOp`][AggOp] intended to be used in a call to `agg_by()` or `agg_all_by()`. This detail is #' typically hidden from the user. However, it is important to understand this detail for debugging purposes, #' as the output of an `agg` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `agg` functions by running +#' `vignette("agg_by")`. #' #' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. #' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. -#' @return `AggOp` to be used in a call to `agg_by()` or `agg_all_by()`. +#' @return [`AggOp`][AggOp] to be used in a call to `agg_by()` or `agg_all_by()`. 
#' #' @examples #' \dontrun{ #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( @@ -683,11 +603,11 @@ agg_w_avg <- function(wcol, cols = character()) { #' #' # compute median of Number1 and Number2 grouped by X #' th2 <- th$ -#' agg_by(agg_median(c("Number1", "Number2")), by="X") +#' agg_by(agg_median(c("Number1", "Number2")), by = "X") #' #' # compute median of Number1 and Number2 grouped by X and Y #' th3 <- th$ -#' agg_by(agg_median(c("Number1", "Number2")), by=c("X", "Y")) +#' agg_by(agg_median(c("Number1", "Number2")), by = c("X", "Y")) #' #' client$close() #' } @@ -695,7 +615,7 @@ agg_w_avg <- function(wcol, cols = character()) { #' @export agg_median <- function(cols = character()) { verify_string("cols", cols, FALSE) - return(AggOp$new(INTERNAL_agg_median, "agg_median", cols=cols)) + return(AggOp$new(INTERNAL_agg_median, "agg_median", cols = cols)) } #' @name @@ -717,17 +637,20 @@ agg_median <- function(cols = character()) { #' function called an [`AggOp`][AggOp] intended to be used in a call to `agg_by()` or `agg_all_by()`. This detail is #' typically hidden from the user. However, it is important to understand this detail for debugging purposes, #' as the output of an `agg` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `agg` functions by running +#' `vignette("agg_by")`. #' #' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. #' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. -#' @return `AggOp` to be used in a call to `agg_by()` or `agg_all_by()`. +#' @return [`AggOp`][AggOp] to be used in a call to `agg_by()` or `agg_all_by()`. #' #' @examples #' \dontrun{ #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( @@ -744,11 +667,11 @@ agg_median <- function(cols = character()) { #' #' # compute variance of Number1 and Number2 grouped by X #' th2 <- th$ -#' agg_by(agg_var(c("Number1", "Number2")), by="X") +#' agg_by(agg_var(c("Number1", "Number2")), by = "X") #' #' # compute variance of Number1 and Number2 grouped by X and Y #' th3 <- th$ -#' agg_by(agg_var(c("Number1", "Number2")), by=c("X", "Y")) +#' agg_by(agg_var(c("Number1", "Number2")), by = c("X", "Y")) #' #' client$close() #' } @@ -756,7 +679,7 @@ agg_median <- function(cols = character()) { #' @export agg_var <- function(cols = character()) { verify_string("cols", cols, FALSE) - return(AggOp$new(INTERNAL_agg_var, "agg_var", cols=cols)) + return(AggOp$new(INTERNAL_agg_var, "agg_var", cols = cols)) } #' @name @@ -778,17 +701,20 @@ agg_var <- function(cols = character()) { #' function called an [`AggOp`][AggOp] intended to be used in a call to `agg_by()` or `agg_all_by()`. This detail is #' typically hidden from the user. However, it is important to understand this detail for debugging purposes, #' as the output of an `agg` function can otherwise seem unexpected. 
+#' +#' For more information, see the vignette on `agg` functions by running +#' `vignette("agg_by")`. #' #' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. #' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. -#' @return `AggOp` to be used in a call to `agg_by()` or `agg_all_by()`. +#' @return [`AggOp`][AggOp] to be used in a call to `agg_by()` or `agg_all_by()`. #' #' @examples #' \dontrun{ #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( @@ -805,11 +731,11 @@ agg_var <- function(cols = character()) { #' #' # compute standard deviation of Number1 and Number2 grouped by X #' th2 <- th$ -#' agg_by(agg_std(c("Number1", "Number2")), by="X") +#' agg_by(agg_std(c("Number1", "Number2")), by = "X") #' #' # compute standard deviation of Number1 and Number2 grouped by X and Y #' th3 <- th$ -#' agg_by(agg_std(c("Number1", "Number2")), by=c("X", "Y")) +#' agg_by(agg_std(c("Number1", "Number2")), by = c("X", "Y")) #' #' client$close() #' } @@ -817,7 +743,7 @@ agg_var <- function(cols = character()) { #' @export agg_std <- function(cols = character()) { verify_string("cols", cols, FALSE) - return(AggOp$new(INTERNAL_agg_std, "agg_std", cols=cols)) + return(AggOp$new(INTERNAL_agg_std, "agg_std", cols = cols)) } #' @name @@ -839,18 +765,21 @@ agg_std <- function(cols = character()) { #' function called an [`AggOp`][AggOp] intended to be used in a call to `agg_by()` or `agg_all_by()`. This detail is #' typically hidden from the user. However, it is important to understand this detail for debugging purposes, #' as the output of an `agg` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `agg` functions by running +#' `vignette("agg_by")`. #' #' @param percentile Numeric scalar between 0 and 1 denoting the percentile to compute. #' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. #' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. -#' @return `AggOp` to be used in a call to `agg_by()` or `agg_all_by()`. +#' @return [`AggOp`][AggOp] to be used in a call to `agg_by()` or `agg_all_by()`. 
#' #' @examples #' \dontrun{ #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( @@ -863,15 +792,15 @@ agg_std <- function(cols = character()) { #' #' # compute 20th percentile of Number1 and Number2 #' th1 <- th$ -#' agg_by(agg_percentile(percentile=0.2, cols=c("Number1", "Number2"))) +#' agg_by(agg_percentile(percentile = 0.2, cols = c("Number1", "Number2"))) #' #' # compute 50th percentile of Number1 and Number2 grouped by X #' th2 <- th$ -#' agg_by(agg_percentile(percentile=0.5, cols=c("Number1", "Number2")), by="X") +#' agg_by(agg_percentile(percentile = 0.5, cols = c("Number1", "Number2")), by = "X") #' #' # compute 75th percentile of Number1 and Number2 grouped by X and Y #' th3 <- th$ -#' agg_by(agg_percentile(percentile=0.75, cols=c("Number1", "Number2")), by=c("X", "Y")) +#' agg_by(agg_percentile(percentile = 0.75, cols = c("Number1", "Number2")), by = c("X", "Y")) #' #' client$close() #' } @@ -880,7 +809,7 @@ agg_std <- function(cols = character()) { agg_percentile <- function(percentile, cols = character()) { verify_in_unit_interval("percentile", percentile, TRUE) verify_string("cols", cols, FALSE) - return(AggOp$new(INTERNAL_agg_percentile, "agg_percentile", percentile=percentile, cols=cols)) + return(AggOp$new(INTERNAL_agg_percentile, "agg_percentile", percentile = percentile, cols = cols)) } #' @name @@ -902,18 +831,21 @@ agg_percentile <- function(percentile, cols = character()) { #' function called an [`AggOp`][AggOp] intended to be used in a call to `agg_by()` or `agg_all_by()`. This detail is #' typically hidden from the user. However, it is important to understand this detail for debugging purposes, #' as the output of an `agg` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `agg` functions by running +#' `vignette("agg_by")`. #' #' Note that this operation is not supported in `agg_all_by()`. #' #' @param col String denoting the name of the new column to hold the counts of each aggregation group. -#' @return `AggOp` to be used in a call to `agg_by()`. +#' @return [`AggOp`][AggOp] to be used in a call to `agg_by()`. 
#' #' @examples #' \dontrun{ #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( @@ -926,11 +858,11 @@ agg_percentile <- function(percentile, cols = character()) { #' #' # count number of elements in each group when grouped by X, name resulting column "count" #' th1 <- th$ -#' agg_by(agg_count("count"), by="X") +#' agg_by(agg_count("count"), by = "X") #' #' # count number of elements in each group when grouped by X and Y, name resulting column "CountingCol" #' th2 <- th$ -#' agg_by(agg_count("CountingCol"), by=c("X", "Y")) +#' agg_by(agg_count("CountingCol"), by = c("X", "Y")) #' #' client$close() #' } @@ -938,5 +870,5 @@ agg_percentile <- function(percentile, cols = character()) { #' @export agg_count <- function(col) { verify_string("col", col, TRUE) - return(AggOp$new(INTERNAL_agg_count, "agg_count", col=col)) -} \ No newline at end of file + return(AggOp$new(INTERNAL_agg_count, "agg_count", col = col)) +} diff --git a/R/rdeephaven/R/client_wrapper.R b/R/rdeephaven/R/client_wrapper.R index 3d91ce9bc7a..538bede0b92 100644 --- a/R/rdeephaven/R/client_wrapper.R +++ b/R/rdeephaven/R/client_wrapper.R @@ -13,7 +13,7 @@ #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create a data frame and push it to the server, retrieve a reference to it as a TableHandle #' df <- data.frame( @@ -66,25 +66,26 @@ Client <- R6Class("Client", args <- list(...) if (length(args) == 1) { first_arg <- args[[1]] - first_arg_class = first_class(first_arg) + first_arg_class <- first_class(first_arg) if (first_arg_class != "character" && first_arg_class != "list") { if (first_arg_class != "externalptr") { stop(paste0( "Client initialize first argument must be ", - "either a string or an Rcpp::XPtr object.")) + "either a string or an Rcpp::XPtr object." + )) } return(self$initialize_for_xptr(first_arg)) } } return(do.call(self$initialize_for_target, args)) }, - + #' @description #' Initializes a Client object using a pointer to an existing client connection. #' @param xptr External pointer to an existing client connection. initialize_for_xptr = function(xptr) { verify_type("xptr", xptr, TRUE, "externalptr", "an XPtr") - self$.internal_rcpp_object = new(INTERNAL_Client, xptr) + self$.internal_rcpp_object <- new(INTERNAL_Client, xptr) }, #' @description @@ -112,18 +113,17 @@ Client <- R6Class("Client", #' grpc channel creation. Defaults to an empty list, which implies not using any channel options. #' @param extra_headers List of name-value pairs for additional headers and values #' to add to server requests. Defaults to an empty list, which implies not using any extra headers. 
- initialize_for_target = function( - target, - auth_type = "anonymous", - username = "", - password = "", - auth_token = "", - session_type = "python", - use_tls = FALSE, - tls_root_certs = "", - int_options = list(), - string_options = list(), - extra_headers = list()) { + initialize_for_target = function(target, + auth_type = "anonymous", + username = "", + password = "", + auth_token = "", + session_type = "python", + use_tls = FALSE, + tls_root_certs = "", + int_options = list(), + string_options = list(), + extra_headers = list()) { options <- new(INTERNAL_ClientOptions) verify_string("target", target, TRUE) diff --git a/R/rdeephaven/R/exports.R b/R/rdeephaven/R/exports.R index 2fbe0011cb2..81792ad68c5 100644 --- a/R/rdeephaven/R/exports.R +++ b/R/rdeephaven/R/exports.R @@ -1,107 +1 @@ -#' @import Rcpp -#' @useDynLib rdeephaven, .registration = TRUE -#' @importFrom Rcpp evalCpp -#' -#' @importFrom arrow arrow_table as_arrow_table as_record_batch_reader -#' @importFrom R6 R6Class -#' @importFrom dplyr as_tibble as_data_frame -#' @importFrom utils head tail - -#' @name -#' rdeephaven -#' @title -#' The Deephaven Community R Client -#' @md -#' @usage NULL -#' @format NULL -#' -#' @description -#' The Deephaven Community R Client provides an R interface to Deephaven's powerful real-time data engine, [_Deephaven Core_](https://deephaven.io/community/). -#' To use this package, you must have a Deephaven server running and be able to connect to it. For more information on -#' how to set up a Deephaven server, see the documentation [here](https://deephaven.io/core/docs/tutorials/quickstart/). -#' -#' @section -#' Building blocks of the Deephaven R Client: -#' There are two primary R classes that make up the Deephaven R Client, the [`Client`][Client] class and the -#' [`TableHandle`][TableHandle] class. The `Client` class is used to establish a connection to the Deephaven server with -#' its constructor `Client$new()`, and to send server requests, such as running a script via `run_script()`, or pushing -#' local data to the server via `import_table()`. Basic usage of the `Client` class may look something like this: -#' ```r -#' library(rdeephaven) -#' -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") -#' ``` -#' Many of these server requests end up creating or modifying tables that live on the server. To keep track of these -#' tables, the R client retrieves references to them, and wraps these references in `TableHandle` objects. These -#' TableHandles have a host of methods that mirror server-side table operations, such as `head()`, `tail()`, `update()`, -#' and so on. So, you can typically use TableHandles _as if_ they are tables themselves, and all of the corresponding -#' methods that you call on them will be executed on the server. Here is a simple example of pushing data to the server, -#' retrieving a TableHandle to the resulting table, and applying some basic table operations to the table: -#' ```r -#' df1 <- data.frame(x=1:10, y=11:20) -#' th1 <- client$import_table(df1) -#' -#' th2 <- th1$ -#' update("z = x + y")$ -#' where("z % 4 == 0") -#' ``` -#' TableHandles also support common functional methods for converting server-side Deephaven tables to R objects stored in -#' local memory such as `as.data.frame()`, `as_tibble()`, and `as_arrow_table()`. 
Here's an example of converting the -#' table created above to an R data frame and verifying that other functional methods work as expected: -#' ```r -#' df2 <- as.data.frame(th2) -#' -#' print(nrow(th2) == nrow(df2)) -#' print(ncol(th2) == ncol(df2)) -#' print(dim(th2) == dim(df2)) -#' print(all(as.data.frame(head(th2, 2)) == head(df2, 2))) -#' print(all(as.data.frame(tail(th2, 2)) == tail(df2, 2))) -#' ``` -#' For more information on these classes and all of their methods, see the reference documentation for [`Client`][Client] -#' and [`TableHandle`][TableHandle] by clicking on their class names, or by running `?Client` or `?TableHandle`. -#' -#' @section -#' Real-time data analysis: -#' Since TableHandles are references to tables living on the Deephaven server, they may refer to streaming tables, or -#' tables that are receiving new data periodically (typically once per second). Here's a simple example of creating a -#' table that adds a new row every second: -#' ```r -#' th3 <- client$time_table("PT1s")$ -#' update(c("X = ii", "Y = sin(X)")) -#' ``` -#' R objects like data frames or Dplyr tibbles do not have this streaming property - they are always static objects -#' stored in memory. However, a TableHandle referring to a streaming table may be converted to a data frame or tibble at -#' any time, and the resulting object will be a snapshot of the table at the time of conversion. This means that you can -#' use the Deephaven R Client to perform real-time data analysis on streaming data! Here, we make a simple plot of the -#' ticking table, and call it three times to demonstrate the dynamic nature of the table: -#' ```r -#' plot(as.data.frame(th3)$X, as.data.frame(th3)$Y, type="l") -#' Sys.sleep(5) -#' plot(as.data.frame(th3)$X, as.data.frame(th3)$Y, type="l") -#' Sys.sleep(5) -#' plot(as.data.frame(th3)$X, as.data.frame(th3)$Y, type="l") -#' ``` -#' There are performance and memory considerations when pulling data from the server, so it is best to use the provided -#' TableHandle methods to perform as much of your analysis as possible on the server, and to only pull the data when -#' something _must_ be done in R, like plotting or writing to a local file. -#' -#' @section -#' Powerful table operations: -#' Much of the power of Deephaven's suite of table operations is achieved through the use of the [`update_by()`][UpdateBy] -#' and [`agg_by()`][AggBy] methods. These table methods are important enough to warrant their own documentation pages, accessible -#' by clicking on their names, or by running `?UpdateBy` or `?AggBy`. These methods come with their own suites of functions, -#' prefixed with `agg_` and `uby_` respectively, that are discoverable from their documentation pages. Running `ls("package:rdeephaven")` -#' will reveal that most of the functions included in this package are for these methods, so it is important to get acquainted -#' with them. -#' -#' @section -#' Getting help: -#' While we've done our best to provide good documentation for this package, you may find you need more help than what -#' this documentation has to offer. Please visit the official Deephaven Community Core [documentation](https://deephaven.io/core/docs/tutorials/quickstart/) -#' to learn more about Deephaven and to find comprehensive examples. Additionally, feel free to reach out to us on -#' the Deephaven [Community Slack channel](https://deephaven.io/slack) with any questions. -#' We hope you find real-time data analysis in R to be as easy as possible. 
-#' -NULL - loadModule("DeephavenInternalModule", TRUE) diff --git a/R/rdeephaven/R/operation_control.R b/R/rdeephaven/R/operation_control.R index 083085acc78..f376a79a1ca 100644 --- a/R/rdeephaven/R/operation_control.R +++ b/R/rdeephaven/R/operation_control.R @@ -94,8 +94,7 @@ OperationControl <- R6Class("OperationControl", #' print("hello!") #' #' @export -op_control <- function(on_null="skip", on_nan="skip", big_value_context="decimal128") { - +op_control <- function(on_null = "skip", on_nan = "skip", big_value_context = "decimal128") { if (!(on_null %in% c("poison", "reset", "skip", "throw"))) { stop(paste0("'on_null' must be one of 'poison', 'reset', 'skip', or 'throw'. Got '", on_null, "'.")) } diff --git a/R/rdeephaven/R/table_handle_wrapper.R b/R/rdeephaven/R/table_handle_wrapper.R index cb474579049..7be2a601b90 100644 --- a/R/rdeephaven/R/table_handle_wrapper.R +++ b/R/rdeephaven/R/table_handle_wrapper.R @@ -56,12 +56,12 @@ #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create a data frame, push it to the server, and retrieve a TableHandle referencing the new table #' df <- data.frame( #' timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:50], -#' boolCol = sample(c(TRUE,FALSE), 50, TRUE), +#' boolCol = sample(c(TRUE, FALSE), 50, TRUE), #' col1 = sample(1000, size = 50, replace = TRUE), #' col2 = sample(1000, size = 50, replace = TRUE), #' col3 = 1:50 diff --git a/R/rdeephaven/R/update_by_ops_wrapper.R b/R/rdeephaven/R/update_by_ops_wrapper.R index 2ca32ef7411..a27af272b62 100644 --- a/R/rdeephaven/R/update_by_ops_wrapper.R +++ b/R/rdeephaven/R/update_by_ops_wrapper.R @@ -3,126 +3,14 @@ # C++ UpdateByOpWrapper, which is finally a wrapper around a C++ UpdateByOperation. See rdeephaven/src/client.cpp for details. # Note that UpdateByOps should not be instantiated directly by user code, but rather by provided uby functions. - -#' @name -#' UpdateBy -#' @title -#' Deephaven's UpdateBy Operations -#' @md -#' @usage NULL -#' @format NULL -#' @docType class -#' -#' @description -#' Deephaven's `update_by()` table method and suite of `uby` functions enable cumulative and moving calculations -#' on static _and_ streaming tables. Complex operations like cumulative minima and maxima, exponential moving averages, -#' and rolling standard deviations are all possible and effortless to execute. As always in Deephaven, -#' the results of these calculations will continue to update as their parent tables are updated. Additionally, it's easy -#' to group data by one or more columns, enabling complex group-wise calculations with a single line of code. -#' -#' @section -#' Applying UpdateBy operations to a table: -#' The table method `update_by()` is the entry point for UpdateBy operations. It takes two arguments: the first is an -#' [`UpdateByOp`][UpdateByOp] or a list of `UpdateByOp`s denoting the calculations to perform on specific columns of the -#' table. Then, it takes a column name or a list of column names that define the groups on which to perform the calculations. -#' If you don't want grouped calculations, omit this argument. -#' -#' The `update_by()` method itself does not know anything about the columns on which you want to perform calculations. 
-#' Rather, the desired columns are passed to individual `uby` functions, enabling a massive amount of flexibility. -#' -#' @section -#' `uby` functions: -#' `uby` functions are the workers that actually execute the complex UpdateBy calculations. These functions are -#' _generators_, meaning they return _functions_ that the Deephaven engine knows how to interpret. We call the functions -#' that they return [`UpdateByOp`][UpdateByOp]s. These `UpdateByOp`s are not R-level functions, but Deephaven-specific -#' data types that perform all of the intensive calculations. Here is a list of all `uby` functions available in Deephaven: -#' -#' - [`uby_cum_min()`][uby_cum_min] -#' - [`uby_cum_max()`][uby_cum_max] -#' - [`uby_cum_sum()`][uby_cum_sum] -#' - [`uby_cum_prod()`][uby_cum_prod] -#' - [`uby_forward_fill()`][uby_forward_fill] -#' - [`uby_delta()`][uby_delta] -#' - [`uby_emmin_tick()`][uby_emmin_tick] -#' - [`uby_emmin_time()`][uby_emmin_time] -#' - [`uby_emmax_tick()`][uby_emmax_tick] -#' - [`uby_emmax_time()`][uby_emmax_time] -#' - [`uby_ems_tick()`][uby_ems_tick] -#' - [`uby_ems_time()`][uby_ems_time] -#' - [`uby_ema_tick()`][uby_ema_tick] -#' - [`uby_ema_time()`][uby_ema_time] -#' - [`uby_emstd_tick()`][uby_emstd_tick] -#' - [`uby_emstd_time()`][uby_emstd_time] -#' - [`uby_rolling_count_tick()`][uby_rolling_count_tick] -#' - [`uby_rolling_count_time()`][uby_rolling_count_time] -#' - [`uby_rolling_group_tick()`][uby_rolling_group_tick] -#' - [`uby_rolling_group_time()`][uby_rolling_group_time] -#' - [`uby_rolling_min_tick()`][uby_rolling_min_tick] -#' - [`uby_rolling_min_time()`][uby_rolling_min_time] -#' - [`uby_rolling_max_tick()`][uby_rolling_max_tick] -#' - [`uby_rolling_max_time()`][uby_rolling_max_time] -#' - [`uby_rolling_sum_tick()`][uby_rolling_sum_tick] -#' - [`uby_rolling_sum_time()`][uby_rolling_sum_time] -#' - [`uby_rolling_prod_tick()`][uby_rolling_prod_tick] -#' - [`uby_rolling_prod_time()`][uby_rolling_prod_time] -#' - [`uby_rolling_avg_tick()`][uby_rolling_avg_tick] -#' - [`uby_rolling_avg_time()`][uby_rolling_avg_time] -#' - [`uby_rolling_wavg_tick()`][uby_rolling_wavg_tick] -#' - [`uby_rolling_wavg_time()`][uby_rolling_wavg_time] -#' - [`uby_rolling_std_tick()`][uby_rolling_std_tick] -#' - [`uby_rolling_std_time()`][uby_rolling_std_time] -#' -#' For more details on each aggregation function, click on one of the methods above or see the reference documentation -#' by running `?uby_cum_min`, `?uby_delta`, etc. 
-#'
-#' @examples
-#' \dontrun{
-#' library(rdeephaven)
-#'
-#' # connecting to Deephaven server
-#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token")
-#'
-#' # create data frame, push to server, retrieve TableHandle
-#' df <- data.frame(
-#'   timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500],
-#'   boolCol = sample(c(TRUE,FALSE), 500, TRUE),
-#'   col1 = sample(10000, size = 500, replace = TRUE),
-#'   col2 = sample(10000, size = 500, replace = TRUE),
-#'   col3 = 1:500
-#' )
-#' th <- client$import_table(df)
-#'
-#' # compute 10-row exponential weighted moving average of col1 and col2, grouped by boolCol
-#' th1 <- th$
-#'   update_by(uby_ema_tick(decay_ticks=10, cols=c("col1Ema = col1", "col2Ema = col2")), by="boolCol")
-#'
-#' # compute rolling 10-second weighted average and standard deviation of col1 and col2, weighted by col3
-#' th2 <- th$
-#'   update_by(
-#'     c(uby_rolling_wavg_time(ts_col="timeCol", wcol="col3", cols=c("col1WAvg = col1", "col2WAvg = col2"), rev_time="PT10s"),
-#'       uby_rolling_std_time(ts_col="timeCol", cols=c("col1Std = col1", "col2Std = col2"), rev_time="PT10s")))
-#'
-#' # compute cumulative minimum and maximum of col1 and col2 respectively, and the rolling 20-row sum of col3, grouped by boolCol
-#' th3 <- th$
-#'   update_by(
-#'     c(uby_cum_min(cols="col1"),
-#'       uby_cum_max(cols="col2"),
-#'       uby_rolling_sum_tick(cols="col3", rev_ticks=20)),
-#'     by="boolCol")
-#'
-#' client$close()
-#' }
-#'
-NULL
-
-
 #' @name UpdateByOp
 #' @title Deephaven UpdateByOps
 #' @md
 #' @description
 #' An `UpdateByOp` is the return type of one of Deephaven's [`uby`][UpdateBy] functions. It is a function that performs
 #' the computation specified by the `uby` function. These are intended to be passed directly to `update_by()`,
-#' and should never be instantiated directly be user code.
+#' and should never be instantiated directly by user code. For more information, see the vignette on
+#' `uby` functions with `vignette("update_by")`.
 #'
 #' If multiple tables have the same schema and the same UpdateBy operations need to be applied to each table, saving
 #' these objects directly in a variable may be useful to avoid having to re-create them each time:
@@ -172,6 +60,9 @@ UpdateByOp <- R6Class("UpdateByOp",
 #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically
 #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of
 #' a `uby` function can otherwise seem unexpected.
+#'
+#' For more information, see the vignette on `uby` functions by running
+#' `vignette("update_by")`.
 #'
 #' @param cols String or list of strings denoting the column(s) to operate on. Can be renaming expressions, i.e. “new_col = col”.
 #' Default is to compute the cumulative sum for all non-grouping columns. 
@@ -182,12 +73,12 @@ UpdateByOp <- R6Class("UpdateByOp", #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( #' timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], -#' boolCol = sample(c(TRUE,FALSE), 500, TRUE), +#' boolCol = sample(c(TRUE, FALSE), 500, TRUE), #' col1 = sample(10000, size = 500, replace = TRUE), #' col2 = sample(10000, size = 500, replace = TRUE), #' col3 = 1:500 @@ -200,12 +91,12 @@ UpdateByOp <- R6Class("UpdateByOp", #' #' # compute cumulative sum of col1 and col2, grouped by boolCol #' th2 <- th$ -#' update_by(uby_cum_sum(c("col1CumSum = col1", "col2CumSum = col2")), by="boolCol") +#' update_by(uby_cum_sum(c("col1CumSum = col1", "col2CumSum = col2")), by = "boolCol") #' #' # compute cumulative sum of col1 and col2, grouped by boolCol and parity of col3 #' th3 <- th$ #' update("col3Parity = col3 % 2")$ -#' update_by(uby_cum_sum(c("col1CumSum = col1", "col2CumSum = col2")), by=c("boolCol", "col3Parity")) +#' update_by(uby_cum_sum(c("col1CumSum = col1", "col2CumSum = col2")), by = c("boolCol", "col3Parity")) #' #' client$close() #' } @@ -235,6 +126,9 @@ uby_cum_sum <- function(cols = character()) { #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of #' a `uby` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `uby` functions by running +#' `vignette("update_by")`. #' #' @param cols String or list of strings denoting the column(s) to operate on. Can be renaming expressions, i.e. “new_col = col”. #' Default is to compute the cumulative product for all non-grouping columns. 
@@ -245,12 +139,12 @@ uby_cum_sum <- function(cols = character()) { #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( #' timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], -#' boolCol = sample(c(TRUE,FALSE), 500, TRUE), +#' boolCol = sample(c(TRUE, FALSE), 500, TRUE), #' col1 = sample(10000, size = 500, replace = TRUE), #' col2 = sample(10000, size = 500, replace = TRUE), #' col3 = 1:500 @@ -263,12 +157,12 @@ uby_cum_sum <- function(cols = character()) { #' #' # compute cumulative product of col1 and col2, grouped by boolCol #' th2 <- th$ -#' update_by(uby_cum_prod(c("col1CumProd = col1", "col2CumProd = col2")), by="boolCol") +#' update_by(uby_cum_prod(c("col1CumProd = col1", "col2CumProd = col2")), by = "boolCol") #' #' # compute cumulative product of col1 and col2, grouped by boolCol and parity of col3 #' th3 <- th$ #' update("col3Parity = col3 % 2")$ -#' update_by(uby_cum_prod(c("col1CumProd = col1", "col2CumProd = col2")), by=c("boolCol", "col3Parity")) +#' update_by(uby_cum_prod(c("col1CumProd = col1", "col2CumProd = col2")), by = c("boolCol", "col3Parity")) #' #' client$close() #' } @@ -298,6 +192,9 @@ uby_cum_prod <- function(cols = character()) { #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of #' a `uby` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `uby` functions by running +#' `vignette("update_by")`. #' #' @param cols String or list of strings denoting the column(s) to operate on. Can be renaming expressions, i.e. “new_col = col”. #' Default is to compute the cumulative minimum for all non-grouping columns. 
@@ -308,12 +205,12 @@ uby_cum_prod <- function(cols = character()) { #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( #' timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], -#' boolCol = sample(c(TRUE,FALSE), 500, TRUE), +#' boolCol = sample(c(TRUE, FALSE), 500, TRUE), #' col1 = sample(10000, size = 500, replace = TRUE), #' col2 = sample(10000, size = 500, replace = TRUE), #' col3 = 1:500 @@ -326,12 +223,12 @@ uby_cum_prod <- function(cols = character()) { #' #' # compute cumulative minimum of col1 and col2, grouped by boolCol #' th2 <- th$ -#' update_by(uby_cum_min(c("col1CumMin = col1", "col2CumMin = col2")), by="boolCol") +#' update_by(uby_cum_min(c("col1CumMin = col1", "col2CumMin = col2")), by = "boolCol") #' #' # compute cumulative minimum of col1 and col2, grouped by boolCol and parity of col3 #' th3 <- th$ #' update("col3Parity = col3 % 2")$ -#' update_by(uby_cum_min(c("col1CumMin = col1", "col2CumMin = col2")), by=c("boolCol", "col3Parity")) +#' update_by(uby_cum_min(c("col1CumMin = col1", "col2CumMin = col2")), by = c("boolCol", "col3Parity")) #' #' client$close() #' } @@ -361,6 +258,9 @@ uby_cum_min <- function(cols = character()) { #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of #' a `uby` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `uby` functions by running +#' `vignette("update_by")`. #' #' @param cols String or list of strings denoting the column(s) to operate on. Can be renaming expressions, i.e. “new_col = col”. #' Default is to compute the cumulative maximum for all non-grouping columns. 
@@ -371,12 +271,12 @@ uby_cum_min <- function(cols = character()) { #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( #' timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], -#' boolCol = sample(c(TRUE,FALSE), 500, TRUE), +#' boolCol = sample(c(TRUE, FALSE), 500, TRUE), #' col1 = sample(10000, size = 500, replace = TRUE), #' col2 = sample(10000, size = 500, replace = TRUE), #' col3 = 1:500 @@ -389,12 +289,12 @@ uby_cum_min <- function(cols = character()) { #' #' # compute cumulative maximum of col1 and col2, grouped by boolCol #' th2 <- th$ -#' update_by(uby_cum_max(c("col1CumMax = col1", "col2CumMax = col2")), by="boolCol") +#' update_by(uby_cum_max(c("col1CumMax = col1", "col2CumMax = col2")), by = "boolCol") #' #' # compute cumulative maximum of col1 and col2, grouped by boolCol and parity of col3 #' th3 <- th$ #' update("col3Parity = col3 % 2")$ -#' update_by(uby_cum_max(c("col1CumMax = col1", "col2CumMax = col2")), by=c("boolCol", "col3Parity")) +#' update_by(uby_cum_max(c("col1CumMax = col1", "col2CumMax = col2")), by = c("boolCol", "col3Parity")) #' #' client$close() #' } @@ -425,6 +325,9 @@ uby_cum_max <- function(cols = character()) { #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of #' a `uby` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `uby` functions by running +#' `vignette("update_by")`. #' #' @param cols String or list of strings denoting the column(s) to operate on. Can be renaming expressions, i.e. “new_col = col”. #' Default is to perform a forward fill on all non-grouping columns. 
@@ -435,12 +338,12 @@ uby_cum_max <- function(cols = character()) { #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( #' timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], -#' boolCol = sample(c(TRUE,FALSE), 500, TRUE), +#' boolCol = sample(c(TRUE, FALSE), 500, TRUE), #' col1 = replace(sample(10000, size = 500, replace = TRUE), sample(500, 100), NA), #' col2 = replace(sample(10000, size = 500, replace = TRUE), sample(500, 100), NA), #' col3 = replace(1:500, sample(500, 100), NA) @@ -453,13 +356,13 @@ uby_cum_max <- function(cols = character()) { #' #' # forward fill col1 and col2, grouped by boolCol #' th2 <- th$ -#' update_by(uby_forward_fill(c("col1", "col2")), by="boolCol") +#' update_by(uby_forward_fill(c("col1", "col2")), by = "boolCol") #' #' # forward fill col3, compute parity of col3, and forward fill col1 and col2, grouped by boolCol and parity of col3 #' th3 <- th$ #' update_by(uby_forward_fill("col3"))$ #' update("col3Parity = col3 % 2")$ -#' update_by(uby_forward_fill(c("col1", "col2")), by=c("boolCol", "col3Parity")) +#' update_by(uby_forward_fill(c("col1", "col2")), by = c("boolCol", "col3Parity")) #' #' client$close() #' } @@ -497,6 +400,9 @@ uby_forward_fill <- function(cols = character()) { #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of #' a `uby` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `uby` functions by running +#' `vignette("update_by")`. #' #' @param cols String or list of strings denoting the column(s) to operate on. Can be renaming expressions, i.e. “new_col = col”. #' Default is to apply the delta operation to all non-grouping columns. 
@@ -508,12 +414,12 @@ uby_forward_fill <- function(cols = character()) { #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( #' timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], -#' boolCol = sample(c(TRUE,FALSE), 500, TRUE), +#' boolCol = sample(c(TRUE, FALSE), 500, TRUE), #' col1 = sample(10000, size = 500, replace = TRUE), #' col2 = sample(10000, size = 500, replace = TRUE), #' col3 = 1:500 @@ -526,12 +432,12 @@ uby_forward_fill <- function(cols = character()) { #' #' # compute consecutive differences of col1 and col2, grouped by boolCol #' th2 <- th$ -#' update_by(uby_delta(c("col1Delta = col1", "col2Delta = col2")), by="boolCol") +#' update_by(uby_delta(c("col1Delta = col1", "col2Delta = col2")), by = "boolCol") #' #' # compute consecutive differences of col1 and col2, grouped by boolCol and parity of col3 #' th3 <- th$ #' update("col3Parity = col3 % 2")$ -#' update_by(uby_delta(c("col1Delta = col1", "col2Delta = col2")), by=c("boolCol", "col3Parity")) +#' update_by(uby_delta(c("col1Delta = col1", "col2Delta = col2")), by = c("boolCol", "col3Parity")) #' #' client$close() #' } @@ -575,6 +481,9 @@ uby_delta <- function(cols = character(), delta_control = "null_dominates") { #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of #' a `uby` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `uby` functions by running +#' `vignette("update_by")`. #' #' @param decay_ticks Numeric scalar denoting the decay rate in ticks. #' @param cols String or list of strings denoting the column(s) to operate on. Can be renaming expressions, i.e. “new_col = col”. 
@@ -587,12 +496,12 @@ uby_delta <- function(cols = character(), delta_control = "null_dominates") { #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( #' timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], -#' boolCol = sample(c(TRUE,FALSE), 500, TRUE), +#' boolCol = sample(c(TRUE, FALSE), 500, TRUE), #' col1 = sample(10000, size = 500, replace = TRUE), #' col2 = sample(10000, size = 500, replace = TRUE), #' col3 = 1:500 @@ -601,16 +510,16 @@ uby_delta <- function(cols = character(), delta_control = "null_dominates") { #' #' # compute 10-row exponential moving average of col1 and col2 #' th1 <- th$ -#' update_by(uby_ema_tick(decay_ticks=10, cols=c("col1Ema = col1", "col2Ema = col2"))) +#' update_by(uby_ema_tick(decay_ticks = 10, cols = c("col1Ema = col1", "col2Ema = col2"))) #' #' # compute 5-row exponential moving average of col1 and col2, grouped by boolCol #' th2 <- th$ -#' update_by(uby_ema_tick(decay_ticks=5, cols=c("col1Ema = col1", "col2Ema = col2")), by="boolCol") +#' update_by(uby_ema_tick(decay_ticks = 5, cols = c("col1Ema = col1", "col2Ema = col2")), by = "boolCol") #' #' # compute 20-row exponential moving average of col1 and col2, grouped by boolCol and parity of col3 #' th3 <- th$ #' update("col3Parity = col3 % 2")$ -#' update_by(uby_ema_tick(decay_ticks=20, cols=c("col1Ema = col1", "col2Ema = col2")), by=c("boolCol", "col3Parity")) +#' update_by(uby_ema_tick(decay_ticks = 20, cols = c("col1Ema = col1", "col2Ema = col2")), by = c("boolCol", "col3Parity")) #' #' client$close() #' } @@ -654,6 +563,9 @@ uby_ema_tick <- function(decay_ticks, cols = character(), operation_control = op #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of #' a `uby` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `uby` functions by running +#' `vignette("update_by")`. #' #' @param ts_col String denoting the column to use as the timestamp. #' @param decay_time ISO-8601-formatted duration string specifying the decay rate. 
@@ -667,12 +579,12 @@ uby_ema_tick <- function(decay_ticks, cols = character(), operation_control = op #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( #' timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], -#' boolCol = sample(c(TRUE,FALSE), 500, TRUE), +#' boolCol = sample(c(TRUE, FALSE), 500, TRUE), #' col1 = sample(10000, size = 500, replace = TRUE), #' col2 = sample(10000, size = 500, replace = TRUE), #' col3 = 1:500 @@ -681,16 +593,16 @@ uby_ema_tick <- function(decay_ticks, cols = character(), operation_control = op #' #' # compute 10-second exponential moving average of col1 and col2 #' th1 <- th$ -#' update_by(uby_ema_time(ts_col="timeCol", decay_time="PT10s", cols=c("col1Ema = col1", "col2Ema = col2"))) +#' update_by(uby_ema_time(ts_col = "timeCol", decay_time = "PT10s", cols = c("col1Ema = col1", "col2Ema = col2"))) #' #' # compute 5-second exponential moving average of col1 and col2, grouped by boolCol #' th2 <- th$ -#' update_by(uby_ema_time(ts_col="timeCol", decay_time="PT5s", cols=c("col1Ema = col1", "col2Ema = col2")), by="boolCol") +#' update_by(uby_ema_time(ts_col = "timeCol", decay_time = "PT5s", cols = c("col1Ema = col1", "col2Ema = col2")), by = "boolCol") #' #' # compute 20-second exponential moving average of col1 and col2, grouped by boolCol and parity of col3 #' th3 <- th$ #' update("col3Parity = col3 % 2")$ -#' update_by(uby_ema_time(ts_col="timeCol", decay_time="PT20s", cols=c("col1Ema = col1", "col2Ema = col2")), by=c("boolCol", "col3Parity")) +#' update_by(uby_ema_time(ts_col = "timeCol", decay_time = "PT20s", cols = c("col1Ema = col1", "col2Ema = col2")), by = c("boolCol", "col3Parity")) #' #' client$close() #' } @@ -734,6 +646,9 @@ uby_ema_time <- function(ts_col, decay_time, cols = character(), operation_contr #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of #' a `uby` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `uby` functions by running +#' `vignette("update_by")`. #' #' @param decay_ticks Numeric scalar denoting the decay rate in ticks. #' @param cols String or list of strings denoting the column(s) to operate on. Can be renaming expressions, i.e. “new_col = col”. 
@@ -746,12 +661,12 @@ uby_ema_time <- function(ts_col, decay_time, cols = character(), operation_contr #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( #' timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], -#' boolCol = sample(c(TRUE,FALSE), 500, TRUE), +#' boolCol = sample(c(TRUE, FALSE), 500, TRUE), #' col1 = sample(10000, size = 500, replace = TRUE), #' col2 = sample(10000, size = 500, replace = TRUE), #' col3 = 1:500 @@ -760,16 +675,16 @@ uby_ema_time <- function(ts_col, decay_time, cols = character(), operation_contr #' #' # compute 10-row exponential moving sum of col1 and col2 #' th1 <- th$ -#' update_by(uby_ems_tick(decay_ticks=10, cols=c("col1Ems = col1", "col2Ems = col2"))) +#' update_by(uby_ems_tick(decay_ticks = 10, cols = c("col1Ems = col1", "col2Ems = col2"))) #' #' # compute 5-row exponential moving sum of col1 and col2, grouped by boolCol #' th2 <- th$ -#' update_by(uby_ems_tick(decay_ticks=5, cols=c("col1Ems = col1", "col2Ems = col2")), by="boolCol") +#' update_by(uby_ems_tick(decay_ticks = 5, cols = c("col1Ems = col1", "col2Ems = col2")), by = "boolCol") #' #' # compute 20-row exponential moving sum of col1 and col2, grouped by boolCol and parity of col3 #' th3 <- th$ #' update("col3Parity = col3 % 2")$ -#' update_by(uby_ems_tick(decay_ticks=20, cols=c("col1Ems = col1", "col2Ems = col2")), by=c("boolCol", "col3Parity")) +#' update_by(uby_ems_tick(decay_ticks = 20, cols = c("col1Ems = col1", "col2Ems = col2")), by = c("boolCol", "col3Parity")) #' #' client$close() #' } @@ -813,6 +728,9 @@ uby_ems_tick <- function(decay_ticks, cols = character(), operation_control = op #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of #' a `uby` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `uby` functions by running +#' `vignette("update_by")`. #' #' @param decay_time ISO-8601-formatted duration string specifying the decay rate. #' @param cols String or list of strings denoting the column(s) to operate on. Can be renaming expressions, i.e. “new_col = col”. 
@@ -825,12 +743,12 @@ uby_ems_tick <- function(decay_ticks, cols = character(), operation_control = op #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( #' timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], -#' boolCol = sample(c(TRUE,FALSE), 500, TRUE), +#' boolCol = sample(c(TRUE, FALSE), 500, TRUE), #' col1 = sample(10000, size = 500, replace = TRUE), #' col2 = sample(10000, size = 500, replace = TRUE), #' col3 = 1:500 @@ -839,16 +757,16 @@ uby_ems_tick <- function(decay_ticks, cols = character(), operation_control = op #' #' # compute 10-second exponential moving sum of col1 and col2 #' th1 <- th$ -#' update_by(uby_ems_time(ts_col="timeCol", decay_time="PT10s", cols=c("col1Ems = col1", "col2Ems = col2"))) +#' update_by(uby_ems_time(ts_col = "timeCol", decay_time = "PT10s", cols = c("col1Ems = col1", "col2Ems = col2"))) #' #' # compute 5-second exponential moving sum of col1 and col2, grouped by boolCol #' th2 <- th$ -#' update_by(uby_ems_time(ts_col="timeCol", decay_time="PT5s", cols=c("col1Ems = col1", "col2Ems = col2")), by="boolCol") +#' update_by(uby_ems_time(ts_col = "timeCol", decay_time = "PT5s", cols = c("col1Ems = col1", "col2Ems = col2")), by = "boolCol") #' #' # compute 20-second exponential moving sum of col1 and col2, grouped by boolCol and parity of col3 #' th3 <- th$ #' update("col3Parity = col3 % 2")$ -#' update_by(uby_ems_time(ts_col="timeCol", decay_time="PT20s", cols=c("col1Ems = col1", "col2Ems = col2")), by=c("boolCol", "col3Parity")) +#' update_by(uby_ems_time(ts_col = "timeCol", decay_time = "PT20s", cols = c("col1Ems = col1", "col2Ems = col2")), by = c("boolCol", "col3Parity")) #' #' client$close() #' } @@ -892,6 +810,9 @@ uby_ems_time <- function(ts_col, decay_time, cols = character(), operation_contr #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of #' a `uby` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `uby` functions by running +#' `vignette("update_by")`. #' #' @param decay_ticks Numeric scalar denoting the decay rate in ticks. #' @param cols String or list of strings denoting the column(s) to operate on. Can be renaming expressions, i.e. “new_col = col”. 
@@ -904,12 +825,12 @@ uby_ems_time <- function(ts_col, decay_time, cols = character(), operation_contr #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( #' timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], -#' boolCol = sample(c(TRUE,FALSE), 500, TRUE), +#' boolCol = sample(c(TRUE, FALSE), 500, TRUE), #' col1 = sample(10000, size = 500, replace = TRUE), #' col2 = sample(10000, size = 500, replace = TRUE), #' col3 = 1:500 @@ -918,16 +839,16 @@ uby_ems_time <- function(ts_col, decay_time, cols = character(), operation_contr #' #' # compute 10-row exponential moving minimum of col1 and col2 #' th1 <- th$ -#' update_by(uby_emmin_tick(decay_ticks=10, cols=c("col1Emmin = col1", "col2Emmin = col2"))) +#' update_by(uby_emmin_tick(decay_ticks = 10, cols = c("col1Emmin = col1", "col2Emmin = col2"))) #' #' # compute 5-row exponential moving minimum of col1 and col2, grouped by boolCol #' th2 <- th$ -#' update_by(uby_emmin_tick(decay_ticks=5, cols=c("col1Emmin = col1", "col2Emmin = col2")), by="boolCol") +#' update_by(uby_emmin_tick(decay_ticks = 5, cols = c("col1Emmin = col1", "col2Emmin = col2")), by = "boolCol") #' #' # compute 20-row exponential moving minimum of col1 and col2, grouped by boolCol and parity of col3 #' th3 <- th$ #' update("col3Parity = col3 % 2")$ -#' update_by(uby_emmin_tick(decay_ticks=20, cols=c("col1Emmin = col1", "col2Emmin = col2")), by=c("boolCol", "col3Parity")) +#' update_by(uby_emmin_tick(decay_ticks = 20, cols = c("col1Emmin = col1", "col2Emmin = col2")), by = c("boolCol", "col3Parity")) #' #' client$close() #' } @@ -971,6 +892,9 @@ uby_emmin_tick <- function(decay_ticks, cols = character(), operation_control = #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of #' a `uby` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `uby` functions by running +#' `vignette("update_by")`. #' #' @param decay_time ISO-8601-formatted duration string specifying the decay rate. #' @param cols String or list of strings denoting the column(s) to operate on. Can be renaming expressions, i.e. “new_col = col”. 
@@ -983,12 +907,12 @@ uby_emmin_tick <- function(decay_ticks, cols = character(), operation_control = #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( #' timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], -#' boolCol = sample(c(TRUE,FALSE), 500, TRUE), +#' boolCol = sample(c(TRUE, FALSE), 500, TRUE), #' col1 = sample(10000, size = 500, replace = TRUE), #' col2 = sample(10000, size = 500, replace = TRUE), #' col3 = 1:500 @@ -997,16 +921,16 @@ uby_emmin_tick <- function(decay_ticks, cols = character(), operation_control = #' #' # compute 10-second exponential moving minimum of col1 and col2 #' th1 <- th$ -#' update_by(uby_emmin_time(ts_col="timeCol", decay_time="PT10s", cols=c("col1Emmin = col1", "col2Emmin = col2"))) +#' update_by(uby_emmin_time(ts_col = "timeCol", decay_time = "PT10s", cols = c("col1Emmin = col1", "col2Emmin = col2"))) #' #' # compute 5-second exponential moving minimum of col1 and col2, grouped by boolCol #' th2 <- th$ -#' update_by(uby_emmin_time(ts_col="timeCol", decay_time="PT5s", cols=c("col1Emmin = col1", "col2Emmin = col2")), by="boolCol") +#' update_by(uby_emmin_time(ts_col = "timeCol", decay_time = "PT5s", cols = c("col1Emmin = col1", "col2Emmin = col2")), by = "boolCol") #' #' # compute 20-second exponential moving minimum of col1 and col2, grouped by boolCol and parity of col3 #' th3 <- th$ #' update("col3Parity = col3 % 2")$ -#' update_by(uby_emmin_time(ts_col="timeCol", decay_time="PT20s", cols=c("col1Emmin = col1", "col2Emmin = col2")), by=c("boolCol", "col3Parity")) +#' update_by(uby_emmin_time(ts_col = "timeCol", decay_time = "PT20s", cols = c("col1Emmin = col1", "col2Emmin = col2")), by = c("boolCol", "col3Parity")) #' #' client$close() #' } @@ -1050,6 +974,9 @@ uby_emmin_time <- function(ts_col, decay_time, cols = character(), operation_con #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of #' a `uby` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `uby` functions by running +#' `vignette("update_by")`. #' #' @param decay_ticks Numeric scalar denoting the decay rate in ticks. #' @param cols String or list of strings denoting the column(s) to operate on. Can be renaming expressions, i.e. “new_col = col”. 
@@ -1062,12 +989,12 @@ uby_emmin_time <- function(ts_col, decay_time, cols = character(), operation_con #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( #' timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], -#' boolCol = sample(c(TRUE,FALSE), 500, TRUE), +#' boolCol = sample(c(TRUE, FALSE), 500, TRUE), #' col1 = sample(10000, size = 500, replace = TRUE), #' col2 = sample(10000, size = 500, replace = TRUE), #' col3 = 1:500 @@ -1076,16 +1003,16 @@ uby_emmin_time <- function(ts_col, decay_time, cols = character(), operation_con #' #' # compute 10-row exponential moving maximum of col1 and col2 #' th1 <- th$ -#' update_by(uby_emmax_tick(decay_ticks=10, cols=c("col1Emmax = col1", "col2Emmax = col2"))) +#' update_by(uby_emmax_tick(decay_ticks = 10, cols = c("col1Emmax = col1", "col2Emmax = col2"))) #' #' # compute 5-row exponential moving maximum of col1 and col2, grouped by boolCol #' th2 <- th$ -#' update_by(uby_emmax_tick(decay_ticks=5, cols=c("col1Emmax = col1", "col2Emmax = col2")), by="boolCol") +#' update_by(uby_emmax_tick(decay_ticks = 5, cols = c("col1Emmax = col1", "col2Emmax = col2")), by = "boolCol") #' #' # compute 20-row exponential moving maximum of col1 and col2, grouped by boolCol and parity of col3 #' th3 <- th$ #' update("col3Parity = col3 % 2")$ -#' update_by(uby_emmax_tick(decay_ticks=20, cols=c("col1Emmax = col1", "col2Emmax = col2")), by=c("boolCol", "col3Parity")) +#' update_by(uby_emmax_tick(decay_ticks = 20, cols = c("col1Emmax = col1", "col2Emmax = col2")), by = c("boolCol", "col3Parity")) #' #' client$close() #' } @@ -1129,6 +1056,9 @@ uby_emmax_tick <- function(decay_ticks, cols = character(), operation_control = #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of #' a `uby` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `uby` functions by running +#' `vignette("update_by")`. #' #' @param decay_time ISO-8601-formatted duration string specifying the decay rate. #' @param cols String or list of strings denoting the column(s) to operate on. Can be renaming expressions, i.e. “new_col = col”. 
@@ -1141,12 +1071,12 @@ uby_emmax_tick <- function(decay_ticks, cols = character(), operation_control = #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( #' timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], -#' boolCol = sample(c(TRUE,FALSE), 500, TRUE), +#' boolCol = sample(c(TRUE, FALSE), 500, TRUE), #' col1 = sample(10000, size = 500, replace = TRUE), #' col2 = sample(10000, size = 500, replace = TRUE), #' col3 = 1:500 @@ -1155,16 +1085,16 @@ uby_emmax_tick <- function(decay_ticks, cols = character(), operation_control = #' #' # compute 10-second exponential moving maximum of col1 and col2 #' th1 <- th$ -#' update_by(uby_emmax_time(ts_col="timeCol", decay_time="PT10s", cols=c("col1Emmax = col1", "col2Emmax = col2"))) +#' update_by(uby_emmax_time(ts_col = "timeCol", decay_time = "PT10s", cols = c("col1Emmax = col1", "col2Emmax = col2"))) #' #' # compute 5-second exponential moving maximum of col1 and col2, grouped by boolCol #' th2 <- th$ -#' update_by(uby_emmax_time(ts_col="timeCol", decay_time="PT5s", cols=c("col1Emmax = col1", "col2Emmax = col2")), by="boolCol") +#' update_by(uby_emmax_time(ts_col = "timeCol", decay_time = "PT5s", cols = c("col1Emmax = col1", "col2Emmax = col2")), by = "boolCol") #' #' # compute 20-second exponential moving maximum of col1 and col2, grouped by boolCol and parity of col3 #' th3 <- th$ #' update("col3Parity = col3 % 2")$ -#' update_by(uby_emmax_time(ts_col="timeCol", decay_time="PT20s", cols=c("col1Emmax = col1", "col2Emmax = col2")), by=c("boolCol", "col3Parity")) +#' update_by(uby_emmax_time(ts_col = "timeCol", decay_time = "PT20s", cols = c("col1Emmax = col1", "col2Emmax = col2")), by = c("boolCol", "col3Parity")) #' #' client$close() #' } @@ -1213,6 +1143,9 @@ uby_emmax_time <- function(ts_col, decay_time, cols = character(), operation_con #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of #' a `uby` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `uby` functions by running +#' `vignette("update_by")`. #' #' @param decay_ticks Numeric scalar denoting the decay rate in ticks. #' @param cols String or list of strings denoting the column(s) to operate on. Can be renaming expressions, i.e. “new_col = col”. 
@@ -1225,12 +1158,12 @@ uby_emmax_time <- function(ts_col, decay_time, cols = character(), operation_con #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( #' timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], -#' boolCol = sample(c(TRUE,FALSE), 500, TRUE), +#' boolCol = sample(c(TRUE, FALSE), 500, TRUE), #' col1 = sample(10000, size = 500, replace = TRUE), #' col2 = sample(10000, size = 500, replace = TRUE), #' col3 = 1:500 @@ -1239,16 +1172,16 @@ uby_emmax_time <- function(ts_col, decay_time, cols = character(), operation_con #' #' # compute 10-row exponential moving standard deviation of col1 and col2 #' th1 <- th$ -#' update_by(uby_emstd_tick(decay_ticks=10, cols=c("col1Emstd = col1", "col2Emstd = col2"))) +#' update_by(uby_emstd_tick(decay_ticks = 10, cols = c("col1Emstd = col1", "col2Emstd = col2"))) #' #' # compute 5-row exponential moving standard deviation of col1 and col2, grouped by boolCol #' th2 <- th$ -#' update_by(uby_emstd_tick(decay_ticks=5, cols=c("col1Emstd = col1", "col2Emstd = col2")), by="boolCol") +#' update_by(uby_emstd_tick(decay_ticks = 5, cols = c("col1Emstd = col1", "col2Emstd = col2")), by = "boolCol") #' #' # compute 20-row exponential moving standard deviation of col1 and col2, grouped by boolCol and parity of col3 #' th3 <- th$ #' update("col3Parity = col3 % 2")$ -#' update_by(uby_emstd_tick(decay_ticks=20, cols=c("col1Emstd = col1", "col2Emstd = col2")), by=c("boolCol", "col3Parity")) +#' update_by(uby_emstd_tick(decay_ticks = 20, cols = c("col1Emstd = col1", "col2Emstd = col2")), by = c("boolCol", "col3Parity")) #' #' client$close() #' } @@ -1297,6 +1230,9 @@ uby_emstd_tick <- function(decay_ticks, cols = character(), operation_control = #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of #' a `uby` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `uby` functions by running +#' `vignette("update_by")`. #' #' @param decay_time ISO-8601-formatted duration string specifying the decay rate. #' @param cols String or list of strings denoting the column(s) to operate on. Can be renaming expressions, i.e. “new_col = col”. 
@@ -1309,12 +1245,12 @@ uby_emstd_tick <- function(decay_ticks, cols = character(), operation_control = #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( #' timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], -#' boolCol = sample(c(TRUE,FALSE), 500, TRUE), +#' boolCol = sample(c(TRUE, FALSE), 500, TRUE), #' col1 = sample(10000, size = 500, replace = TRUE), #' col2 = sample(10000, size = 500, replace = TRUE), #' col3 = 1:500 @@ -1323,16 +1259,16 @@ uby_emstd_tick <- function(decay_ticks, cols = character(), operation_control = #' #' # compute 10-second exponential moving standard deviation of col1 and col2 #' th1 <- th$ -#' update_by(uby_emstd_time(ts_col="timeCol", decay_time="PT10s", cols=c("col1Emstd = col1", "col2Emstd = col2"))) +#' update_by(uby_emstd_time(ts_col = "timeCol", decay_time = "PT10s", cols = c("col1Emstd = col1", "col2Emstd = col2"))) #' #' # compute 5-second exponential moving standard deviation of col1 and col2, grouped by boolCol #' th2 <- th$ -#' update_by(uby_emstd_time(ts_col="timeCol", decay_time="PT5s", cols=c("col1Emstd = col1", "col2Emstd = col2")), by="boolCol") +#' update_by(uby_emstd_time(ts_col = "timeCol", decay_time = "PT5s", cols = c("col1Emstd = col1", "col2Emstd = col2")), by = "boolCol") #' #' # compute 20-second exponential moving standard deviation of col1 and col2, grouped by boolCol and parity of col3 #' th3 <- th$ #' update("col3Parity = col3 % 2")$ -#' update_by(uby_emstd_time(ts_col="timeCol", decay_time="PT20s", cols=c("col1Emstd = col1", "col2Emstd = col2")), by=c("boolCol", "col3Parity")) +#' update_by(uby_emstd_time(ts_col = "timeCol", decay_time = "PT20s", cols = c("col1Emstd = col1", "col2Emstd = col2")), by = c("boolCol", "col3Parity")) #' #' client$close() #' } @@ -1380,6 +1316,9 @@ uby_emstd_time <- function(ts_col, decay_time, cols = character(), operation_con #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of #' a `uby` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `uby` functions by running +#' `vignette("update_by")`. #' #' @param cols String or list of strings denoting the column(s) to operate on. Can be renaming expressions, i.e. “new_col = col”. #' Default is to compute the rolling sum for all non-grouping columns. 
@@ -1392,12 +1331,12 @@ uby_emstd_time <- function(ts_col, decay_time, cols = character(), operation_con #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( #' timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], -#' boolCol = sample(c(TRUE,FALSE), 500, TRUE), +#' boolCol = sample(c(TRUE, FALSE), 500, TRUE), #' col1 = sample(10000, size = 500, replace = TRUE), #' col2 = sample(10000, size = 500, replace = TRUE), #' col3 = 1:500 @@ -1406,16 +1345,16 @@ uby_emstd_time <- function(ts_col, decay_time, cols = character(), operation_con #' #' # compute rolling sum of col1 and col2, using the previous 5 rows and current row #' th1 <- th$ -#' update_by(uby_rolling_sum_tick(cols=c("col1RollSum = col1", "col2RollSum = col2"), rev_ticks=6)) +#' update_by(uby_rolling_sum_tick(cols = c("col1RollSum = col1", "col2RollSum = col2"), rev_ticks = 6)) #' #' # compute rolling sum of col1 and col2, grouped by boolCol, using previous 5 rows, current row, and following 5 rows #' th2 <- th$ -#' update_by(uby_rolling_sum_tick(cols=c("col1RollSum = col1", "col2RollSum = col2"), rev_ticks=6, fwd_ticks=5)), by="boolCol") +#' update_by(uby_rolling_sum_tick(cols = c("col1RollSum = col1", "col2RollSum = col2"), rev_ticks = 6, fwd_ticks = 5), by = "boolCol") #' #' # compute rolling sum of col1 and col2, grouped by boolCol and parity of col3, using current row and following 10 rows #' th3 <- th$ #' update("col3Parity = col3 % 2")$ -#' update_by(uby_rolling_sum_tick(cols=c("col1RollSum = col1", "col2RollSum = col2"), rev_ticks=1, fwd_ticks=10)), by=c("boolCol", "col3Parity")) +#' update_by(uby_rolling_sum_tick(cols = c("col1RollSum = col1", "col2RollSum = col2"), rev_ticks = 1, fwd_ticks = 10), by = c("boolCol", "col3Parity")) #' #' client$close() #' } @@ -1459,6 +1398,9 @@ uby_rolling_sum_tick <- function(cols, rev_ticks, fwd_ticks = 0) { #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of #' a `uby` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `uby` functions by running +#' `vignette("update_by")`. #' #' @param ts_col String denoting the column to use as the timestamp. #' @param cols String or list of strings denoting the column(s) to operate on. Can be renaming expressions, i.e. “new_col = col”. 
@@ -1472,12 +1414,12 @@ uby_rolling_sum_tick <- function(cols, rev_ticks, fwd_ticks = 0) {
 #' library(rdeephaven)
 #'
 #' # connecting to Deephaven server
-#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token")
+#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token")
 #'
 #' # create data frame, push to server, retrieve TableHandle
 #' df <- data.frame(
 #'   timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500],
-#'   boolCol = sample(c(TRUE,FALSE), 500, TRUE),
+#'   boolCol = sample(c(TRUE, FALSE), 500, TRUE),
 #'   col1 = sample(10000, size = 500, replace = TRUE),
 #'   col2 = sample(10000, size = 500, replace = TRUE),
 #'   col3 = 1:500
@@ -1486,16 +1428,16 @@ uby_rolling_sum_tick <- function(cols, rev_ticks, fwd_ticks = 0) {
 #'
 #' # compute rolling sum of col1 and col2, using the previous 5 seconds
 #' th1 <- th$
-#'   update_by(uby_rolling_sum_time(ts_col="timeCol", cols=c("col1RollSum = col1", "col2RollSum = col2"), rev_time="PT5s"))
+#'   update_by(uby_rolling_sum_time(ts_col = "timeCol", cols = c("col1RollSum = col1", "col2RollSum = col2"), rev_time = "PT5s"))
 #'
 #' # compute rolling sum of col1 and col2, grouped by boolCol, using previous 5 seconds, and following 5 seconds
 #' th2 <- th$
-#'   update_by(uby_rolling_sum_time(ts_col="timeCol", cols=c("col1RollSum = col1", "col2RollSum = col2"), rev_time="PT5s", fwd_ticks="PT5s")), by="boolCol")
+#'   update_by(uby_rolling_sum_time(ts_col = "timeCol", cols = c("col1RollSum = col1", "col2RollSum = col2"), rev_time = "PT5s", fwd_time = "PT5s"), by = "boolCol")
 #'
 #' # compute rolling sum of col1 and col2, grouped by boolCol and parity of col3, using following 10 seconds
 #' th3 <- th$
 #'   update("col3Parity = col3 % 2")$
-#'   update_by(uby_rolling_sum_time(ts_col="timeCol", cols=c("col1RollSum = col1", "col2RollSum = col2"), rev_time="PT0s", fwd_time="PT10s")), by=c("boolCol", "col3Parity"))
+#'   update_by(uby_rolling_sum_time(ts_col = "timeCol", cols = c("col1RollSum = col1", "col2RollSum = col2"), rev_time = "PT0s", fwd_time = "PT10s"), by = c("boolCol", "col3Parity"))
 #'
 #' client$close()
 #' }
@@ -1543,6 +1485,9 @@ uby_rolling_sum_time <- function(ts_col, cols, rev_time, fwd_time = "PT0s") {
 #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically
 #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of
 #' a `uby` function can otherwise seem unexpected.
+#'
+#' For more information, see the vignette on `uby` functions by running
+#' `vignette("update_by")`.
 #'
 #' @param ts_col String denoting the column to use as the timestamp.
 #' @param cols String or list of strings denoting the column(s) to operate on. Can be renaming expressions, i.e. “new_col = col”.
@@ -1555,12 +1500,12 @@ uby_rolling_sum_time <- function(ts_col, cols, rev_time, fwd_time = "PT0s") { #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( #' timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], -#' boolCol = sample(c(TRUE,FALSE), 500, TRUE), +#' boolCol = sample(c(TRUE, FALSE), 500, TRUE), #' col1 = sample(10000, size = 500, replace = TRUE), #' col2 = sample(10000, size = 500, replace = TRUE), #' col3 = 1:500 @@ -1569,12 +1514,12 @@ uby_rolling_sum_time <- function(ts_col, cols, rev_time, fwd_time = "PT0s") { #' #' # compute rolling group of col1 and col2, grouped by boolCol, using previous 5 rows, current row, and following 5 rows #' th1 <- th$ -#' update_by(uby_rolling_group_tick(cols=c("col1RollGroup = col1", "col2RollGroup = col2"), rev_ticks=6, fwd_ticks=5)), by="boolCol") +#' update_by(uby_rolling_group_tick(cols = c("col1RollGroup = col1", "col2RollGroup = col2"), rev_ticks = 6, fwd_ticks = 5), by = "boolCol") #' #' # compute rolling group of col1 and col2, grouped by boolCol and parity of col3, using current row and following 10 rows #' th2 <- th$ #' update("col3Parity = col3 % 2")$ -#' update_by(uby_rolling_group_tick(cols=c("col1RollGroup = col1", "col2RollGroup = col2"), rev_ticks=1, fwd_ticks=10)), by=c("boolCol", "col3Parity")) +#' update_by(uby_rolling_group_tick(cols = c("col1RollGroup = col1", "col2RollGroup = col2"), rev_ticks = 1, fwd_ticks = 10), by = c("boolCol", "col3Parity")) #' #' client$close() #' } @@ -1618,6 +1563,9 @@ uby_rolling_group_tick <- function(cols, rev_ticks, fwd_ticks = 0) { #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of #' a `uby` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `uby` functions by running +#' `vignette("update_by")`. #' #' @param ts_col String denoting the column to use as the timestamp. #' @param cols String or list of strings denoting the column(s) to operate on. Can be renaming expressions, i.e. “new_col = col”. 
@@ -1631,12 +1579,12 @@ uby_rolling_group_tick <- function(cols, rev_ticks, fwd_ticks = 0) {
 #' library(rdeephaven)
 #'
 #' # connecting to Deephaven server
-#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token")
+#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token")
 #'
 #' # create data frame, push to server, retrieve TableHandle
 #' df <- data.frame(
 #'   timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500],
-#'   boolCol = sample(c(TRUE,FALSE), 500, TRUE),
+#'   boolCol = sample(c(TRUE, FALSE), 500, TRUE),
 #'   col1 = sample(10000, size = 500, replace = TRUE),
 #'   col2 = sample(10000, size = 500, replace = TRUE),
 #'   col3 = 1:500
@@ -1645,12 +1593,12 @@ uby_rolling_group_tick <- function(cols, rev_ticks, fwd_ticks = 0) {
 #'
 #' # compute rolling group of col1 and col2, grouped by boolCol, using previous 5 seconds, and following 5 seconds
 #' th1 <- th$
-#'   update_by(uby_rolling_group_time(ts_col="timeCol", cols=c("col1RollGroup = col1", "col2RollGroup = col2"), rev_time="PT5s", fwd_ticks="PT5s")), by="boolCol")
+#'   update_by(uby_rolling_group_time(ts_col = "timeCol", cols = c("col1RollGroup = col1", "col2RollGroup = col2"), rev_time = "PT5s", fwd_time = "PT5s"), by = "boolCol")
 #'
 #' # compute rolling group of col1 and col2, grouped by boolCol and parity of col3, using following 10 seconds
 #' th2 <- th$
 #'   update("col3Parity = col3 % 2")$
-#'   update_by(uby_rolling_group_time(ts_col="timeCol", cols=c("col1RollGroup = col1", "col2RollGroup = col2"), rev_time="PT0s", fwd_time="PT10s")), by=c("boolCol", "col3Parity"))
+#'   update_by(uby_rolling_group_time(ts_col = "timeCol", cols = c("col1RollGroup = col1", "col2RollGroup = col2"), rev_time = "PT0s", fwd_time = "PT10s"), by = c("boolCol", "col3Parity"))
 #'
 #' client$close()
 #' }
@@ -1698,6 +1646,9 @@ uby_rolling_group_time <- function(ts_col, cols, rev_time, fwd_time = "PT0s") {
 #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically
 #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of
 #' a `uby` function can otherwise seem unexpected.
+#'
+#' For more information, see the vignette on `uby` functions by running
+#' `vignette("update_by")`.
 #'
 #' @param cols String or list of strings denoting the column(s) to operate on. Can be renaming expressions, i.e. “new_col = col”.
 #' Default is to compute the rolling average for all non-grouping columns.
@@ -1710,12 +1661,12 @@ uby_rolling_group_time <- function(ts_col, cols, rev_time, fwd_time = "PT0s") { #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( #' timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], -#' boolCol = sample(c(TRUE,FALSE), 500, TRUE), +#' boolCol = sample(c(TRUE, FALSE), 500, TRUE), #' col1 = sample(10000, size = 500, replace = TRUE), #' col2 = sample(10000, size = 500, replace = TRUE), #' col3 = 1:500 @@ -1724,16 +1675,16 @@ uby_rolling_group_time <- function(ts_col, cols, rev_time, fwd_time = "PT0s") { #' #' # compute rolling average of col1 and col2, using the previous 5 rows and current row #' th1 <- th$ -#' update_by(uby_rolling_avg_tick(cols=c("col1RollAvg = col1", "col2RollAvg = col2"), rev_ticks=6)) +#' update_by(uby_rolling_avg_tick(cols = c("col1RollAvg = col1", "col2RollAvg = col2"), rev_ticks = 6)) #' #' # compute rolling average of col1 and col2, grouped by boolCol, using previous 5 rows, current row, and following 5 rows #' th2 <- th$ -#' update_by(uby_rolling_avg_tick(cols=c("col1RollAvg = col1", "col2RollAvg = col2"), rev_ticks=6, fwd_ticks=5)), by="boolCol") +#' update_by(uby_rolling_avg_tick(cols = c("col1RollAvg = col1", "col2RollAvg = col2"), rev_ticks = 6, fwd_ticks = 5), by = "boolCol") #' #' # compute rolling average of col1 and col2, grouped by boolCol and parity of col3, using current row and following 10 rows #' th3 <- th$ #' update("col3Parity = col3 % 2")$ -#' update_by(uby_rolling_avg_tick(cols=c("col1RollAvg = col1", "col2RollAvg = col2"), rev_ticks=1, fwd_ticks=10)), by=c("boolCol", "col3Parity")) +#' update_by(uby_rolling_avg_tick(cols = c("col1RollAvg = col1", "col2RollAvg = col2"), rev_ticks = 1, fwd_ticks = 10), by = c("boolCol", "col3Parity")) #' #' client$close() #' } @@ -1777,6 +1728,9 @@ uby_rolling_avg_tick <- function(cols, rev_ticks, fwd_ticks = 0) { #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of #' a `uby` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `uby` functions by running +#' `vignette("update_by")`. #' #' @param ts_col String denoting the column to use as the timestamp. #' @param cols String or list of strings denoting the column(s) to operate on. Can be renaming expressions, i.e. “new_col = col”. 
@@ -1790,12 +1744,12 @@ uby_rolling_avg_tick <- function(cols, rev_ticks, fwd_ticks = 0) {
 #' library(rdeephaven)
 #'
 #' # connecting to Deephaven server
-#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token")
+#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token")
 #'
 #' # create data frame, push to server, retrieve TableHandle
 #' df <- data.frame(
 #'   timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500],
-#'   boolCol = sample(c(TRUE,FALSE), 500, TRUE),
+#'   boolCol = sample(c(TRUE, FALSE), 500, TRUE),
 #'   col1 = sample(10000, size = 500, replace = TRUE),
 #'   col2 = sample(10000, size = 500, replace = TRUE),
 #'   col3 = 1:500
@@ -1804,16 +1758,16 @@ uby_rolling_avg_tick <- function(cols, rev_ticks, fwd_ticks = 0) {
 #'
 #' # compute rolling average of col1 and col2, using the previous 5 seconds
 #' th1 <- th$
-#'   update_by(uby_rolling_avg_time(ts_col="timeCol", cols=c("col1RollAvg = col1", "col2RollAvg = col2"), rev_time="PT5s"))
+#'   update_by(uby_rolling_avg_time(ts_col = "timeCol", cols = c("col1RollAvg = col1", "col2RollAvg = col2"), rev_time = "PT5s"))
 #'
 #' # compute rolling average of col1 and col2, grouped by boolCol, using previous 5 seconds, and following 5 seconds
 #' th2 <- th$
-#'   update_by(uby_rolling_avg_time(ts_col="timeCol", cols=c("col1RollAvg = col1", "col2RollAvg = col2"), rev_time="PT5s", fwd_ticks="PT5s")), by="boolCol")
+#'   update_by(uby_rolling_avg_time(ts_col = "timeCol", cols = c("col1RollAvg = col1", "col2RollAvg = col2"), rev_time = "PT5s", fwd_time = "PT5s"), by = "boolCol")
 #'
 #' # compute rolling average of col1 and col2, grouped by boolCol and parity of col3, using following 10 seconds
 #' th3 <- th$
 #'   update("col3Parity = col3 % 2")$
-#'   update_by(uby_rolling_avg_time(ts_col="timeCol", cols=c("col1RollAvg = col1", "col2RollAvg = col2"), rev_time="PT0s", fwd_time="PT10s")), by=c("boolCol", "col3Parity"))
+#'   update_by(uby_rolling_avg_time(ts_col = "timeCol", cols = c("col1RollAvg = col1", "col2RollAvg = col2"), rev_time = "PT0s", fwd_time = "PT10s"), by = c("boolCol", "col3Parity"))
 #'
 #' client$close()
 #' }
@@ -1861,6 +1815,9 @@ uby_rolling_avg_time <- function(ts_col, cols, rev_time, fwd_time = "PT0s") {
 #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically
 #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of
 #' a `uby` function can otherwise seem unexpected.
+#'
+#' For more information, see the vignette on `uby` functions by running
+#' `vignette("update_by")`.
 #'
 #' @param cols String or list of strings denoting the column(s) to operate on. Can be renaming expressions, i.e. “new_col = col”.
 #' Default is to compute the rolling minimum for all non-grouping columns.
@@ -1873,12 +1830,12 @@ uby_rolling_avg_time <- function(ts_col, cols, rev_time, fwd_time = "PT0s") { #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( #' timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], -#' boolCol = sample(c(TRUE,FALSE), 500, TRUE), +#' boolCol = sample(c(TRUE, FALSE), 500, TRUE), #' col1 = sample(10000, size = 500, replace = TRUE), #' col2 = sample(10000, size = 500, replace = TRUE), #' col3 = 1:500 @@ -1887,16 +1844,16 @@ uby_rolling_avg_time <- function(ts_col, cols, rev_time, fwd_time = "PT0s") { #' #' # compute rolling minimum of col1 and col2, using the previous 5 rows and current row #' th1 <- th$ -#' update_by(uby_rolling_min_tick(cols=c("col1RollMin = col1", "col2RollMin = col2"), rev_ticks=6)) +#' update_by(uby_rolling_min_tick(cols = c("col1RollMin = col1", "col2RollMin = col2"), rev_ticks = 6)) #' #' # compute rolling minimum of col1 and col2, grouped by boolCol, using previous 5 rows, current row, and following 5 rows #' th2 <- th$ -#' update_by(uby_rolling_min_tick(cols=c("col1RollMin = col1", "col2RollMin = col2"), rev_ticks=6, fwd_ticks=5)), by="boolCol") +#' update_by(uby_rolling_min_tick(cols = c("col1RollMin = col1", "col2RollMin = col2"), rev_ticks = 6, fwd_ticks = 5), by = "boolCol") #' #' # compute rolling minimum of col1 and col2, grouped by boolCol and parity of col3, using current row and following 10 rows #' th3 <- th$ #' update("col3Parity = col3 % 2")$ -#' update_by(uby_rolling_min_tick(cols=c("col1RollMin = col1", "col2RollMin = col2"), rev_ticks=1, fwd_ticks=10)), by=c("boolCol", "col3Parity")) +#' update_by(uby_rolling_min_tick(cols = c("col1RollMin = col1", "col2RollMin = col2"), rev_ticks = 1, fwd_ticks = 10), by = c("boolCol", "col3Parity")) #' #' client$close() #' } @@ -1940,6 +1897,9 @@ uby_rolling_min_tick <- function(cols, rev_ticks, fwd_ticks = 0) { #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of #' a `uby` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `uby` functions by running +#' `vignette("update_by")`. #' #' @param ts_col String denoting the column to use as the timestamp. #' @param cols String or list of strings denoting the column(s) to operate on. Can be renaming expressions, i.e. “new_col = col”. 
@@ -1953,12 +1913,12 @@ uby_rolling_min_tick <- function(cols, rev_ticks, fwd_ticks = 0) {
 #' library(rdeephaven)
 #'
 #' # connecting to Deephaven server
-#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token")
+#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token")
 #'
 #' # create data frame, push to server, retrieve TableHandle
 #' df <- data.frame(
 #'   timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500],
-#'   boolCol = sample(c(TRUE,FALSE), 500, TRUE),
+#'   boolCol = sample(c(TRUE, FALSE), 500, TRUE),
 #'   col1 = sample(10000, size = 500, replace = TRUE),
 #'   col2 = sample(10000, size = 500, replace = TRUE),
 #'   col3 = 1:500
@@ -1967,16 +1927,16 @@ uby_rolling_min_tick <- function(cols, rev_ticks, fwd_ticks = 0) {
 #'
 #' # compute rolling minimum of col1 and col2, using the previous 5 seconds
 #' th1 <- th$
-#'   update_by(uby_rolling_min_time(ts_col="timeCol", cols=c("col1RollMin = col1", "col2RollMin = col2"), rev_time="PT5s"))
+#'   update_by(uby_rolling_min_time(ts_col = "timeCol", cols = c("col1RollMin = col1", "col2RollMin = col2"), rev_time = "PT5s"))
 #'
 #' # compute rolling minimum of col1 and col2, grouped by boolCol, using previous 5 seconds, and following 5 seconds
 #' th2 <- th$
-#'   update_by(uby_rolling_min_time(ts_col="timeCol", cols=c("col1RollMin = col1", "col2RollMin = col2"), rev_time="PT5s", fwd_ticks="PT5s")), by="boolCol")
+#'   update_by(uby_rolling_min_time(ts_col = "timeCol", cols = c("col1RollMin = col1", "col2RollMin = col2"), rev_time = "PT5s", fwd_time = "PT5s"), by = "boolCol")
 #'
 #' # compute rolling minimum of col1 and col2, grouped by boolCol and parity of col3, using following 10 seconds
 #' th3 <- th$
 #'   update("col3Parity = col3 % 2")$
-#'   update_by(uby_rolling_min_time(ts_col="timeCol", cols=c("col1RollMin = col1", "col2RollMin = col2"), rev_time="PT0s", fwd_time="PT10s")), by=c("boolCol", "col3Parity"))
+#'   update_by(uby_rolling_min_time(ts_col = "timeCol", cols = c("col1RollMin = col1", "col2RollMin = col2"), rev_time = "PT0s", fwd_time = "PT10s"), by = c("boolCol", "col3Parity"))
 #'
 #' client$close()
 #' }
@@ -2024,6 +1984,9 @@ uby_rolling_min_time <- function(ts_col, cols, rev_time, fwd_time = "PT0s") {
 #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically
 #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of
 #' a `uby` function can otherwise seem unexpected.
+#'
+#' For more information, see the vignette on `uby` functions by running
+#' `vignette("update_by")`.
 #'
 #' @param cols String or list of strings denoting the column(s) to operate on. Can be renaming expressions, i.e. “new_col = col”.
 #' Default is to compute the rolling maximum for all non-grouping columns.
@@ -2036,12 +1999,12 @@ uby_rolling_min_time <- function(ts_col, cols, rev_time, fwd_time = "PT0s") { #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( #' timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], -#' boolCol = sample(c(TRUE,FALSE), 500, TRUE), +#' boolCol = sample(c(TRUE, FALSE), 500, TRUE), #' col1 = sample(10000, size = 500, replace = TRUE), #' col2 = sample(10000, size = 500, replace = TRUE), #' col3 = 1:500 @@ -2050,16 +2013,16 @@ uby_rolling_min_time <- function(ts_col, cols, rev_time, fwd_time = "PT0s") { #' #' # compute rolling maximum of col1 and col2, using the previous 5 rows and current row #' th1 <- th$ -#' update_by(uby_rolling_max_tick(cols=c("col1RollMax = col1", "col2RollMax = col2"), rev_ticks=6)) +#' update_by(uby_rolling_max_tick(cols = c("col1RollMax = col1", "col2RollMax = col2"), rev_ticks = 6)) #' #' # compute rolling maximum of col1 and col2, grouped by boolCol, using previous 5 rows, current row, and following 5 rows #' th2 <- th$ -#' update_by(uby_rolling_max_tick(cols=c("col1RollMax = col1", "col2RollMax = col2"), rev_ticks=6, fwd_ticks=5)), by="boolCol") +#' update_by(uby_rolling_max_tick(cols = c("col1RollMax = col1", "col2RollMax = col2"), rev_ticks = 6, fwd_ticks = 5), by = "boolCol") #' #' # compute rolling maximum of col1 and col2, grouped by boolCol and parity of col3, using current row and following 10 rows #' th3 <- th$ #' update("col3Parity = col3 % 2")$ -#' update_by(uby_rolling_max_tick(cols=c("col1RollMax = col1", "col2RollMax = col2"), rev_ticks=1, fwd_ticks=10)), by=c("boolCol", "col3Parity")) +#' update_by(uby_rolling_max_tick(cols = c("col1RollMax = col1", "col2RollMax = col2"), rev_ticks = 1, fwd_ticks = 10), by = c("boolCol", "col3Parity")) #' #' client$close() #' } @@ -2103,6 +2066,9 @@ uby_rolling_max_tick <- function(cols, rev_ticks, fwd_ticks = 0) { #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of #' a `uby` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `uby` functions by running +#' `vignette("update_by")`. #' #' @param ts_col String denoting the column to use as the timestamp. #' @param cols String or list of strings denoting the column(s) to operate on. Can be renaming expressions, i.e. “new_col = col”. 
@@ -2116,12 +2082,12 @@ uby_rolling_max_tick <- function(cols, rev_ticks, fwd_ticks = 0) {
 #' library(rdeephaven)
 #'
 #' # connecting to Deephaven server
-#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token")
+#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token")
 #'
 #' # create data frame, push to server, retrieve TableHandle
 #' df <- data.frame(
 #'   timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500],
-#'   boolCol = sample(c(TRUE,FALSE), 500, TRUE),
+#'   boolCol = sample(c(TRUE, FALSE), 500, TRUE),
 #'   col1 = sample(10000, size = 500, replace = TRUE),
 #'   col2 = sample(10000, size = 500, replace = TRUE),
 #'   col3 = 1:500
@@ -2130,16 +2096,16 @@ uby_rolling_max_tick <- function(cols, rev_ticks, fwd_ticks = 0) {
 #'
 #' # compute rolling maximum of col1 and col2, using the previous 5 seconds
 #' th1 <- th$
-#'   update_by(uby_rolling_max_time(ts_col="timeCol", cols=c("col1RollMax = col1", "col2RollMax = col2"), rev_time="PT5s"))
+#'   update_by(uby_rolling_max_time(ts_col = "timeCol", cols = c("col1RollMax = col1", "col2RollMax = col2"), rev_time = "PT5s"))
 #'
 #' # compute rolling maximum of col1 and col2, grouped by boolCol, using previous 5 seconds, and following 5 seconds
 #' th2 <- th$
-#'   update_by(uby_rolling_max_time(ts_col="timeCol", cols=c("col1RollMax = col1", "col2RollMax = col2"), rev_time="PT5s", fwd_ticks="PT5s")), by="boolCol")
+#'   update_by(uby_rolling_max_time(ts_col = "timeCol", cols = c("col1RollMax = col1", "col2RollMax = col2"), rev_time = "PT5s", fwd_time = "PT5s"), by = "boolCol")
 #'
 #' # compute rolling maximum of col1 and col2, grouped by boolCol and parity of col3, using following 10 seconds
 #' th3 <- th$
 #'   update("col3Parity = col3 % 2")$
-#'   update_by(uby_rolling_max_time(ts_col="timeCol", cols=c("col1RollMax = col1", "col2RollMax = col2"), rev_time="PT0s", fwd_time="PT10s")), by=c("boolCol", "col3Parity"))
+#'   update_by(uby_rolling_max_time(ts_col = "timeCol", cols = c("col1RollMax = col1", "col2RollMax = col2"), rev_time = "PT0s", fwd_time = "PT10s"), by = c("boolCol", "col3Parity"))
 #'
 #' client$close()
 #' }
@@ -2187,6 +2153,9 @@ uby_rolling_max_time <- function(ts_col, cols, rev_time, fwd_time = "PT0s") {
 #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically
 #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of
 #' a `uby` function can otherwise seem unexpected.
+#'
+#' For more information, see the vignette on `uby` functions by running
+#' `vignette("update_by")`.
 #'
 #' @param cols String or list of strings denoting the column(s) to operate on. Can be renaming expressions, i.e. “new_col = col”.
 #' Default is to compute the rolling product for all non-grouping columns.
@@ -2199,12 +2168,12 @@ uby_rolling_max_time <- function(ts_col, cols, rev_time, fwd_time = "PT0s") { #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( #' timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], -#' boolCol = sample(c(TRUE,FALSE), 500, TRUE), +#' boolCol = sample(c(TRUE, FALSE), 500, TRUE), #' col1 = sample(10000, size = 500, replace = TRUE), #' col2 = sample(10000, size = 500, replace = TRUE), #' col3 = 1:500 @@ -2213,16 +2182,16 @@ uby_rolling_max_time <- function(ts_col, cols, rev_time, fwd_time = "PT0s") { #' #' # compute rolling product of col1 and col2, using the previous 5 rows and current row #' th1 <- th$ -#' update_by(uby_rolling_prod_tick(cols=c("col1RollProd = col1", "col2RollProd = col2"), rev_ticks=6)) +#' update_by(uby_rolling_prod_tick(cols = c("col1RollProd = col1", "col2RollProd = col2"), rev_ticks = 6)) #' #' # compute rolling product of col1 and col2, grouped by boolCol, using previous 5 rows, current row, and following 5 rows #' th2 <- th$ -#' update_by(uby_rolling_prod_tick(cols=c("col1RollProd = col1", "col2RollProd = col2"), rev_ticks=6, fwd_ticks=5)), by="boolCol") +#' update_by(uby_rolling_prod_tick(cols = c("col1RollProd = col1", "col2RollProd = col2"), rev_ticks = 6, fwd_ticks = 5), by = "boolCol") #' #' # compute rolling product of col1 and col2, grouped by boolCol and parity of col3, using current row and following 10 rows #' th3 <- th$ #' update("col3Parity = col3 % 2")$ -#' update_by(uby_rolling_prod_tick(cols=c("col1RollProd = col1", "col2RollProd = col2"), rev_ticks=1, fwd_ticks=10)), by=c("boolCol", "col3Parity")) +#' update_by(uby_rolling_prod_tick(cols = c("col1RollProd = col1", "col2RollProd = col2"), rev_ticks = 1, fwd_ticks = 10), by = c("boolCol", "col3Parity")) #' #' client$close() #' } @@ -2266,6 +2235,9 @@ uby_rolling_prod_tick <- function(cols, rev_ticks, fwd_ticks = 0) { #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of #' a `uby` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `uby` functions by running +#' `vignette("update_by")`. #' #' @param ts_col String denoting the column to use as the timestamp. #' @param cols String or list of strings denoting the column(s) to operate on. Can be renaming expressions, i.e. “new_col = col”. 
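A minimal sketch of the two forms the `cols` argument can take, as described in the parameter documentation above; it assumes the `th` from the examples.

```r
# Renaming expression: the rolling product lands in a new column, col1 is left untouched.
th_renamed <- th$update_by(uby_rolling_prod_tick(cols = "col1RollProd = col1", rev_ticks = 6))

# Bare column name: col1 is replaced by its own rolling product.
th_in_place <- th$update_by(uby_rolling_prod_tick(cols = "col1", rev_ticks = 6))
```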
@@ -2279,12 +2251,12 @@ uby_rolling_prod_tick <- function(cols, rev_ticks, fwd_ticks = 0) { #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( #' timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], -#' boolCol = sample(c(TRUE,FALSE), 500, TRUE), +#' boolCol = sample(c(TRUE, FALSE), 500, TRUE), #' col1 = sample(10000, size = 500, replace = TRUE), #' col2 = sample(10000, size = 500, replace = TRUE), #' col3 = 1:500 @@ -2293,16 +2265,16 @@ uby_rolling_prod_tick <- function(cols, rev_ticks, fwd_ticks = 0) { #' #' # compute rolling product of col1 and col2, using the previous 5 seconds #' th1 <- th$ -#' update_by(uby_rolling_prod_time(ts_col="timeCol", cols=c("col1RollProd = col1", "col2RollProd = col2"), rev_time="PT5s")) +#' update_by(uby_rolling_prod_time(ts_col = "timeCol", cols = c("col1RollProd = col1", "col2RollProd = col2"), rev_time = "PT5s")) #' #' # compute rolling product of col1 and col2, grouped by boolCol, using previous 5 seconds, and following 5 seconds #' th2 <- th$ -#' update_by(uby_rolling_prod_time(ts_col="timeCol", cols=c("col1RollProd = col1", "col2RollProd = col2"), rev_time="PT5s", fwd_ticks="PT5s")), by="boolCol") +#' update_by(uby_rolling_prod_time(ts_col = "timeCol", cols = c("col1RollProd = col1", "col2RollProd = col2"), rev_time = "PT5s", fwd_ticks = "PT5s"), by = "boolCol") #' #' # compute rolling product of col1 and col2, grouped by boolCol and parity of col3, using following 10 seconds #' th3 <- th$ #' update("col3Parity = col3 % 2")$ -#' update_by(uby_rolling_prod_time(ts_col="timeCol", cols=c("col1RollProd = col1", "col2RollProd = col2"), rev_time="PT0s", fwd_time="PT10s")), by=c("boolCol", "col3Parity")) +#' update_by(uby_rolling_prod_time(ts_col = "timeCol", cols = c("col1RollProd = col1", "col2RollProd = col2"), rev_time = "PT0s", fwd_time = "PT10s"), by = c("boolCol", "col3Parity")) #' #' client$close() #' } @@ -2350,6 +2322,9 @@ uby_rolling_prod_time <- function(ts_col, cols, rev_time, fwd_time = "PT0s") { #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of #' a `uby` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `uby` functions by running +#' `vignette("update_by")`. #' #' @param cols String or list of strings denoting the column(s) to operate on. Can be renaming expressions, i.e. “new_col = col”. #' Default is to compute the rolling count for all non-grouping columns. 
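The tick-based windows in the examples above follow the convention that `rev_ticks` counts the current row plus the rows before it, while `fwd_ticks` counts the rows after it. A small arithmetic sketch of which rows end up in the window:

```r
# For row i, the window spans rows (i - rev_ticks + 1) through (i + fwd_ticks).
rev_ticks <- 6
fwd_ticks <- 5
i <- 100

window_rows <- seq(i - rev_ticks + 1, i + fwd_ticks)
window_rows         # rows 95..105: the previous 5 rows, the current row, and the following 5 rows
length(window_rows) # 11
```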
@@ -2362,12 +2337,12 @@ uby_rolling_prod_time <- function(ts_col, cols, rev_time, fwd_time = "PT0s") { #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( #' timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], -#' boolCol = sample(c(TRUE,FALSE), 500, TRUE), +#' boolCol = sample(c(TRUE, FALSE), 500, TRUE), #' col1 = sample(10000, size = 500, replace = TRUE), #' col2 = sample(10000, size = 500, replace = TRUE), #' col3 = 1:500 @@ -2376,16 +2351,16 @@ uby_rolling_prod_time <- function(ts_col, cols, rev_time, fwd_time = "PT0s") { #' #' # compute rolling count of col1 and col2, using the previous 5 rows and current row #' th1 <- th$ -#' update_by(uby_rolling_count_tick(cols=c("col1RollCount = col1", "col2RollCount = col2"), rev_ticks=6)) +#' update_by(uby_rolling_count_tick(cols = c("col1RollCount = col1", "col2RollCount = col2"), rev_ticks = 6)) #' #' # compute rolling count of col1 and col2, grouped by boolCol, using previous 5 rows, current row, and following 5 rows #' th2 <- th$ -#' update_by(uby_rolling_count_tick(cols=c("col1RollCount = col1", "col2RollCount = col2"), rev_ticks=6, fwd_ticks=5), by="boolCol") +#' update_by(uby_rolling_count_tick(cols = c("col1RollCount = col1", "col2RollCount = col2"), rev_ticks = 6, fwd_ticks = 5), by = "boolCol") #' #' # compute rolling count of col1 and col2, grouped by boolCol and parity of col3, using current row and following 10 rows #' th3 <- th$ #' update("col3Parity = col3 % 2")$ -#' update_by(uby_rolling_count_tick(cols=c("col1RollCount = col1", "col2RollCount = col2"), rev_ticks=1, fwd_ticks=10), by=c("boolCol", "col3Parity")) +#' update_by(uby_rolling_count_tick(cols = c("col1RollCount = col1", "col2RollCount = col2"), rev_ticks = 1, fwd_ticks = 10), by = c("boolCol", "col3Parity")) #' #' client$close() #' } @@ -2429,6 +2404,9 @@ uby_rolling_count_tick <- function(cols, rev_ticks, fwd_ticks = 0) { #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of #' a `uby` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `uby` functions by running +#' `vignette("update_by")`. #' #' @param ts_col String denoting the column to use as the timestamp. #' @param cols String or list of strings denoting the column(s) to operate on. Can be renaming expressions, i.e. “new_col = col”. 
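Several `uby` operations can also be applied in one pass. A hedged sketch, assuming `update_by()` accepts a vector of `UpdateByOp`s built with `c()` in the same way `agg_by()` accepts a vector of aggregations elsewhere in this package; it reuses the `th` from the examples above.

```r
# Assumption: multiple UpdateByOps passed together, all grouped by boolCol.
th_multi <- th$update_by(
  c(
    uby_rolling_count_tick(cols = "col1RollCount = col1", rev_ticks = 6),
    uby_cum_sum("col2CumSum = col2")
  ),
  by = "boolCol"
)
```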
@@ -2442,12 +2420,12 @@ uby_rolling_count_tick <- function(cols, rev_ticks, fwd_ticks = 0) { #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( #' timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], -#' boolCol = sample(c(TRUE,FALSE), 500, TRUE), +#' boolCol = sample(c(TRUE, FALSE), 500, TRUE), #' col1 = sample(10000, size = 500, replace = TRUE), #' col2 = sample(10000, size = 500, replace = TRUE), #' col3 = 1:500 @@ -2456,16 +2434,16 @@ uby_rolling_count_tick <- function(cols, rev_ticks, fwd_ticks = 0) { #' #' # compute rolling count of col1 and col2, using the previous 5 seconds #' th1 <- th$ -#' update_by(uby_rolling_count_time(ts_col="timeCol", cols=c("col1RollCount = col1", "col2RollCount = col2"), rev_time="PT5s")) +#' update_by(uby_rolling_count_time(ts_col = "timeCol", cols = c("col1RollCount = col1", "col2RollCount = col2"), rev_time = "PT5s")) #' #' # compute rolling count of col1 and col2, grouped by boolCol, using previous 5 seconds, and following 5 seconds #' th2 <- th$ -#' update_by(uby_rolling_count_time(ts_col="timeCol", cols=c("col1RollCount = col1", "col2RollCount = col2"), rev_time="PT5s", fwd_ticks="PT5s"), by="boolCol") +#' update_by(uby_rolling_count_time(ts_col = "timeCol", cols = c("col1RollCount = col1", "col2RollCount = col2"), rev_time = "PT5s", fwd_ticks = "PT5s"), by = "boolCol") #' #' # compute rolling count of col1 and col2, grouped by boolCol and parity of col3, using following 10 seconds #' th3 <- th$ #' update("col3Parity = col3 % 2")$ -#' update_by(uby_rolling_count_time(ts_col="timeCol", cols=c("col1RollCount = col1", "col2RollCount = col2"), rev_time="PT0s", fwd_time="PT10s"), by=c("boolCol", "col3Parity")) +#' update_by(uby_rolling_count_time(ts_col = "timeCol", cols = c("col1RollCount = col1", "col2RollCount = col2"), rev_time = "PT0s", fwd_time = "PT10s"), by = c("boolCol", "col3Parity")) #' #' client$close() #' } @@ -2513,6 +2491,9 @@ uby_rolling_count_time <- function(ts_col, cols, rev_time, fwd_time = "PT0s") { #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of #' a `uby` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `uby` functions by running +#' `vignette("update_by")`. #' #' @param cols String or list of strings denoting the column(s) to operate on. Can be renaming expressions, i.e. “new_col = col”. #' Default is to compute the rolling standard deviation for all non-grouping columns. 
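For a rough sense of what a rolling standard deviation produces, here is a pure-R cross-check in the spirit of the package's own tests, which use `zoo` to validate rolling operations. This is only an analogue: handling of the first, partial windows may differ from the server's results.

```r
library(zoo)

x <- c(4, 8, 15, 16, 23, 42)

# Rolling sd over the previous 2 rows and the current row (roughly rev_ticks = 3, fwd_ticks = 0),
# letting the window grow from the start of the vector.
rollapplyr(x, width = 3, FUN = sd, partial = TRUE)
```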
@@ -2525,12 +2506,12 @@ uby_rolling_count_time <- function(ts_col, cols, rev_time, fwd_time = "PT0s") { #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( #' timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], -#' boolCol = sample(c(TRUE,FALSE), 500, TRUE), +#' boolCol = sample(c(TRUE, FALSE), 500, TRUE), #' col1 = sample(10000, size = 500, replace = TRUE), #' col2 = sample(10000, size = 500, replace = TRUE), #' col3 = 1:500 @@ -2539,16 +2520,16 @@ uby_rolling_count_time <- function(ts_col, cols, rev_time, fwd_time = "PT0s") { #' #' # compute rolling standard deviation of col1 and col2, using the previous 5 rows and current row #' th1 <- th$ -#' update_by(uby_rolling_std_tick(cols=c("col1RollStd = col1", "col2RollStd = col2"), rev_ticks=6)) +#' update_by(uby_rolling_std_tick(cols = c("col1RollStd = col1", "col2RollStd = col2"), rev_ticks = 6)) #' #' # compute rolling standard deviation of col1 and col2, grouped by boolCol, using previous 5 rows, current row, and following 5 rows #' th2 <- th$ -#' update_by(uby_rolling_std_tick(cols=c("col1RollStd = col1", "col2RollStd = col2"), rev_ticks=6, fwd_ticks=5), by="boolCol") +#' update_by(uby_rolling_std_tick(cols = c("col1RollStd = col1", "col2RollStd = col2"), rev_ticks = 6, fwd_ticks = 5), by = "boolCol") #' #' # compute rolling standard deviation of col1 and col2, grouped by boolCol and parity of col3, using current row and following 10 rows #' th3 <- th$ #' update("col3Parity = col3 % 2")$ -#' update_by(uby_rolling_std_tick(cols=c("col1RollStd = col1", "col2RollStd = col2"), rev_ticks=1, fwd_ticks=10), by=c("boolCol", "col3Parity")) +#' update_by(uby_rolling_std_tick(cols = c("col1RollStd = col1", "col2RollStd = col2"), rev_ticks = 1, fwd_ticks = 10), by = c("boolCol", "col3Parity")) #' #' client$close() #' } @@ -2592,6 +2573,9 @@ uby_rolling_std_tick <- function(cols, rev_ticks, fwd_ticks = 0) { #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of #' a `uby` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `uby` functions by running +#' `vignette("update_by")`. #' #' @param ts_col String denoting the column to use as the timestamp. #' @param cols String or list of strings denoting the column(s) to operate on. Can be renaming expressions, i.e. “new_col = col”. 
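Unlike the tick-based variants, the time-based windows above are defined by timestamps, so the number of rows per window varies with how densely the rows are spaced. A small pure-R illustration of that row count for `rev_time = "PT5s"`, `fwd_time = "PT0s"`:

```r
# Six rows with uneven spacing (seconds after an arbitrary start time).
ts <- as.POSIXct("2023-01-01 00:00:00", tz = "UTC") + c(0, 1, 2, 10, 11, 30)

# Number of rows whose timestamp falls within [t - 5s, t] for each row's timestamp t.
rows_in_window <- sapply(seq_along(ts), function(i) sum(ts >= ts[i] - 5 & ts <= ts[i]))
rows_in_window # 1 2 3 1 2 1
```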
@@ -2605,12 +2589,12 @@ uby_rolling_std_tick <- function(cols, rev_ticks, fwd_ticks = 0) { #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( #' timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], -#' boolCol = sample(c(TRUE,FALSE), 500, TRUE), +#' boolCol = sample(c(TRUE, FALSE), 500, TRUE), #' col1 = sample(10000, size = 500, replace = TRUE), #' col2 = sample(10000, size = 500, replace = TRUE), #' col3 = 1:500 @@ -2619,16 +2603,16 @@ uby_rolling_std_tick <- function(cols, rev_ticks, fwd_ticks = 0) { #' #' # compute rolling standard deviation of col1 and col2, using the previous 5 seconds #' th1 <- th$ -#' update_by(uby_rolling_std_time(ts_col="timeCol", cols=c("col1RollStd = col1", "col2RollStd = col2"), rev_time="PT5s")) +#' update_by(uby_rolling_std_time(ts_col = "timeCol", cols = c("col1RollStd = col1", "col2RollStd = col2"), rev_time = "PT5s")) #' #' # compute rolling standard deviation of col1 and col2, grouped by boolCol, using previous 5 seconds, and following 5 seconds #' th2 <- th$ -#' update_by(uby_rolling_std_time(ts_col="timeCol", cols=c("col1RollStd = col1", "col2RollStd = col2"), rev_time="PT5s", fwd_ticks="PT5s"), by="boolCol") +#' update_by(uby_rolling_std_time(ts_col = "timeCol", cols = c("col1RollStd = col1", "col2RollStd = col2"), rev_time = "PT5s", fwd_ticks = "PT5s"), by = "boolCol") #' #' # compute rolling standard deviation of col1 and col2, grouped by boolCol and parity of col3, using following 10 seconds #' th3 <- th$ #' update("col3Parity = col3 % 2")$ -#' update_by(uby_rolling_std_time(ts_col="timeCol", cols=c("col1RollStd = col1", "col2RollStd = col2"), rev_time="PT0s", fwd_time="PT10s"), by=c("boolCol", "col3Parity")) +#' update_by(uby_rolling_std_time(ts_col = "timeCol", cols = c("col1RollStd = col1", "col2RollStd = col2"), rev_time = "PT0s", fwd_time = "PT10s"), by = c("boolCol", "col3Parity")) #' #' client$close() #' } @@ -2676,6 +2660,9 @@ uby_rolling_std_time <- function(ts_col, cols, rev_time, fwd_time = "PT0s") { #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of #' a `uby` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `uby` functions by running +#' `vignette("update_by")`. #' #' @param wcol String denoting the column to use for weights. This must be a numeric column. #' @param cols String or list of strings denoting the column(s) to operate on. Can be renaming expressions, i.e. “new_col = col”. 
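The weighted averages documented above are, per window, the usual weight-normalised mean. A small worked example of that quantity for one window's values and their weights taken from `wcol`:

```r
x <- c(10, 20, 30) # values of the target column inside one window
w <- c(1, 2, 3)    # matching weights from wcol

sum(w * x) / sum(w) # 23.33..., versus an unweighted mean of 20
```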
@@ -2689,12 +2676,12 @@ uby_rolling_std_time <- function(ts_col, cols, rev_time, fwd_time = "PT0s") { #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( #' timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], -#' boolCol = sample(c(TRUE,FALSE), 500, TRUE), +#' boolCol = sample(c(TRUE, FALSE), 500, TRUE), #' col1 = sample(10000, size = 500, replace = TRUE), #' col2 = sample(10000, size = 500, replace = TRUE), #' col3 = 1:500 @@ -2703,16 +2690,16 @@ uby_rolling_std_time <- function(ts_col, cols, rev_time, fwd_time = "PT0s") { #' #' # compute rolling weighted average of col1 and col2, weighted by col3, using the previous 5 rows and current row #' th1 <- th$ -#' update_by(uby_rolling_wavg_tick(wcol="col3", cols=c("col1RollWAvg = col1", "col2RollWAvg = col2"), rev_ticks=6)) +#' update_by(uby_rolling_wavg_tick(wcol = "col3", cols = c("col1RollWAvg = col1", "col2RollWAvg = col2"), rev_ticks = 6)) #' #' # compute rolling weighted average of col1 and col2, weighted by col3, grouped by boolCol, using previous 5 rows, current row, and following 5 rows #' th2 <- th$ -#' update_by(uby_rolling_wavg_tick(wcol="col3", cols=c("col1RollWAvg = col1", "col2RollWAvg = col2"), rev_ticks=6, fwd_ticks=5), by="boolCol") +#' update_by(uby_rolling_wavg_tick(wcol = "col3", cols = c("col1RollWAvg = col1", "col2RollWAvg = col2"), rev_ticks = 6, fwd_ticks = 5), by = "boolCol") #' #' # compute rolling weighted average of col1 and col2, weighted by col3, grouped by boolCol and parity of col3, using current row and following 10 rows #' th3 <- th$ #' update("col3Parity = col3 % 2")$ -#' update_by(uby_rolling_wavg_tick(wcol="col3", cols=c("col1RollWAvg = col1", "col2RollWAvg = col2"), rev_ticks=1, fwd_ticks=10), by=c("boolCol", "col3Parity")) +#' update_by(uby_rolling_wavg_tick(wcol = "col3", cols = c("col1RollWAvg = col1", "col2RollWAvg = col2"), rev_ticks = 1, fwd_ticks = 10), by = c("boolCol", "col3Parity")) #' #' client$close() #' } @@ -2757,6 +2744,9 @@ uby_rolling_wavg_tick <- function(wcol, cols, rev_ticks, fwd_ticks = 0) { #' function called an [`UpdateByOp`][UpdateByOp] intended to be used in a call to `update_by()`. This detail is typically #' hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of #' a `uby` function can otherwise seem unexpected. +#' +#' For more information, see the vignette on `uby` functions by running +#' `vignette("update_by")`. #' #' @param ts_col String denoting the column to use as the timestamp. #' @param wcol String denoting the column to use for weights. This must be a numeric column. 
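When `by` is supplied, the windows above are formed within each group rather than across the whole table. A minimal sketch, assuming the `th` from the examples:

```r
# Each row's window only draws on rows with the same boolCol value; rows from the other
# group are skipped even if they are adjacent in the table.
th_grouped <- th$update_by(
  uby_rolling_wavg_tick(wcol = "col3", cols = "col1RollWAvg = col1", rev_ticks = 6),
  by = "boolCol"
)
```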
@@ -2771,12 +2761,12 @@ uby_rolling_wavg_tick <- function(wcol, cols, rev_ticks, fwd_ticks = 0) { #' library(rdeephaven) #' #' # connecting to Deephaven server -#' client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +#' client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") #' #' # create data frame, push to server, retrieve TableHandle #' df <- data.frame( #' timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], -#' boolCol = sample(c(TRUE,FALSE), 500, TRUE), +#' boolCol = sample(c(TRUE, FALSE), 500, TRUE), #' col1 = sample(10000, size = 500, replace = TRUE), #' col2 = sample(10000, size = 500, replace = TRUE), #' col3 = 1:500 @@ -2785,16 +2775,16 @@ uby_rolling_wavg_tick <- function(wcol, cols, rev_ticks, fwd_ticks = 0) { #' #' # compute rolling weighted average of col1 and col2, weighted by col3, using the previous 5 seconds #' th1 <- th$ -#' update_by(uby_rolling_wavg_time(ts_col="timeCol", wcol="col3", cols=c("col1RollWAvg = col1", "col2RollWAvg = col2"), rev_time="PT5s")) +#' update_by(uby_rolling_wavg_time(ts_col = "timeCol", wcol = "col3", cols = c("col1RollWAvg = col1", "col2RollWAvg = col2"), rev_time = "PT5s")) #' #' # compute rolling weighted average of col1 and col2, weighted by col3, grouped by boolCol, using previous 5 seconds, and following 5 seconds #' th2 <- th$ -#' update_by(uby_rolling_wavg_time(ts_col="timeCol", wcol="col3", cols=c("col1RollWAvg = col1", "col2RollWAvg = col2"), rev_time="PT5s", fwd_ticks="PT5s"), by="boolCol") +#' update_by(uby_rolling_wavg_time(ts_col = "timeCol", wcol = "col3", cols = c("col1RollWAvg = col1", "col2RollWAvg = col2"), rev_time = "PT5s", fwd_ticks = "PT5s"), by = "boolCol") #' #' # compute rolling weighted average of col1 and col2, weighted by col3, grouped by boolCol and parity of col3, using following 10 seconds #' th3 <- th$ #' update("col3Parity = col3 % 2")$ -#' update_by(uby_rolling_wavg_time(ts_col="timeCol", wcol="col3", cols=c("col1RollWAvg = col1", "col2RollWAvg = col2"), rev_time="PT0s", fwd_time="PT10s"), by=c("boolCol", "col3Parity")) +#' update_by(uby_rolling_wavg_time(ts_col = "timeCol", wcol = "col3", cols = c("col1RollWAvg = col1", "col2RollWAvg = col2"), rev_time = "PT0s", fwd_time = "PT10s"), by = c("boolCol", "col3Parity")) #' #' client$close() #' } diff --git a/R/rdeephaven/R/utility_functions.R b/R/rdeephaven/R/utility_functions.R index 579fd85b964..54226772ee3 100644 --- a/R/rdeephaven/R/utility_functions.R +++ b/R/rdeephaven/R/utility_functions.R @@ -3,29 +3,28 @@ first_class <- function(arg) { } vector_wrong_element_type_or_value <- function(arg_name, candidate, type_message, descriptor_message) { - stripped_type_message = sub(".*? ", "", type_message) + stripped_type_message <- sub(".*? ", "", type_message) return(paste0("'", arg_name, "' must be ", type_message, ", or a vector of ", stripped_type_message, "s", descriptor_message, ". Got a vector with at least one element that is not ", type_message, descriptor_message, ".")) } vector_wrong_type <- function(arg_name, candidate, type_message, descriptor_message) { - stripped_type_message = sub(".*? ", "", type_message) + stripped_type_message <- sub(".*? ", "", type_message) return(paste0("'", arg_name, "' must be ", type_message, " or a vector of ", stripped_type_message, "s", descriptor_message, ". 
Got an object of class ", first_class(candidate), ".")) } vector_needed_scalar <- function(arg_name, candidate, type_message, descriptor_message) { - stripped_type_message = sub(".*? ", "", type_message) + stripped_type_message <- sub(".*? ", "", type_message) return(paste0("'", arg_name, "' must be a single ", stripped_type_message, descriptor_message, ". Got a vector of length ", length(candidate), ".")) } scalar_wrong_type <- function(arg_name, candidate, type_message, descriptor_message) { - stripped_type_message = sub(".*? ", "", type_message) + stripped_type_message <- sub(".*? ", "", type_message) return(paste0("'", arg_name, "' must be a single ", stripped_type_message, descriptor_message, ". Got an object of class ", first_class(candidate), ".")) } scalar_wrong_value <- function(arg_name, candidate, type_message, descriptor_message) { - stripped_type_message = sub(".*? ", "", type_message) + stripped_type_message <- sub(".*? ", "", type_message) return(paste0("'", arg_name, "' must be a single ", stripped_type_message, descriptor_message, ". Got '", arg_name, "' = ", candidate, ".")) } # if required_type is a list, this will not behave correctly because of R's type coercion rules verify_type <- function(arg_name, candidate, is_scalar, required_type, type_message, descriptor_message = "") { - if (!is_scalar && (first_class(candidate) == "list")) { if (any(lapply(candidate, first_class) != required_type)) { stop(vector_wrong_element_type_or_value(arg_name, candidate, type_message, descriptor_message)) @@ -49,7 +48,6 @@ verify_type <- function(arg_name, candidate, is_scalar, required_type, type_mess # does not attempt to verify that candidate is numeric, intended to be used after `verify_type()` verify_int <- function(arg_name, candidate, is_scalar, type_message, descriptor_message = "") { - if (is_scalar && (length(c(candidate)) != 1)) { stop(vector_needed_scalar(arg_name, candidate, type_message, descriptor_message)) } else if (candidate != as.integer(candidate)) { @@ -63,12 +61,10 @@ verify_int <- function(arg_name, candidate, is_scalar, type_message, descriptor_ # does not attempt to verify that candidate is numeric, intended to be used after `verify_type()` verify_in_range <- function(arg_name, candidate, is_scalar, type_message, descriptor_message, lb, ub, lb_open, ub_open) { - if (is_scalar && (length(c(candidate)) != 1)) { - stripped_type_message = sub(".*? ", "", type_message) + stripped_type_message <- sub(".*? ", "", type_message) stop(paste0("Every element of '", arg_name, "' must be ", stripped_type_message, range_message, ". 
Got at least one element that is not ", stripped_type_message, range_message, ".")) - } - else if (((!is.null(lb)) && ((any(candidate <= lb) && (lb_open)) || (any(candidate < lb) && (!lb_open)))) || + } else if (((!is.null(lb)) && ((any(candidate <= lb) && (lb_open)) || (any(candidate < lb) && (!lb_open)))) || ((!is.null(ub)) && ((any(candidate >= ub) && (ub_open)) || (any(candidate > ub) && (!ub_open))))) { if (!is_scalar) { stop(vector_wrong_element_type_or_value(arg_name, candidate, type_message, descriptor_message)) diff --git a/R/rdeephaven/README.md b/R/rdeephaven/README.md index b799f7e79b7..eec85cd5371 100644 --- a/R/rdeephaven/README.md +++ b/R/rdeephaven/README.md @@ -1,11 +1,12 @@ -# The Deephaven R Client -The Deephaven R client is an R package that enables R users to interface with a Deephaven server and perform various +# The Deephaven Core R Client + +The Deephaven Core R client is an R package that enables R users to interface with a Deephaven server and perform various server-side operations from the comfort of RStudio or any other R interface. ## What can the R client do? -The Deephaven Client provides the following functionalities: +The R Client provides the following functionalities: 1. Connect to a Deephaven server - with anonymous authentication (no username or password) @@ -112,13 +113,13 @@ Currently, the R client is only supported on Ubuntu 20.04 or 22.04 and must be b ```r install.packages(c('Rcpp', 'arrow', 'R6', 'dplyr')) ``` - then install the deephaven client itself: + Then, exit the R console with `quit()`. From the rdeephaven directory, build and install the R client: ```r - install.packages(".", repos=NULL, type="source") + cd .. && R CMD build rdeephaven && R CMD INSTALL --no-multiarch --with-keep.source rdeephaven_*.tar.gz && rm rdeephaven_*.tar.gz ``` - This last command can also be executed from RStudio without the need for explicitly starting an R console. + This is needed over the typical `install.packages()` to ensure that the vignettes get built and installed. + - --- **NOTE** If using RStudio for this step, the environment variables that were set in step 3 may not persist into the RStudio @@ -138,15 +139,17 @@ Currently, the R client is only supported on Ubuntu 20.04 or 22.04 and must be b for the RStudio Server installation (the location of that file may depend on your particular RStudio server installation, but a common location is `/etc/rstudio/rserver.conf`). - --- + 6. Now, run ```r library(rdeephaven) ``` in the R session, and start using the client! + + For an introduction to the package, run `vignette("rdeephaven")`. + ---- **NOTE** If an error like this occurs in step 4: @@ -167,7 +170,7 @@ this means that the C++ compiler does not know where to find the relevant header export DHCPP=/path/to/dhcpp ``` 3. The Deephaven C++ client is installed and the `DHCPP` environment variable is set, but the current project is not configured to allow the compiler to access the Deephaven `dhcpp` and `src` directories. This is more difficult to give advice on, as it is an IDE-dependent problem. Consult your IDE's documentation on C/C++ compiler include paths for more information. 
---- + ## Running the unit tests diff --git a/R/rdeephaven/inst/tests/testthat/helper.R b/R/rdeephaven/inst/tests/testthat/helper.R index fcb9ad5703c..862a5bd1595 100644 --- a/R/rdeephaven/inst/tests/testthat/helper.R +++ b/R/rdeephaven/inst/tests/testthat/helper.R @@ -1,11 +1,11 @@ get_dh_target <- function() { - dh_host = Sys.getenv("DH_HOST") - if (dh_host == '') { - dh_host = "localhost" + dh_host <- Sys.getenv("DH_HOST") + if (dh_host == "") { + dh_host <- "localhost" } - dh_port = Sys.getenv("DH_PORT") - if (dh_port == '') { - dh_port = 10000 + dh_port <- Sys.getenv("DH_PORT") + if (dh_port == "") { + dh_port <- 10000 } - return(paste0(dh_host, ':', dh_port)) + return(paste0(dh_host, ":", dh_port)) } diff --git a/R/rdeephaven/inst/tests/testthat/test_agg_by.R b/R/rdeephaven/inst/tests/testthat/test_agg_by.R index 1b7a7f7d49c..8d6cc0e13ce 100644 --- a/R/rdeephaven/inst/tests/testthat/test_agg_by.R +++ b/R/rdeephaven/inst/tests/testthat/test_agg_by.R @@ -960,14 +960,20 @@ test_that("agg_count behaves as expected", { test_that("agg_by behaves nicely when given bad input", { data <- setup() - expect_error(data$th1$agg_by(agg_first()), - "Aggregations with no columns cannot be used in 'agg_by'. Got 'agg_first' at index 1 with an empty 'cols' argument.") + expect_error( + data$th1$agg_by(agg_first()), + "Aggregations with no columns cannot be used in 'agg_by'. Got 'agg_first' at index 1 with an empty 'cols' argument." + ) - expect_error(data$th1$agg_by(c(agg_first("int_col"), agg_last())), - "Aggregations with no columns cannot be used in 'agg_by'. Got 'agg_last' at index 2 with an empty 'cols' argument.") + expect_error( + data$th1$agg_by(c(agg_first("int_col"), agg_last())), + "Aggregations with no columns cannot be used in 'agg_by'. Got 'agg_last' at index 2 with an empty 'cols' argument." + ) - expect_error(data$th1$agg_by(c(agg_first("int_col"), agg_last("int_col"), agg_count("n"), agg_avg())), - "Aggregations with no columns cannot be used in 'agg_by'. Got 'agg_avg' at index 4 with an empty 'cols' argument.") + expect_error( + data$th1$agg_by(c(agg_first("int_col"), agg_last("int_col"), agg_count("n"), agg_avg())), + "Aggregations with no columns cannot be used in 'agg_by'. Got 'agg_avg' at index 4 with an empty 'cols' argument." + ) data$client$close() }) diff --git a/R/rdeephaven/inst/tests/testthat/test_client_wrapper.R b/R/rdeephaven/inst/tests/testthat/test_client_wrapper.R index 65d8902a419..686b532cc24 100644 --- a/R/rdeephaven/inst/tests/testthat/test_client_wrapper.R +++ b/R/rdeephaven/inst/tests/testthat/test_client_wrapper.R @@ -30,9 +30,7 @@ setup <- function() { ##### TESTING GOOD INPUTS ##### test_that("client dhConnection works in the simple case of anonymous authentication", { - expect_no_error(client <- Client$new(target = target)) - }) test_that("import_table does not fail with data frame inputs of simple column types", { @@ -150,7 +148,6 @@ int_col("Name_Int_Col", [44, 55, 66]) ##### TESTING BAD INPUTS ##### test_that("client constructor fails nicely with bad inputs", { - expect_error( Client$new(target = target, auth_type = "basic"), "Basic authentication was requested, but 'auth_token' was not provided, and at most one of 'username' or 'password' was provided. Please provide either 'username' and 'password', or 'auth_token'." @@ -239,7 +236,6 @@ test_that("client constructor fails nicely with bad inputs", { Client$new(target = target, extra_headers = list(a = 123)), "'value' must be a single string. Got an object of class numeric." 
) - }) test_that("import_table fails nicely with bad inputs", { @@ -298,8 +294,10 @@ test_that("run_script fails nicely with bad input types", { }) test_that("Running Client$new with wrong argument types gives good errors", { - expect_error(Client$new(12345), - "Client initialize first argument must be either a string or an Rcpp::XPtr object.") + expect_error( + Client$new(12345), + "Client initialize first argument must be either a string or an Rcpp::XPtr object." + ) }) test_that("A Client created from an Rcpp::XPtr is functional.", { @@ -309,7 +307,7 @@ test_that("A Client created from an Rcpp::XPtr is functional.", { client2 <- Client$new(client_xptr) t <- client2$open_table("t") df <- t$as_data_frame() - expect_true(df[1,1] == 42) + expect_true(df[1, 1] == 42) client$close() }) @@ -318,7 +316,7 @@ test_that("ticket_to_table works.", { client$empty_table(1)$update("A = 43")$bind_to_variable("t") t <- client$ticket_to_table("s/t") df <- t$as_data_frame() - expect_true(df[1,1] == 43) + expect_true(df[1, 1] == 43) client$close() }) diff --git a/R/rdeephaven/inst/tests/testthat/test_table_handle_wrapper.R b/R/rdeephaven/inst/tests/testthat/test_table_handle_wrapper.R index 3c8d1627a33..a16ee21c3e3 100644 --- a/R/rdeephaven/inst/tests/testthat/test_table_handle_wrapper.R +++ b/R/rdeephaven/inst/tests/testthat/test_table_handle_wrapper.R @@ -68,23 +68,23 @@ test_that("nrow returns the correct number of rows", { test_that("ncol returns the correct number of columns", { data <- setup() - + expect_equal(ncol(data$th1), ncol(data$df1)) expect_equal(ncol(data$th2), ncol(data$df2)) expect_equal(ncol(data$th3), ncol(data$df3)) expect_equal(ncol(data$th4), ncol(data$df4)) - + data$client$close() }) test_that("dim returns the correct dimension", { data <- setup() - + expect_equal(dim(data$th1), dim(data$df1)) expect_equal(dim(data$th2), dim(data$df2)) expect_equal(dim(data$th3), dim(data$df3)) expect_equal(dim(data$th4), dim(data$df4)) - + data$client$close() }) diff --git a/R/rdeephaven/inst/tests/testthat/test_table_ops.R b/R/rdeephaven/inst/tests/testthat/test_table_ops.R index e47afa3847e..fe08d316d0a 100644 --- a/R/rdeephaven/inst/tests/testthat/test_table_ops.R +++ b/R/rdeephaven/inst/tests/testthat/test_table_ops.R @@ -59,32 +59,32 @@ setup <- function() { test_that("merge_tables behaves as expected", { data <- setup() - + new_df1 <- rbind(data$df5) new_th1a <- data$th5$merge() new_th1b <- merge_tables(data$th5) expect_equal(as.data.frame(new_th1a), new_df1) expect_equal(as.data.frame(new_th1b), new_df1) - + new_df2 <- rbind(data$df5, data$df6) new_th2a <- data$th5$merge(data$th6) new_th2b <- merge_tables(data$th5, data$th6) expect_equal(as.data.frame(new_th2a), new_df2) expect_equal(as.data.frame(new_th2b), new_df2) - + new_df3 <- rbind(data$df5, data$df6, data$df6, data$df5) new_th3a <- data$th5$merge(data$th6, data$th6, data$th5) new_th3b <- merge_tables(data$th5, data$th6, data$th6, data$th5) expect_equal(as.data.frame(new_th3a), new_df3) expect_equal(as.data.frame(new_th3b), new_df3) - + new_th4a <- data$th5$merge(c(data$th6)) new_th4b <- merge_tables(data$th5, c(data$th6)) new_th4c <- merge_tables(c(data$th5, data$th6)) expect_equal(as.data.frame(new_th4a), new_df2) expect_equal(as.data.frame(new_th4b), new_df2) expect_equal(as.data.frame(new_th4c), new_df2) - + new_th5a <- data$th5$merge(c(data$th6, NULL, data$th6, data$th5)) new_th5b <- merge_tables(data$th5, c(data$th6, NULL, data$th6, data$th5)) new_th5c <- merge_tables(c(data$th5, data$th6, NULL, data$th6, data$th5)) @@ -97,7 +97,7 
@@ test_that("merge_tables behaves as expected", { expect_equal(as.data.frame(new_th5d), new_df3) expect_equal(as.data.frame(new_th5e), new_df3) expect_equal(as.data.frame(new_th5f), new_df3) - + data$client$close() }) @@ -773,9 +773,9 @@ test_that("join behaves as expected", { new_th1 <- data$th5$ join(data$th6, - on = character(), - joins = c("X_y = X", "Y_y = Y", "Number1_y = Number1", "Number2_y = Number2") - ) + on = character(), + joins = c("X_y = X", "Y_y = Y", "Number1_y = Number1", "Number2_y = Number2") + ) new_tb1 <- data$df5 %>% cross_join(data$df6) %>% rename( @@ -795,9 +795,9 @@ test_that("natural_join behaves as expected", { avg_by("X") new_th1 <- data$th5$ natural_join(new_th2, - on = "X", - joins = c("Number3 = Number1", "Number4 = Number2") - ) + on = "X", + joins = c("Number3 = Number1", "Number4 = Number2") + ) new_tb2 <- data$df6 %>% select(-Y) %>% @@ -822,9 +822,9 @@ test_that("exact_join behaves as expected", { avg_by("X") new_th1 <- data$th5$ exact_join(new_th2, - on = "X", - joins = c("Number3 = Number1", "Number4 = Number2") - ) + on = "X", + joins = c("Number3 = Number1", "Number4 = Number2") + ) new_tb2 <- data$df6 %>% select(-Y) %>% diff --git a/R/rdeephaven/inst/tests/testthat/test_update_by.R b/R/rdeephaven/inst/tests/testthat/test_update_by.R index 7b858e3e909..372e04c265a 100644 --- a/R/rdeephaven/inst/tests/testthat/test_update_by.R +++ b/R/rdeephaven/inst/tests/testthat/test_update_by.R @@ -6,7 +6,7 @@ library(zoo) # We suppress warnings because warnings are thrown when min() and max() are # applied to empty sets, which happens in the pure-R versions of rolling_*_time() -options(warn=-1) +options(warn = -1) setup <- function() { df1 <- data.frame( @@ -26,7 +26,7 @@ setup <- function() { bool_col = sample(c(TRUE, FALSE), 250000, TRUE), int_col = sample(0:10000, 250000, TRUE) ) - + deterministic_df3 <- data.frame( time_col = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 30), by = "1 sec")[1:500], bool_col = rep(c(TRUE, TRUE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, TRUE), 50), @@ -57,14 +57,24 @@ setup <- function() { deterministic_th3 <- client$import_table(deterministic_df3) th4 <- client$import_table(df4) th5 <- client$import_table(df5) - + # create variants with missing data to test NULL stuff - null_df1 <- as.data.frame(lapply(df1, function(x) {replace(x, sample(length(x), .5*length(x)), NA)})) - null_df2 <- as.data.frame(lapply(df2, function(x) {replace(x, sample(length(x), .5*length(x)), NA)})) - null_df3 <- as.data.frame(lapply(df3, function(x) {replace(x, sample(length(x), .5*length(x)), NA)})) - null_df4 <- as.data.frame(lapply(df4, function(x) {replace(x, sample(length(x), .5*length(x)), NA)})) - null_df5 <- as.data.frame(lapply(df5, function(x) {replace(x, sample(length(x), .5*length(x)), NA)})) - + null_df1 <- as.data.frame(lapply(df1, function(x) { + replace(x, sample(length(x), .5 * length(x)), NA) + })) + null_df2 <- as.data.frame(lapply(df2, function(x) { + replace(x, sample(length(x), .5 * length(x)), NA) + })) + null_df3 <- as.data.frame(lapply(df3, function(x) { + replace(x, sample(length(x), .5 * length(x)), NA) + })) + null_df4 <- as.data.frame(lapply(df4, function(x) { + replace(x, sample(length(x), .5 * length(x)), NA) + })) + null_df5 <- as.data.frame(lapply(df5, function(x) { + replace(x, sample(length(x), .5 * length(x)), NA) + })) + null_th1 <- client$import_table(null_df1) null_th2 <- client$import_table(null_df2) null_th3 <- client$import_table(null_df3) @@ -89,300 +99,300 @@ custom_rolling_time_op <- 
function(col, group_col, ...) { test_that("uby_cum_sum behaves as expected", { data <- setup() - + new_tb1 <- data$df1 %>% mutate(sum_int_col = cumsum(int_col)) new_th1 <- data$th1$ update_by(uby_cum_sum("sum_int_col = int_col")) expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) - + new_tb2 <- data$df2 %>% mutate(sum_col1 = cumsum(col1), sum_col3 = cumsum(col3)) new_th2 <- data$th2$ update_by(uby_cum_sum(c("sum_col1 = col1", "sum_col3 = col3"))) expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) - + new_tb3 <- data$df3 %>% group_by(bool_col) %>% mutate(sum_int_col = cumsum(int_col)) new_th3 <- data$th3$ update_by(uby_cum_sum("sum_int_col = int_col"), by = "bool_col") expect_equal(as.data.frame(new_th3), as.data.frame(new_tb3)) - + new_tb4 <- data$df4 %>% group_by(X) %>% mutate(sum_Number1 = cumsum(Number1), sum_Number2 = cumsum(Number2)) new_th4 <- data$th4$ update_by(uby_cum_sum(c("sum_Number1 = Number1", "sum_Number2 = Number2")), by = "X") expect_equal(as.data.frame(new_th4), as.data.frame(new_tb4)) - + new_tb5 <- data$df5 %>% group_by(Y) %>% mutate(sum_Number1 = cumsum(Number1), sum_Number2 = cumsum(Number2)) new_th5 <- data$th5$ update_by(uby_cum_sum(c("sum_Number1 = Number1", "sum_Number2 = Number2")), by = "Y") expect_equal(as.data.frame(new_th5), as.data.frame(new_tb5)) - + new_tb6 <- rbind(data$df4, data$df5, data$df4, data$df5) %>% group_by(X, Y) %>% mutate(sum_Number1 = cumsum(Number1), sum_Number2 = cumsum(Number2)) new_th6 <- merge_tables(data$th4, data$th5, data$th4, data$th5)$ update_by(uby_cum_sum(c("sum_Number1 = Number1", "sum_Number2 = Number2")), by = c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) - + data$client$close() }) test_that("uby_cum_prod behaves as expected", { data <- setup() - + new_tb1 <- data$df1 %>% mutate(prod_int_col = cumprod(int_col)) new_th1 <- data$th1$ update_by(uby_cum_prod("prod_int_col = int_col")) expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) - + new_tb2 <- data$df2 %>% mutate(prod_col1 = cumprod(col1), prod_col3 = cumprod(col3)) new_th2 <- data$th2$ update_by(uby_cum_prod(c("prod_col1 = col1", "prod_col3 = col3"))) expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) - + # Using df3 yields gigantic products, which leads to overflow on the server # due to the column being an int. Clients cannot cast to java BigInt type, # so once a table has an int type, we cannot change it from here. Thus, using # cum_prod on int columns from R should be done with an abundance of caution, # and probably not at all. Make it a double before pushing to the server. 
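A hedged sketch of the workaround suggested in the comment above: convert the integer column to double in R before importing, so the cumulative product is computed in floating point on the server rather than overflowing an int column. It reuses the `data` list and client from this test's `setup()`.

```r
# Cast int_col to double before pushing the frame to the server.
df3_dbl <- data$df3
df3_dbl$int_col <- as.numeric(df3_dbl$int_col)

th3_dbl <- data$client$import_table(df3_dbl)
new_th3 <- th3_dbl$update_by(uby_cum_prod("prod_int_col = int_col"), by = "bool_col")
```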
- + new_tb4 <- data$df4 %>% group_by(X) %>% mutate(prod_Number1 = cumprod(Number1), prod_Number2 = cumprod(Number2)) new_th4 <- data$th4$ update_by(uby_cum_prod(c("prod_Number1 = Number1", "prod_Number2 = Number2")), by = "X") expect_equal(as.data.frame(new_th4), as.data.frame(new_tb4)) - + new_tb5 <- data$df5 %>% group_by(Y) %>% mutate(prod_Number1 = cumprod(Number1), prod_Number2 = cumprod(Number2)) new_th5 <- data$th5$ update_by(uby_cum_prod(c("prod_Number1 = Number1", "prod_Number2 = Number2")), by = "Y") expect_equal(as.data.frame(new_th5), as.data.frame(new_tb5)) - + new_tb6 <- rbind(data$df4, data$df5, data$df4, data$df5) %>% group_by(X, Y) %>% mutate(prod_Number1 = cumprod(Number1), prod_Number2 = cumprod(Number2)) new_th6 <- merge_tables(data$th4, data$th5, data$th4, data$th5)$ update_by(uby_cum_prod(c("prod_Number1 = Number1", "prod_Number2 = Number2")), by = c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) - + data$client$close() }) test_that("uby_cum_min behaves as expected", { data <- setup() - + new_tb1 <- data$df1 %>% mutate(min_int_col = cummin(int_col)) new_th1 <- data$th1$ update_by(uby_cum_min("min_int_col = int_col")) expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) - + new_tb2 <- data$df2 %>% mutate(min_col1 = cummin(col1), min_col3 = cummin(col3)) new_th2 <- data$th2$ update_by(uby_cum_min(c("min_col1 = col1", "min_col3 = col3"))) expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) - + new_tb3 <- data$df3 %>% group_by(bool_col) %>% mutate(min_int_col = cummin(int_col)) new_th3 <- data$th3$ update_by(uby_cum_min("min_int_col = int_col"), by = "bool_col") expect_equal(as.data.frame(new_th3), as.data.frame(new_tb3)) - + new_tb4 <- data$df4 %>% group_by(X) %>% mutate(min_Number1 = cummin(Number1), min_Number2 = cummin(Number2)) new_th4 <- data$th4$ update_by(uby_cum_min(c("min_Number1 = Number1", "min_Number2 = Number2")), by = "X") expect_equal(as.data.frame(new_th4), as.data.frame(new_tb4)) - + new_tb5 <- data$df5 %>% group_by(Y) %>% mutate(min_Number1 = cummin(Number1), min_Number2 = cummin(Number2)) new_th5 <- data$th5$ update_by(uby_cum_min(c("min_Number1 = Number1", "min_Number2 = Number2")), by = "Y") expect_equal(as.data.frame(new_th5), as.data.frame(new_tb5)) - + new_tb6 <- rbind(data$df4, data$df5, data$df4, data$df5) %>% group_by(X, Y) %>% mutate(min_Number1 = cummin(Number1), min_Number2 = cummin(Number2)) new_th6 <- merge_tables(data$th4, data$th5, data$th4, data$th5)$ update_by(uby_cum_min(c("min_Number1 = Number1", "min_Number2 = Number2")), by = c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) - + data$client$close() }) test_that("uby_cum_max behaves as expected", { data <- setup() - + new_tb1 <- data$df1 %>% mutate(max_int_col = cummax(int_col)) new_th1 <- data$th1$ update_by(uby_cum_max("max_int_col = int_col")) expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) - + new_tb2 <- data$df2 %>% mutate(max_col1 = cummax(col1), max_col3 = cummax(col3)) new_th2 <- data$th2$ update_by(uby_cum_max(c("max_col1 = col1", "max_col3 = col3"))) expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) - + new_tb3 <- data$df3 %>% group_by(bool_col) %>% mutate(max_int_col = cummax(int_col)) new_th3 <- data$th3$ update_by(uby_cum_max("max_int_col = int_col"), by = "bool_col") expect_equal(as.data.frame(new_th3), as.data.frame(new_tb3)) - + new_tb4 <- data$df4 %>% group_by(X) %>% mutate(max_Number1 = cummax(Number1), max_Number2 = cummax(Number2)) new_th4 <- data$th4$ 
update_by(uby_cum_max(c("max_Number1 = Number1", "max_Number2 = Number2")), by = "X") expect_equal(as.data.frame(new_th4), as.data.frame(new_tb4)) - + new_tb5 <- data$df5 %>% group_by(Y) %>% mutate(max_Number1 = cummax(Number1), max_Number2 = cummax(Number2)) new_th5 <- data$th5$ update_by(uby_cum_max(c("max_Number1 = Number1", "max_Number2 = Number2")), by = "Y") expect_equal(as.data.frame(new_th5), as.data.frame(new_tb5)) - + new_tb6 <- rbind(data$df4, data$df5, data$df4, data$df5) %>% group_by(X, Y) %>% mutate(max_Number1 = cummax(Number1), max_Number2 = cummax(Number2)) new_th6 <- merge_tables(data$th4, data$th5, data$th4, data$th5)$ update_by(uby_cum_max(c("max_Number1 = Number1", "max_Number2 = Number2")), by = c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) - + data$client$close() }) test_that("uby_forward_fill behaves as expected", { data <- setup() - + new_th1 <- data$null_th1$ update_by(uby_forward_fill()) expect_equal(as.data.frame(new_th1), na.locf(data$null_df1, na.rm = FALSE)) - + new_th2 <- data$null_th2$ update_by(uby_forward_fill()) expect_equal(as.data.frame(new_th2), na.locf(data$null_df2, na.rm = FALSE)) - + new_th3 <- data$null_th3$ update_by(uby_forward_fill()) expect_equal(as.data.frame(new_th3), na.locf(data$null_df3, na.rm = FALSE)) - + new_th4 <- data$null_th4$ update_by(uby_forward_fill()) expect_equal(as.data.frame(new_th4), na.locf(data$null_df4, na.rm = FALSE)) - + new_th5 <- data$null_th5$ update_by(uby_forward_fill()) expect_equal(as.data.frame(new_th5), na.locf(data$null_df5, na.rm = FALSE)) - + data$client$close() }) test_that("uby_delta behaves as expected", { data <- setup() - + new_tb1 <- data$df1 %>% mutate(delta_int_col = c(NaN, diff(int_col))) new_th1 <- data$th1$ update_by(uby_delta("delta_int_col = int_col")) expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) - + new_tb2 <- data$df2 %>% mutate(delta_col1 = c(NaN, diff(col1)), delta_col3 = c(NaN, diff(col3))) new_th2 <- data$th2$ update_by(uby_delta(c("delta_col1 = col1", "delta_col3 = col3"))) expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) - + new_tb3 <- data$df3 %>% group_by(bool_col) %>% mutate(delta_int_col = c(NaN, diff(int_col))) new_th3 <- data$th3$ update_by(uby_delta("delta_int_col = int_col"), by = "bool_col") expect_equal(as.data.frame(new_th3), as.data.frame(new_tb3)) - + new_tb4 <- data$df4 %>% group_by(X) %>% mutate(delta_Number1 = c(NaN, diff(Number1)), delta_Number2 = c(NaN, diff(Number2))) new_th4 <- data$th4$ update_by(uby_delta(c("delta_Number1 = Number1", "delta_Number2 = Number2")), by = "X") expect_equal(as.data.frame(new_th4), as.data.frame(new_tb4)) - + new_tb5 <- data$df5 %>% group_by(Y) %>% mutate(delta_Number1 = c(NaN, diff(Number1)), delta_Number2 = c(NaN, diff(Number2))) new_th5 <- data$th5$ update_by(uby_delta(c("delta_Number1 = Number1", "delta_Number2 = Number2")), by = "Y") expect_equal(as.data.frame(new_th5), as.data.frame(new_tb5)) - + new_tb6 <- rbind(data$df4, data$df5, data$df4, data$df5) %>% group_by(X, Y) %>% mutate(delta_Number1 = c(NaN, diff(Number1)), delta_Number2 = c(NaN, diff(Number2))) new_th6 <- merge_tables(data$th4, data$th5, data$th4, data$th5)$ update_by(uby_delta(c("delta_Number1 = Number1", "delta_Number2 = Number2")), by = c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) - + data$client$close() }) test_that("uby_ema_tick behaves as expected", { data <- setup() - + custom_ema <- function(decay_ticks, x) { if (length(x) == 1) { return(x) } - a = exp(-1/decay_ticks) - ema = 
c(x[1]) - for(i in seq(2,length(x))) { - ema[i] = a*ema[i-1] + (1-a)*x[i] + a <- exp(-1 / decay_ticks) + ema <- c(x[1]) + for (i in seq(2, length(x))) { + ema[i] <- a * ema[i - 1] + (1 - a) * x[i] } return(ema) } - + new_tb1 <- data$df1 %>% mutate(dbl_col = custom_ema(2, dbl_col)) new_th1 <- data$th1$ update_by(uby_ema_tick(2, "dbl_col")) expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) - + new_tb2 <- data$df2 %>% mutate(col1 = custom_ema(5, col1), col3 = custom_ema(5, col3)) new_th2 <- data$th2$ update_by(uby_ema_tick(5, c("col1", "col3"))) expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) - + new_tb3 <- data$df3 %>% group_by(bool_col) %>% mutate(ema_int_col = custom_ema(9, int_col)) new_th3 <- data$th3$ update_by(uby_ema_tick(9, "ema_int_col = int_col"), by = "bool_col") expect_equal(as.data.frame(new_th3), as.data.frame(new_tb3)) - + new_tb4 <- data$df4 %>% group_by(X) %>% mutate(ema_Number1 = custom_ema(3, Number1), ema_Number2 = custom_ema(3, Number2)) new_th4 <- data$th4$ update_by(uby_ema_tick(3, c("ema_Number1 = Number1", "ema_Number2 = Number2")), by = "X") expect_equal(as.data.frame(new_th4), as.data.frame(new_tb4)) - + new_tb5 <- data$df5 %>% group_by(Y) %>% mutate(ema_Number1 = custom_ema(3, Number1), ema_Number2 = custom_ema(3, Number2)) @@ -396,327 +406,327 @@ test_that("uby_ema_tick behaves as expected", { new_th6 <- merge_tables(data$th4, data$th5, data$th4, data$th5)$ update_by(uby_ema_tick(3, c("ema_Number1 = Number1", "ema_Number2 = Number2")), by = c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) - + data$client$close() }) test_that("uby_ema_time behaves as expected", { data <- setup() - + custom_ema_time <- function(ts, decay_time, x) { if (length(x) == 1) { return(x) } - time_diffs = as.numeric(ts[2:length(ts)] - ts[1:length(ts)-1]) - a = exp(-time_diffs/as.numeric(duration(decay_time))) - ema = c(x[1]) - for(i in seq(2,length(x))) { - ema[i] = a[i-1]*ema[i-1] + (1-a[i-1])*x[i] + time_diffs <- as.numeric(ts[2:length(ts)] - ts[1:length(ts) - 1]) + a <- exp(-time_diffs / as.numeric(duration(decay_time))) + ema <- c(x[1]) + for (i in seq(2, length(x))) { + ema[i] <- a[i - 1] * ema[i - 1] + (1 - a[i - 1]) * x[i] } return(ema) } - + new_tb1 <- data$df3 %>% mutate(ema_int_col = custom_ema_time(time_col, "PT3s", int_col)) new_th1 <- data$th3$ update_by(uby_ema_time("time_col", "PT3s", "ema_int_col = int_col")) expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) - + new_tb2 <- data$df3 %>% group_by(bool_col) %>% mutate(ema_int_col = custom_ema_time(time_col, "PT3s", int_col)) new_th2 <- data$th3$ update_by(uby_ema_time("time_col", "PT3s", "ema_int_col = int_col"), by = "bool_col") expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) - + data$client$close() }) test_that("uby_ems_tick behaves as expected", { data <- setup() - + custom_ems <- function(decay_ticks, x) { if (length(x) == 1) { return(x) } - a = exp(-1/decay_ticks) - ems = c(x[1]) - for(i in seq(2,length(x))) { - ems[i] = a*ems[i-1] + x[i] + a <- exp(-1 / decay_ticks) + ems <- c(x[1]) + for (i in seq(2, length(x))) { + ems[i] <- a * ems[i - 1] + x[i] } return(ems) } - + new_tb1 <- data$df1 %>% mutate(dbl_col = custom_ems(2, dbl_col)) new_th1 <- data$th1$ update_by(uby_ems_tick(2, "dbl_col")) expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) - + new_tb2 <- data$df2 %>% mutate(col1 = custom_ems(5, col1), col3 = custom_ems(5, col3)) new_th2 <- data$th2$ update_by(uby_ems_tick(5, c("col1", "col3"))) expect_equal(as.data.frame(new_th2), 
as.data.frame(new_tb2)) - + new_tb3 <- data$df3 %>% group_by(bool_col) %>% mutate(ems_int_col = custom_ems(9, int_col)) new_th3 <- data$th3$ update_by(uby_ems_tick(9, "ems_int_col = int_col"), by = "bool_col") expect_equal(as.data.frame(new_th3), as.data.frame(new_tb3)) - + new_tb4 <- data$df4 %>% group_by(X) %>% mutate(ems_Number1 = custom_ems(3, Number1), ems_Number2 = custom_ems(3, Number2)) new_th4 <- data$th4$ update_by(uby_ems_tick(3, c("ems_Number1 = Number1", "ems_Number2 = Number2")), by = "X") expect_equal(as.data.frame(new_th4), as.data.frame(new_tb4)) - + new_tb5 <- data$df5 %>% group_by(Y) %>% mutate(ems_Number1 = custom_ems(3, Number1), ems_Number2 = custom_ems(3, Number2)) new_th5 <- data$th5$ update_by(uby_ems_tick(3, c("ems_Number1 = Number1", "ems_Number2 = Number2")), by = "Y") expect_equal(as.data.frame(new_th5), as.data.frame(new_tb5)) - + new_tb6 <- rbind(data$df4, data$df5, data$df4, data$df5) %>% group_by(X, Y) %>% mutate(ems_Number1 = custom_ems(3, Number1), ems_Number2 = custom_ems(3, Number2)) new_th6 <- merge_tables(data$th4, data$th5, data$th4, data$th5)$ update_by(uby_ems_tick(3, c("ems_Number1 = Number1", "ems_Number2 = Number2")), by = c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) - + data$client$close() }) test_that("uby_ems_time behaves as expected", { data <- setup() - + custom_ems_time <- function(ts, decay_time, x) { if (length(x) == 1) { return(x) } - time_diffs = as.numeric(ts[2:length(ts)] - ts[1:length(ts)-1]) - a = exp(-time_diffs/as.numeric(duration(decay_time))) - ems = c(x[1]) - for(i in seq(2,length(x))) { - ems[i] = a[i-1]*ems[i-1] + x[i] + time_diffs <- as.numeric(ts[2:length(ts)] - ts[1:length(ts) - 1]) + a <- exp(-time_diffs / as.numeric(duration(decay_time))) + ems <- c(x[1]) + for (i in seq(2, length(x))) { + ems[i] <- a[i - 1] * ems[i - 1] + x[i] } return(ems) } - + new_tb1 <- data$df3 %>% mutate(ems_int_col = custom_ems_time(time_col, "PT3s", int_col)) new_th1 <- data$th3$ update_by(uby_ems_time("time_col", "PT3s", "ems_int_col = int_col")) expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) - + new_tb2 <- data$df3 %>% group_by(bool_col) %>% mutate(ems_int_col = custom_ems_time(time_col, "PT3s", int_col)) new_th2 <- data$th3$ update_by(uby_ems_time("time_col", "PT3s", "ems_int_col = int_col"), by = "bool_col") expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) - + data$client$close() }) test_that("uby_emmin_tick behaves as expected", { data <- setup() - + custom_emmin <- function(decay_ticks, x) { if (length(x) == 1) { return(x) } - a = exp(-1/decay_ticks) - emmin = c(x[1]) - for(i in seq(2,length(x))) { - emmin[i] = min(a*emmin[i-1], x[i]) + a <- exp(-1 / decay_ticks) + emmin <- c(x[1]) + for (i in seq(2, length(x))) { + emmin[i] <- min(a * emmin[i - 1], x[i]) } return(emmin) } - + new_tb1 <- data$df1 %>% mutate(dbl_col = custom_emmin(2, dbl_col)) new_th1 <- data$th1$ update_by(uby_emmin_tick(2, "dbl_col")) expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) - + new_tb2 <- data$df2 %>% mutate(col1 = custom_emmin(5, col1), col3 = custom_emmin(5, col3)) new_th2 <- data$th2$ update_by(uby_emmin_tick(5, c("col1", "col3"))) expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) - + new_tb3 <- data$df3 %>% group_by(bool_col) %>% mutate(emmin_int_col = custom_emmin(9, int_col)) new_th3 <- data$th3$ update_by(uby_emmin_tick(9, "emmin_int_col = int_col"), by = "bool_col") expect_equal(as.data.frame(new_th3), as.data.frame(new_tb3)) - + new_tb4 <- data$df4 %>% group_by(X) %>% 
mutate(emmin_Number1 = custom_emmin(3, Number1), emmin_Number2 = custom_emmin(3, Number2)) new_th4 <- data$th4$ update_by(uby_emmin_tick(3, c("emmin_Number1 = Number1", "emmin_Number2 = Number2")), by = "X") expect_equal(as.data.frame(new_th4), as.data.frame(new_tb4)) - + new_tb5 <- data$df5 %>% group_by(Y) %>% mutate(emmin_Number1 = custom_emmin(3, Number1), emmin_Number2 = custom_emmin(3, Number2)) new_th5 <- data$th5$ update_by(uby_emmin_tick(3, c("emmin_Number1 = Number1", "emmin_Number2 = Number2")), by = "Y") expect_equal(as.data.frame(new_th5), as.data.frame(new_tb5)) - + new_tb6 <- rbind(data$df4, data$df5, data$df4, data$df5) %>% group_by(X, Y) %>% mutate(emmin_Number1 = custom_emmin(3, Number1), emmin_Number2 = custom_emmin(3, Number2)) new_th6 <- merge_tables(data$th4, data$th5, data$th4, data$th5)$ update_by(uby_emmin_tick(3, c("emmin_Number1 = Number1", "emmin_Number2 = Number2")), by = c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) - + data$client$close() }) test_that("uby_emmin_time behaves as expected", { data <- setup() - + custom_emmin_time <- function(ts, decay_time, x) { if (length(x) == 1) { return(x) } - time_diffs = as.numeric(ts[2:length(ts)] - ts[1:length(ts)-1]) - a = exp(-time_diffs/as.numeric(duration(decay_time))) - emmin = c(x[1]) - for(i in seq(2,length(x))) { - emmin[i] = min(a[i-1]*emmin[i-1], x[i]) + time_diffs <- as.numeric(ts[2:length(ts)] - ts[1:length(ts) - 1]) + a <- exp(-time_diffs / as.numeric(duration(decay_time))) + emmin <- c(x[1]) + for (i in seq(2, length(x))) { + emmin[i] <- min(a[i - 1] * emmin[i - 1], x[i]) } return(emmin) } - + new_tb1 <- data$df3 %>% mutate(emmin_int_col = custom_emmin_time(time_col, "PT3s", int_col)) new_th1 <- data$th3$ update_by(uby_emmin_time("time_col", "PT3s", "emmin_int_col = int_col")) expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) - + new_tb2 <- data$df3 %>% group_by(bool_col) %>% mutate(emmin_int_col = custom_emmin_time(time_col, "PT3s", int_col)) new_th2 <- data$th3$ update_by(uby_emmin_time("time_col", "PT3s", "emmin_int_col = int_col"), by = "bool_col") expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) - + data$client$close() }) test_that("uby_emmax_tick behaves as expected", { data <- setup() - + custom_emmax <- function(decay_ticks, x) { if (length(x) == 1) { return(x) } - a = exp(-1/decay_ticks) - emmax = c(x[1]) - for(i in seq(2,length(x))) { - emmax[i] = max(a*emmax[i-1], x[i]) + a <- exp(-1 / decay_ticks) + emmax <- c(x[1]) + for (i in seq(2, length(x))) { + emmax[i] <- max(a * emmax[i - 1], x[i]) } return(emmax) } - + new_tb1 <- data$df1 %>% mutate(dbl_col = custom_emmax(2, dbl_col)) new_th1 <- data$th1$ update_by(uby_emmax_tick(2, "dbl_col")) expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) - + new_tb2 <- data$df2 %>% mutate(col1 = custom_emmax(5, col1), col3 = custom_emmax(5, col3)) new_th2 <- data$th2$ update_by(uby_emmax_tick(5, c("col1", "col3"))) expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) - + new_tb3 <- data$df3 %>% group_by(bool_col) %>% mutate(emmax_int_col = custom_emmax(9, int_col)) new_th3 <- data$th3$ update_by(uby_emmax_tick(9, "emmax_int_col = int_col"), by = "bool_col") expect_equal(as.data.frame(new_th3), as.data.frame(new_tb3)) - + new_tb4 <- data$df4 %>% group_by(X) %>% mutate(emmax_Number1 = custom_emmax(3, Number1), emmax_Number2 = custom_emmax(3, Number2)) new_th4 <- data$th4$ update_by(uby_emmax_tick(3, c("emmax_Number1 = Number1", "emmax_Number2 = Number2")), by = "X") 
expect_equal(as.data.frame(new_th4), as.data.frame(new_tb4)) - + new_tb5 <- data$df5 %>% group_by(Y) %>% mutate(emmax_Number1 = custom_emmax(3, Number1), emmax_Number2 = custom_emmax(3, Number2)) new_th5 <- data$th5$ update_by(uby_emmax_tick(3, c("emmax_Number1 = Number1", "emmax_Number2 = Number2")), by = "Y") expect_equal(as.data.frame(new_th5), as.data.frame(new_tb5)) - + new_tb6 <- rbind(data$df4, data$df5, data$df4, data$df5) %>% group_by(X, Y) %>% mutate(emmax_Number1 = custom_emmax(3, Number1), emmax_Number2 = custom_emmax(3, Number2)) new_th6 <- merge_tables(data$th4, data$th5, data$th4, data$th5)$ update_by(uby_emmax_tick(3, c("emmax_Number1 = Number1", "emmax_Number2 = Number2")), by = c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) - + data$client$close() }) test_that("uby_emmax_time behaves as expected", { data <- setup() - + custom_emmax_time <- function(ts, decay_time, x) { if (length(x) == 1) { return(x) } - time_diffs = as.numeric(ts[2:length(ts)] - ts[1:length(ts)-1]) - a = exp(-time_diffs/as.numeric(duration(decay_time))) - emmax = c(x[1]) - for(i in seq(2,length(x))) { - emmax[i] = max(a[i-1]*emmax[i-1], x[i]) + time_diffs <- as.numeric(ts[2:length(ts)] - ts[1:length(ts) - 1]) + a <- exp(-time_diffs / as.numeric(duration(decay_time))) + emmax <- c(x[1]) + for (i in seq(2, length(x))) { + emmax[i] <- max(a[i - 1] * emmax[i - 1], x[i]) } return(emmax) } - + new_tb1 <- data$df3 %>% mutate(emmax_int_col = custom_emmax_time(time_col, "PT3s", int_col)) new_th1 <- data$th3$ update_by(uby_emmax_time("time_col", "PT3s", "emmax_int_col = int_col")) expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) - + new_tb2 <- data$df3 %>% group_by(bool_col) %>% mutate(emmax_int_col = custom_emmax_time(time_col, "PT3s", int_col)) new_th2 <- data$th3$ update_by(uby_emmax_time("time_col", "PT3s", "emmax_int_col = int_col"), by = "bool_col") expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) - + data$client$close() }) test_that("uby_emstd_tick behaves as expected", { data <- setup() - + custom_emstd <- function(decay_ticks, x) { if (length(x) == 1) { return(NA) } - a = exp(-1/decay_ticks) - current_ema = x[1] - emvar = c(0) - for(i in seq(2,length(x))) { - emvar[i] = a*(emvar[i-1] + (1-a)*((x[i] - current_ema)^2)) - current_ema = a*current_ema + (1-a)*x[i] + a <- exp(-1 / decay_ticks) + current_ema <- x[1] + emvar <- c(0) + for (i in seq(2, length(x))) { + emvar[i] <- a * (emvar[i - 1] + (1 - a) * ((x[i] - current_ema)^2)) + current_ema <- a * current_ema + (1 - a) * x[i] } - emvar[1] = NA + emvar[1] <- NA return(sqrt(emvar)) } @@ -765,632 +775,710 @@ test_that("uby_emstd_tick behaves as expected", { test_that("uby_emstd_time behaves as expected", { data <- setup() - + custom_emstd_time <- function(ts, decay_time, x) { if (length(x) == 1) { return(NA) } - time_diffs = as.numeric(ts[2:length(ts)] - ts[1:length(ts)-1]) - a = exp(-time_diffs/as.numeric(duration(decay_time))) - current_ema = x[1] - emvar = c(0) - for(i in seq(2,length(x))) { - emvar[i] = a[i-1]*(emvar[i-1] + (1-a[i-1])*((x[i] - current_ema)^2)) - current_ema = a[i-1]*current_ema + (1-a[i-1])*x[i] + time_diffs <- as.numeric(ts[2:length(ts)] - ts[1:length(ts) - 1]) + a <- exp(-time_diffs / as.numeric(duration(decay_time))) + current_ema <- x[1] + emvar <- c(0) + for (i in seq(2, length(x))) { + emvar[i] <- a[i - 1] * (emvar[i - 1] + (1 - a[i - 1]) * ((x[i] - current_ema)^2)) + current_ema <- a[i - 1] * current_ema + (1 - a[i - 1]) * x[i] } - emvar[1] = NA + emvar[1] <- NA return(sqrt(emvar)) 
} - + new_tb1 <- data$df3 %>% mutate(emstd_int_col = custom_emstd_time(time_col, "PT3s", int_col)) new_th1 <- data$th3$ update_by(uby_emstd_time("time_col", "PT3s", "emstd_int_col = int_col")) expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) - + new_tb2 <- data$df3 %>% group_by(bool_col) %>% mutate(emstd_int_col = custom_emstd_time(time_col, "PT3s", int_col)) new_th2 <- data$th3$ update_by(uby_emstd_time("time_col", "PT3s", "emstd_int_col = int_col"), by = "bool_col") expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) - + data$client$close() }) test_that("uby_rolling_sum_tick behaves as expected", { data <- setup() - + new_tb1a <- data$df1 %>% mutate(dbl_col = rollapply(dbl_col, 3, sum, partial = TRUE, align = "right")) new_th1a <- data$th1$ update_by(uby_rolling_sum_tick("dbl_col", rev_ticks = 3)) expect_equal(as.data.frame(new_th1a), as.data.frame(new_tb1a)) - + new_tb1b <- data$df1 %>% mutate(dbl_col = rollapply(dbl_col, 3, sum, partial = TRUE, align = "left")) new_th1b <- data$th1$ update_by(uby_rolling_sum_tick("dbl_col", rev_ticks = 1, fwd_ticks = 2)) expect_equal(as.data.frame(new_th1b), as.data.frame(new_tb1b)) - + new_tb1c <- data$df1 %>% mutate(dbl_col = rollapply(dbl_col, 3, sum, partial = TRUE, align = "center")) new_th1c <- data$th1$ update_by(uby_rolling_sum_tick("dbl_col", rev_ticks = 2, fwd_ticks = 1)) expect_equal(as.data.frame(new_th1c), as.data.frame(new_tb1c)) - + new_tb2a <- data$df2 %>% - mutate(col1 = rollapply(col1, 5, sum, partial = TRUE, align = "right"), - col3 = rollapply(col3, 5, sum, partial = TRUE, align = "right")) + mutate( + col1 = rollapply(col1, 5, sum, partial = TRUE, align = "right"), + col3 = rollapply(col3, 5, sum, partial = TRUE, align = "right") + ) new_th2a <- data$th2$ update_by(uby_rolling_sum_tick(c("col1", "col3"), rev_ticks = 5)) expect_equal(as.data.frame(new_th2a), as.data.frame(new_tb2a)) - + new_tb2b <- data$df2 %>% - mutate(col1 = rollapply(col1, 5, sum, partial = TRUE, align = "left"), - col3 = rollapply(col3, 5, sum, partial = TRUE, align = "left")) + mutate( + col1 = rollapply(col1, 5, sum, partial = TRUE, align = "left"), + col3 = rollapply(col3, 5, sum, partial = TRUE, align = "left") + ) new_th2b <- data$th2$ update_by(uby_rolling_sum_tick(c("col1", "col3"), rev_ticks = 1, fwd_ticks = 4)) expect_equal(as.data.frame(new_th2b), as.data.frame(new_tb2b)) - + new_tb2c <- data$df2 %>% - mutate(col1 = rollapply(col1, 5, sum, partial = TRUE, align = "center"), - col3 = rollapply(col3, 5, sum, partial = TRUE, align = "center")) + mutate( + col1 = rollapply(col1, 5, sum, partial = TRUE, align = "center"), + col3 = rollapply(col3, 5, sum, partial = TRUE, align = "center") + ) new_th2c <- data$th2$ update_by(uby_rolling_sum_tick(c("col1", "col3"), rev_ticks = 3, fwd_ticks = 2)) expect_equal(as.data.frame(new_th2c), as.data.frame(new_tb2c)) - + new_tb3a <- data$df3 %>% group_by(bool_col) %>% mutate(int_col = rollapply(int_col, 9, sum, partial = TRUE, align = "right")) new_th3a <- data$th3$ update_by(uby_rolling_sum_tick("int_col", rev_ticks = 9), by = "bool_col") expect_equal(as.data.frame(new_th3a), as.data.frame(new_tb3a)) - + new_tb3b <- data$df3 %>% group_by(bool_col) %>% mutate(int_col = rollapply(int_col, 9, sum, partial = TRUE, align = "left")) new_th3b <- data$th3$ update_by(uby_rolling_sum_tick("int_col", rev_ticks = 1, fwd_ticks = 8), by = "bool_col") expect_equal(as.data.frame(new_th3b), as.data.frame(new_tb3b)) - + new_tb3c <- data$df3 %>% group_by(bool_col) %>% mutate(int_col = rollapply(int_col, 9, sum, 
partial = TRUE, align = "center")) new_th3c <- data$th3$ update_by(uby_rolling_sum_tick("int_col", rev_ticks = 5, fwd_ticks = 4), by = "bool_col") expect_equal(as.data.frame(new_th3c), as.data.frame(new_tb3c)) - + new_tb4a <- data$df4 %>% group_by(X) %>% - mutate(Number1 = rollapply(Number1, 3, sum, partial = TRUE, align = "right"), - Number2 = rollapply(Number2, 3, sum, partial = TRUE, align = "right")) + mutate( + Number1 = rollapply(Number1, 3, sum, partial = TRUE, align = "right"), + Number2 = rollapply(Number2, 3, sum, partial = TRUE, align = "right") + ) new_th4a <- data$th4$ update_by(uby_rolling_sum_tick(c("Number1", "Number2"), rev_ticks = 3), by = "X") expect_equal(as.data.frame(new_th4a), as.data.frame(new_tb4a)) - + new_tb4b <- data$df4 %>% group_by(X) %>% - mutate(Number1 = rollapply(Number1, 3, sum, partial = TRUE, align = "left"), - Number2 = rollapply(Number2, 3, sum, partial = TRUE, align = "left")) + mutate( + Number1 = rollapply(Number1, 3, sum, partial = TRUE, align = "left"), + Number2 = rollapply(Number2, 3, sum, partial = TRUE, align = "left") + ) new_th4b <- data$th4$ update_by(uby_rolling_sum_tick(c("Number1", "Number2"), rev_ticks = 1, fwd_ticks = 2), by = "X") expect_equal(as.data.frame(new_th4b), as.data.frame(new_tb4b)) - + new_tb4c <- data$df4 %>% group_by(X) %>% - mutate(Number1 = rollapply(Number1, 3, sum, partial = TRUE, align = "center"), - Number2 = rollapply(Number2, 3, sum, partial = TRUE, align = "center")) + mutate( + Number1 = rollapply(Number1, 3, sum, partial = TRUE, align = "center"), + Number2 = rollapply(Number2, 3, sum, partial = TRUE, align = "center") + ) new_th4c <- data$th4$ update_by(uby_rolling_sum_tick(c("Number1", "Number2"), rev_ticks = 2, fwd_ticks = 1), by = "X") expect_equal(as.data.frame(new_th4c), as.data.frame(new_tb4c)) - + new_tb5a <- data$df5 %>% group_by(Y) %>% - mutate(Number1 = rollapply(Number1, 3, sum, partial = TRUE, align = "right"), - Number2 = rollapply(Number2, 3, sum, partial = TRUE, align = "right")) + mutate( + Number1 = rollapply(Number1, 3, sum, partial = TRUE, align = "right"), + Number2 = rollapply(Number2, 3, sum, partial = TRUE, align = "right") + ) new_th5a <- data$th5$ update_by(uby_rolling_sum_tick(c("Number1", "Number2"), rev_ticks = 3), by = "Y") expect_equal(as.data.frame(new_th5a), as.data.frame(new_tb5a)) - + new_tb5b <- data$df5 %>% group_by(Y) %>% - mutate(Number1 = rollapply(Number1, 3, sum, partial = TRUE, align = "left"), - Number2 = rollapply(Number2, 3, sum, partial = TRUE, align = "left")) + mutate( + Number1 = rollapply(Number1, 3, sum, partial = TRUE, align = "left"), + Number2 = rollapply(Number2, 3, sum, partial = TRUE, align = "left") + ) new_th5b <- data$th5$ update_by(uby_rolling_sum_tick(c("Number1", "Number2"), rev_ticks = 1, fwd_ticks = 2), by = "Y") expect_equal(as.data.frame(new_th5b), as.data.frame(new_tb5b)) - + new_tb5c <- data$df5 %>% group_by(Y) %>% - mutate(Number1 = rollapply(Number1, 3, sum, partial = TRUE, align = "center"), - Number2 = rollapply(Number2, 3, sum, partial = TRUE, align = "center")) + mutate( + Number1 = rollapply(Number1, 3, sum, partial = TRUE, align = "center"), + Number2 = rollapply(Number2, 3, sum, partial = TRUE, align = "center") + ) new_th5c <- data$th5$ update_by(uby_rolling_sum_tick(c("Number1", "Number2"), rev_ticks = 2, fwd_ticks = 1), by = "Y") expect_equal(as.data.frame(new_th5c), as.data.frame(new_tb5c)) - + new_tb6a <- rbind(data$df4, data$df5, data$df4, data$df5) %>% group_by(X, Y) %>% - mutate(Number1 = rollapply(Number1, 3, sum, partial = 
TRUE, align = "right"), - Number2 = rollapply(Number2, 3, sum, partial = TRUE, align = "right")) + mutate( + Number1 = rollapply(Number1, 3, sum, partial = TRUE, align = "right"), + Number2 = rollapply(Number2, 3, sum, partial = TRUE, align = "right") + ) new_th6a <- merge_tables(data$th4, data$th5, data$th4, data$th5)$ update_by(uby_rolling_sum_tick(c("Number1", "Number2"), rev_ticks = 3), by = c("X", "Y")) expect_equal(as.data.frame(new_th6a), as.data.frame(new_tb6a)) - + new_tb6b <- rbind(data$df4, data$df5, data$df4, data$df5) %>% group_by(X, Y) %>% - mutate(Number1 = rollapply(Number1, 3, sum, partial = TRUE, align = "left"), - Number2 = rollapply(Number2, 3, sum, partial = TRUE, align = "left")) + mutate( + Number1 = rollapply(Number1, 3, sum, partial = TRUE, align = "left"), + Number2 = rollapply(Number2, 3, sum, partial = TRUE, align = "left") + ) new_th6b <- merge_tables(data$th4, data$th5, data$th4, data$th5)$ update_by(uby_rolling_sum_tick(c("Number1", "Number2"), rev_ticks = 1, fwd_ticks = 2), by = c("X", "Y")) expect_equal(as.data.frame(new_th6b), as.data.frame(new_tb6b)) - + new_tb6c <- rbind(data$df4, data$df5, data$df4, data$df5) %>% group_by(X, Y) %>% - mutate(Number1 = rollapply(Number1, 3, sum, partial = TRUE, align = "center"), - Number2 = rollapply(Number2, 3, sum, partial = TRUE, align = "center")) + mutate( + Number1 = rollapply(Number1, 3, sum, partial = TRUE, align = "center"), + Number2 = rollapply(Number2, 3, sum, partial = TRUE, align = "center") + ) new_th6c <- merge_tables(data$th4, data$th5, data$th4, data$th5)$ update_by(uby_rolling_sum_tick(c("Number1", "Number2"), rev_ticks = 2, fwd_ticks = 1), by = c("X", "Y")) expect_equal(as.data.frame(new_th6c), as.data.frame(new_tb6c)) - + data$client$close() }) test_that("uby_rolling_sum_time behaves as expected", { data <- setup() - + new_tb1a <- head(data$df3, 500) %>% - mutate(int_col = rollapply(int_col, 9, sum, partial=TRUE, align="right")) + mutate(int_col = rollapply(int_col, 9, sum, partial = TRUE, align = "right")) new_th1a <- head(data$th3, 500)$ update_by(uby_rolling_sum_time("time_col", "int_col", "PT8s")) expect_equal(as.data.frame(new_th1a), as.data.frame(new_tb1a)) - + new_tb1b <- head(data$df3, 500) %>% - mutate(int_col = rollapply(int_col, 9, sum, partial=TRUE, align="left")) + mutate(int_col = rollapply(int_col, 9, sum, partial = TRUE, align = "left")) new_th1b <- head(data$th3, 500)$ update_by(uby_rolling_sum_time("time_col", "int_col", "PT0s", "PT8s")) expect_equal(as.data.frame(new_th1b), as.data.frame(new_tb1b)) - + new_tb1c <- head(data$df3, 500) %>% - mutate(int_col = rollapply(int_col, 9, sum, partial=TRUE, align="center")) + mutate(int_col = rollapply(int_col, 9, sum, partial = TRUE, align = "center")) new_th1c <- head(data$th3, 500)$ update_by(uby_rolling_sum_time("time_col", "int_col", "PT4s", "PT4s")) expect_equal(as.data.frame(new_th1c), as.data.frame(new_tb1c)) - + new_tb2a <- head(data$df3, 500) %>% - mutate(int_col = custom_rolling_time_op(int_col, bool_col, width=9, FUN=sum, partial=TRUE, align="right", na.rm=TRUE)) + mutate(int_col = custom_rolling_time_op(int_col, bool_col, width = 9, FUN = sum, partial = TRUE, align = "right", na.rm = TRUE)) new_th2a <- head(data$th3, 500)$ update_by(uby_rolling_sum_time("time_col", "int_col", "PT8s"), by = "bool_col") expect_equal(as.data.frame(new_th2a), as.data.frame(new_tb2a)) - + new_tb2b <- head(data$df3, 500) %>% - mutate(int_col = custom_rolling_time_op(int_col, bool_col, width=9, FUN=sum, partial=TRUE, align="left", na.rm=TRUE)) + 
mutate(int_col = custom_rolling_time_op(int_col, bool_col, width = 9, FUN = sum, partial = TRUE, align = "left", na.rm = TRUE)) new_th2b <- head(data$th3, 500)$ update_by(uby_rolling_sum_time("time_col", "int_col", "PT0s", "PT8s"), by = "bool_col") expect_equal(as.data.frame(new_th2b), as.data.frame(new_tb2b)) - + new_tb2c <- head(data$df3, 500) %>% - mutate(int_col = custom_rolling_time_op(int_col, bool_col, width=9, FUN=sum, partial=TRUE, align="center", na.rm=TRUE)) + mutate(int_col = custom_rolling_time_op(int_col, bool_col, width = 9, FUN = sum, partial = TRUE, align = "center", na.rm = TRUE)) new_th2c <- head(data$th3, 500)$ update_by(uby_rolling_sum_time("time_col", "int_col", "PT4s", "PT4s"), by = "bool_col") expect_equal(as.data.frame(new_th2b), as.data.frame(new_tb2b)) - + data$client$close() }) test_that("uby_rolling_group_tick behaves as expected", { data <- setup() - - right_group <- list(1.65, - c(1.6500, 3.1234), - c(1.6500, 3.1234, 100000.5000), - c(3.1234, 100000.5000, 543.234567), - c(100000.5000, 543.234567, 0.0000)) + + right_group <- list( + 1.65, + c(1.6500, 3.1234), + c(1.6500, 3.1234, 100000.5000), + c(3.1234, 100000.5000, 543.234567), + c(100000.5000, 543.234567, 0.0000) + ) new_th1a <- data$th1$ update_by(uby_rolling_group_tick("dbl_col_group = dbl_col", rev_ticks = 3)) expect_equal(as.list(as.data.frame(new_th1a)$dbl_col_group), right_group) - - - left_group <- list(c(1.6500, 3.1234, 100000.5000), - c(3.1234, 100000.5000, 543.234567), - c(100000.5000, 543.234567, 0.0000), - c(543.234567, 0.0000), - 0) + + + left_group <- list( + c(1.6500, 3.1234, 100000.5000), + c(3.1234, 100000.5000, 543.234567), + c(100000.5000, 543.234567, 0.0000), + c(543.234567, 0.0000), + 0 + ) new_th1b <- data$th1$ update_by(uby_rolling_group_tick("dbl_col_group = dbl_col", rev_ticks = 1, fwd_ticks = 2)) expect_equal(as.list(as.data.frame(new_th1b)$dbl_col_group), left_group) - - - center_group <- list(c(1.6500, 3.1234), - c(1.6500, 3.1234, 100000.5000), - c(3.1234, 100000.5000, 543.234567), - c(100000.5000, 543.234567, 0.0000), - c(543.234567, 0.0000)) + + + center_group <- list( + c(1.6500, 3.1234), + c(1.6500, 3.1234, 100000.5000), + c(3.1234, 100000.5000, 543.234567), + c(100000.5000, 543.234567, 0.0000), + c(543.234567, 0.0000) + ) new_th1c <- data$th1$ update_by(uby_rolling_group_tick("dbl_col_group = dbl_col", rev_ticks = 2, fwd_ticks = 1)) expect_equal(as.list(as.data.frame(new_th1c)$dbl_col_group), center_group) - + data$client$close() }) test_that("uby_rolling_group_time behaves as expected", { data <- setup() - - right_group <- c(lapply(1:9, function(x) 1:x), lapply(2:492, function(x) c(x:(x+8)))) + + right_group <- c(lapply(1:9, function(x) 1:x), lapply(2:492, function(x) c(x:(x + 8)))) new_th1a <- data$deterministic_th3$ update_by(uby_rolling_group_time("time_col", "int_col_group = int_col", "PT8s")) expect_equal(as.list(as.data.frame(new_th1a)$int_col_group), right_group) - - left_group <- c(lapply(1:491, function(x) c(x:(x+8))), lapply(492:500, function(x) x:500)) + + left_group <- c(lapply(1:491, function(x) c(x:(x + 8))), lapply(492:500, function(x) x:500)) new_th1b <- data$deterministic_th3$ update_by(uby_rolling_group_time("time_col", "int_col_group = int_col", "PT0s", "PT8s")) expect_equal(as.list(as.data.frame(new_th1b)$int_col_group), left_group) - - center_group <- c(lapply(5:9, function(x) 1:x), lapply(2:491, function(x) c(x:(x+8))), lapply(492:496, function(x) x:500)) + + center_group <- c(lapply(5:9, function(x) 1:x), lapply(2:491, function(x) c(x:(x + 8))), 
lapply(492:496, function(x) x:500)) new_th1c <- data$deterministic_th3$ update_by(uby_rolling_group_time("time_col", "int_col_group = int_col", "PT4s", "PT4s")) expect_equal(as.list(as.data.frame(new_th1c)$int_col_group), center_group) - + data$client$close() }) test_that("uby_rolling_avg_tick behaves as expected", { data <- setup() - + new_tb1a <- data$df1 %>% mutate(dbl_col = rollapply(dbl_col, 3, mean, partial = TRUE, align = "right")) new_th1a <- data$th1$ update_by(uby_rolling_avg_tick("dbl_col", rev_ticks = 3)) expect_equal(as.data.frame(new_th1a), as.data.frame(new_tb1a)) - + new_tb1b <- data$df1 %>% mutate(dbl_col = rollapply(dbl_col, 3, mean, partial = TRUE, align = "left")) new_th1b <- data$th1$ update_by(uby_rolling_avg_tick("dbl_col", rev_ticks = 1, fwd_ticks = 2)) expect_equal(as.data.frame(new_th1b), as.data.frame(new_tb1b)) - + new_tb1c <- data$df1 %>% mutate(dbl_col = rollapply(dbl_col, 3, mean, partial = TRUE, align = "center")) new_th1c <- data$th1$ update_by(uby_rolling_avg_tick("dbl_col", rev_ticks = 2, fwd_ticks = 1)) expect_equal(as.data.frame(new_th1c), as.data.frame(new_tb1c)) - + new_tb2a <- data$df2 %>% - mutate(col1 = rollapply(col1, 5, mean, partial = TRUE, align = "right"), - col3 = rollapply(col3, 5, mean, partial = TRUE, align = "right")) + mutate( + col1 = rollapply(col1, 5, mean, partial = TRUE, align = "right"), + col3 = rollapply(col3, 5, mean, partial = TRUE, align = "right") + ) new_th2a <- data$th2$ update_by(uby_rolling_avg_tick(c("col1", "col3"), rev_ticks = 5)) expect_equal(as.data.frame(new_th2a), as.data.frame(new_tb2a)) - + new_tb2b <- data$df2 %>% - mutate(col1 = rollapply(col1, 5, mean, partial = TRUE, align = "left"), - col3 = rollapply(col3, 5, mean, partial = TRUE, align = "left")) + mutate( + col1 = rollapply(col1, 5, mean, partial = TRUE, align = "left"), + col3 = rollapply(col3, 5, mean, partial = TRUE, align = "left") + ) new_th2b <- data$th2$ update_by(uby_rolling_avg_tick(c("col1", "col3"), rev_ticks = 1, fwd_ticks = 4)) expect_equal(as.data.frame(new_th2b), as.data.frame(new_tb2b)) - + new_tb2c <- data$df2 %>% - mutate(col1 = rollapply(col1, 5, mean, partial = TRUE, align = "center"), - col3 = rollapply(col3, 5, mean, partial = TRUE, align = "center")) + mutate( + col1 = rollapply(col1, 5, mean, partial = TRUE, align = "center"), + col3 = rollapply(col3, 5, mean, partial = TRUE, align = "center") + ) new_th2c <- data$th2$ update_by(uby_rolling_avg_tick(c("col1", "col3"), rev_ticks = 3, fwd_ticks = 2)) expect_equal(as.data.frame(new_th2c), as.data.frame(new_tb2c)) - + new_tb3a <- data$df3 %>% group_by(bool_col) %>% mutate(int_col = rollapply(int_col, 9, mean, partial = TRUE, align = "right")) new_th3a <- data$th3$ update_by(uby_rolling_avg_tick("int_col", rev_ticks = 9), by = "bool_col") expect_equal(as.data.frame(new_th3a), as.data.frame(new_tb3a)) - + new_tb3b <- data$df3 %>% group_by(bool_col) %>% mutate(int_col = rollapply(int_col, 9, mean, partial = TRUE, align = "left")) new_th3b <- data$th3$ update_by(uby_rolling_avg_tick("int_col", rev_ticks = 1, fwd_ticks = 8), by = "bool_col") expect_equal(as.data.frame(new_th3b), as.data.frame(new_tb3b)) - + new_tb3c <- data$df3 %>% group_by(bool_col) %>% mutate(int_col = rollapply(int_col, 9, mean, partial = TRUE, align = "center")) new_th3c <- data$th3$ update_by(uby_rolling_avg_tick("int_col", rev_ticks = 5, fwd_ticks = 4), by = "bool_col") expect_equal(as.data.frame(new_th3c), as.data.frame(new_tb3c)) - + new_tb4a <- data$df4 %>% group_by(X) %>% - mutate(Number1 = rollapply(Number1, 3, 
mean, partial = TRUE, align = "right"), - Number2 = rollapply(Number2, 3, mean, partial = TRUE, align = "right")) + mutate( + Number1 = rollapply(Number1, 3, mean, partial = TRUE, align = "right"), + Number2 = rollapply(Number2, 3, mean, partial = TRUE, align = "right") + ) new_th4a <- data$th4$ update_by(uby_rolling_avg_tick(c("Number1", "Number2"), rev_ticks = 3), by = "X") expect_equal(as.data.frame(new_th4a), as.data.frame(new_tb4a)) - + new_tb4b <- data$df4 %>% group_by(X) %>% - mutate(Number1 = rollapply(Number1, 3, mean, partial = TRUE, align = "left"), - Number2 = rollapply(Number2, 3, mean, partial = TRUE, align = "left")) + mutate( + Number1 = rollapply(Number1, 3, mean, partial = TRUE, align = "left"), + Number2 = rollapply(Number2, 3, mean, partial = TRUE, align = "left") + ) new_th4b <- data$th4$ update_by(uby_rolling_avg_tick(c("Number1", "Number2"), rev_ticks = 1, fwd_ticks = 2), by = "X") expect_equal(as.data.frame(new_th4b), as.data.frame(new_tb4b)) - + new_tb4c <- data$df4 %>% group_by(X) %>% - mutate(Number1 = rollapply(Number1, 3, mean, partial = TRUE, align = "center"), - Number2 = rollapply(Number2, 3, mean, partial = TRUE, align = "center")) + mutate( + Number1 = rollapply(Number1, 3, mean, partial = TRUE, align = "center"), + Number2 = rollapply(Number2, 3, mean, partial = TRUE, align = "center") + ) new_th4c <- data$th4$ update_by(uby_rolling_avg_tick(c("Number1", "Number2"), rev_ticks = 2, fwd_ticks = 1), by = "X") expect_equal(as.data.frame(new_th4c), as.data.frame(new_tb4c)) - + new_tb5a <- data$df5 %>% group_by(Y) %>% - mutate(Number1 = rollapply(Number1, 3, mean, partial = TRUE, align = "right"), - Number2 = rollapply(Number2, 3, mean, partial = TRUE, align = "right")) + mutate( + Number1 = rollapply(Number1, 3, mean, partial = TRUE, align = "right"), + Number2 = rollapply(Number2, 3, mean, partial = TRUE, align = "right") + ) new_th5a <- data$th5$ update_by(uby_rolling_avg_tick(c("Number1", "Number2"), rev_ticks = 3), by = "Y") expect_equal(as.data.frame(new_th5a), as.data.frame(new_tb5a)) - + new_tb5b <- data$df5 %>% group_by(Y) %>% - mutate(Number1 = rollapply(Number1, 3, mean, partial = TRUE, align = "left"), - Number2 = rollapply(Number2, 3, mean, partial = TRUE, align = "left")) + mutate( + Number1 = rollapply(Number1, 3, mean, partial = TRUE, align = "left"), + Number2 = rollapply(Number2, 3, mean, partial = TRUE, align = "left") + ) new_th5b <- data$th5$ update_by(uby_rolling_avg_tick(c("Number1", "Number2"), rev_ticks = 1, fwd_ticks = 2), by = "Y") expect_equal(as.data.frame(new_th5b), as.data.frame(new_tb5b)) - + new_tb5c <- data$df5 %>% group_by(Y) %>% - mutate(Number1 = rollapply(Number1, 3, mean, partial = TRUE, align = "center"), - Number2 = rollapply(Number2, 3, mean, partial = TRUE, align = "center")) + mutate( + Number1 = rollapply(Number1, 3, mean, partial = TRUE, align = "center"), + Number2 = rollapply(Number2, 3, mean, partial = TRUE, align = "center") + ) new_th5c <- data$th5$ update_by(uby_rolling_avg_tick(c("Number1", "Number2"), rev_ticks = 2, fwd_ticks = 1), by = "Y") expect_equal(as.data.frame(new_th5c), as.data.frame(new_tb5c)) - + new_tb6a <- rbind(data$df4, data$df5, data$df4, data$df5) %>% group_by(X, Y) %>% - mutate(Number1 = rollapply(Number1, 3, mean, partial = TRUE, align = "right"), - Number2 = rollapply(Number2, 3, mean, partial = TRUE, align = "right")) + mutate( + Number1 = rollapply(Number1, 3, mean, partial = TRUE, align = "right"), + Number2 = rollapply(Number2, 3, mean, partial = TRUE, align = "right") + ) new_th6a <- 
merge_tables(data$th4, data$th5, data$th4, data$th5)$ update_by(uby_rolling_avg_tick(c("Number1", "Number2"), rev_ticks = 3), by = c("X", "Y")) expect_equal(as.data.frame(new_th6a), as.data.frame(new_tb6a)) - + new_tb6b <- rbind(data$df4, data$df5, data$df4, data$df5) %>% group_by(X, Y) %>% - mutate(Number1 = rollapply(Number1, 3, mean, partial = TRUE, align = "left"), - Number2 = rollapply(Number2, 3, mean, partial = TRUE, align = "left")) + mutate( + Number1 = rollapply(Number1, 3, mean, partial = TRUE, align = "left"), + Number2 = rollapply(Number2, 3, mean, partial = TRUE, align = "left") + ) new_th6b <- merge_tables(data$th4, data$th5, data$th4, data$th5)$ update_by(uby_rolling_avg_tick(c("Number1", "Number2"), rev_ticks = 1, fwd_ticks = 2), by = c("X", "Y")) expect_equal(as.data.frame(new_th6b), as.data.frame(new_tb6b)) - + new_tb6c <- rbind(data$df4, data$df5, data$df4, data$df5) %>% group_by(X, Y) %>% - mutate(Number1 = rollapply(Number1, 3, mean, partial = TRUE, align = "center"), - Number2 = rollapply(Number2, 3, mean, partial = TRUE, align = "center")) + mutate( + Number1 = rollapply(Number1, 3, mean, partial = TRUE, align = "center"), + Number2 = rollapply(Number2, 3, mean, partial = TRUE, align = "center") + ) new_th6c <- merge_tables(data$th4, data$th5, data$th4, data$th5)$ update_by(uby_rolling_avg_tick(c("Number1", "Number2"), rev_ticks = 2, fwd_ticks = 1), by = c("X", "Y")) expect_equal(as.data.frame(new_th6c), as.data.frame(new_tb6c)) - + data$client$close() }) test_that("uby_rolling_avg_time behaves as expected", { data <- setup() - + new_tb1a <- head(data$df3, 500) %>% - mutate(int_col = rollapply(int_col, 9, mean, partial=TRUE, align="right")) + mutate(int_col = rollapply(int_col, 9, mean, partial = TRUE, align = "right")) new_th1a <- head(data$th3, 500)$ update_by(uby_rolling_avg_time("time_col", "int_col", "PT8s")) expect_equal(as.data.frame(new_th1a), as.data.frame(new_tb1a)) - + new_tb1b <- head(data$df3, 500) %>% - mutate(int_col = rollapply(int_col, 9, mean, partial=TRUE, align="left")) + mutate(int_col = rollapply(int_col, 9, mean, partial = TRUE, align = "left")) new_th1b <- head(data$th3, 500)$ update_by(uby_rolling_avg_time("time_col", "int_col", "PT0s", "PT8s")) expect_equal(as.data.frame(new_th1b), as.data.frame(new_tb1b)) - + new_tb1c <- head(data$df3, 500) %>% - mutate(int_col = rollapply(int_col, 9, mean, partial=TRUE, align="center")) + mutate(int_col = rollapply(int_col, 9, mean, partial = TRUE, align = "center")) new_th1c <- head(data$th3, 500)$ update_by(uby_rolling_avg_time("time_col", "int_col", "PT4s", "PT4s")) expect_equal(as.data.frame(new_th1c), as.data.frame(new_tb1c)) - + new_tb2a <- head(data$df3, 500) %>% - mutate(int_col = custom_rolling_time_op(int_col, bool_col, width=9, FUN=mean, partial=TRUE, align="right", na.rm=TRUE)) + mutate(int_col = custom_rolling_time_op(int_col, bool_col, width = 9, FUN = mean, partial = TRUE, align = "right", na.rm = TRUE)) new_th2a <- head(data$th3, 500)$ update_by(uby_rolling_avg_time("time_col", "int_col", "PT8s"), by = "bool_col") expect_equal(as.data.frame(new_th2a), as.data.frame(new_tb2a)) - + new_tb2b <- head(data$df3, 500) %>% - mutate(int_col = custom_rolling_time_op(int_col, bool_col, width=9, FUN=mean, partial=TRUE, align="left", na.rm=TRUE)) + mutate(int_col = custom_rolling_time_op(int_col, bool_col, width = 9, FUN = mean, partial = TRUE, align = "left", na.rm = TRUE)) new_th2b <- head(data$th3, 500)$ update_by(uby_rolling_avg_time("time_col", "int_col", "PT0s", "PT8s"), by = "bool_col") 
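The time-based rolling tests above pair a 9-row rollapply() window with "PT8s"/"PT4s"/"PT0s" windows on the Deephaven side. The sketch below is an assumption-laden illustration only (it presumes rows spaced exactly one second apart, which is how the deterministic fixtures earlier in the file appear to be built, and ts_demo is not a real fixture); it shows why a [t - rev, t + fwd] duration window then covers rev + fwd + 1 rows, and how the rev/fwd split maps onto rollapply()'s align argument.

# One hypothetical timestamp per second.
ts_demo <- 0:19
t0 <- ts_demo[10]
# rev = "PT8s", fwd = "PT0s": rows in [t0 - 8, t0 + 0] -> 9 rows ending at t0   (align = "right")
sum(ts_demo >= t0 - 8 & ts_demo <= t0 + 0)  # 9
# rev = "PT0s", fwd = "PT8s": rows in [t0 - 0, t0 + 8] -> 9 rows starting at t0 (align = "left")
sum(ts_demo >= t0 - 0 & ts_demo <= t0 + 8)  # 9
# rev = "PT4s", fwd = "PT4s": rows in [t0 - 4, t0 + 4] -> 9 rows centered on t0 (align = "center")
sum(ts_demo >= t0 - 4 & ts_demo <= t0 + 4)  # 9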
expect_equal(as.data.frame(new_th2b), as.data.frame(new_tb2b)) - + new_tb2c <- head(data$df3, 500) %>% - mutate(int_col = custom_rolling_time_op(int_col, bool_col, width=9, FUN=mean, partial=TRUE, align="center", na.rm=TRUE)) + mutate(int_col = custom_rolling_time_op(int_col, bool_col, width = 9, FUN = mean, partial = TRUE, align = "center", na.rm = TRUE)) new_th2c <- head(data$th3, 500)$ update_by(uby_rolling_avg_time("time_col", "int_col", "PT4s", "PT4s"), by = "bool_col") expect_equal(as.data.frame(new_th2b), as.data.frame(new_tb2b)) - + data$client$close() }) test_that("uby_rolling_min_tick behaves as expected", { data <- setup() - + new_tb1a <- data$df1 %>% mutate(dbl_col = rollapply(dbl_col, 3, min, partial = TRUE, align = "right")) new_th1a <- data$th1$ update_by(uby_rolling_min_tick("dbl_col", rev_ticks = 3)) expect_equal(as.data.frame(new_th1a), as.data.frame(new_tb1a)) - + new_tb1b <- data$df1 %>% mutate(dbl_col = rollapply(dbl_col, 3, min, partial = TRUE, align = "left")) new_th1b <- data$th1$ update_by(uby_rolling_min_tick("dbl_col", rev_ticks = 1, fwd_ticks = 2)) expect_equal(as.data.frame(new_th1b), as.data.frame(new_tb1b)) - + new_tb1c <- data$df1 %>% mutate(dbl_col = rollapply(dbl_col, 3, min, partial = TRUE, align = "center")) new_th1c <- data$th1$ update_by(uby_rolling_min_tick("dbl_col", rev_ticks = 2, fwd_ticks = 1)) expect_equal(as.data.frame(new_th1c), as.data.frame(new_tb1c)) - + new_tb2a <- data$df2 %>% - mutate(col1 = rollapply(col1, 5, min, partial = TRUE, align = "right"), - col3 = rollapply(col3, 5, min, partial = TRUE, align = "right")) + mutate( + col1 = rollapply(col1, 5, min, partial = TRUE, align = "right"), + col3 = rollapply(col3, 5, min, partial = TRUE, align = "right") + ) new_th2a <- data$th2$ update_by(uby_rolling_min_tick(c("col1", "col3"), rev_ticks = 5)) expect_equal(as.data.frame(new_th2a), as.data.frame(new_tb2a)) - + new_tb2b <- data$df2 %>% - mutate(col1 = rollapply(col1, 5, min, partial = TRUE, align = "left"), - col3 = rollapply(col3, 5, min, partial = TRUE, align = "left")) + mutate( + col1 = rollapply(col1, 5, min, partial = TRUE, align = "left"), + col3 = rollapply(col3, 5, min, partial = TRUE, align = "left") + ) new_th2b <- data$th2$ update_by(uby_rolling_min_tick(c("col1", "col3"), rev_ticks = 1, fwd_ticks = 4)) expect_equal(as.data.frame(new_th2b), as.data.frame(new_tb2b)) - + new_tb2c <- data$df2 %>% - mutate(col1 = rollapply(col1, 5, min, partial = TRUE, align = "center"), - col3 = rollapply(col3, 5, min, partial = TRUE, align = "center")) + mutate( + col1 = rollapply(col1, 5, min, partial = TRUE, align = "center"), + col3 = rollapply(col3, 5, min, partial = TRUE, align = "center") + ) new_th2c <- data$th2$ update_by(uby_rolling_min_tick(c("col1", "col3"), rev_ticks = 3, fwd_ticks = 2)) expect_equal(as.data.frame(new_th2c), as.data.frame(new_tb2c)) - + new_tb3a <- data$df3 %>% group_by(bool_col) %>% mutate(int_col = rollapply(int_col, 9, min, partial = TRUE, align = "right")) new_th3a <- data$th3$ update_by(uby_rolling_min_tick("int_col", rev_ticks = 9), by = "bool_col") expect_equal(as.data.frame(new_th3a), as.data.frame(new_tb3a)) - + new_tb3b <- data$df3 %>% group_by(bool_col) %>% mutate(int_col = rollapply(int_col, 9, min, partial = TRUE, align = "left")) new_th3b <- data$th3$ update_by(uby_rolling_min_tick("int_col", rev_ticks = 1, fwd_ticks = 8), by = "bool_col") expect_equal(as.data.frame(new_th3b), as.data.frame(new_tb3b)) - + new_tb3c <- data$df3 %>% group_by(bool_col) %>% mutate(int_col = rollapply(int_col, 9, min, partial 
= TRUE, align = "center")) new_th3c <- data$th3$ update_by(uby_rolling_min_tick("int_col", rev_ticks = 5, fwd_ticks = 4), by = "bool_col") expect_equal(as.data.frame(new_th3c), as.data.frame(new_tb3c)) - + new_tb4a <- data$df4 %>% group_by(X) %>% - mutate(Number1 = rollapply(Number1, 3, min, partial = TRUE, align = "right"), - Number2 = rollapply(Number2, 3, min, partial = TRUE, align = "right")) + mutate( + Number1 = rollapply(Number1, 3, min, partial = TRUE, align = "right"), + Number2 = rollapply(Number2, 3, min, partial = TRUE, align = "right") + ) new_th4a <- data$th4$ update_by(uby_rolling_min_tick(c("Number1", "Number2"), rev_ticks = 3), by = "X") expect_equal(as.data.frame(new_th4a), as.data.frame(new_tb4a)) - + new_tb4b <- data$df4 %>% group_by(X) %>% - mutate(Number1 = rollapply(Number1, 3, min, partial = TRUE, align = "left"), - Number2 = rollapply(Number2, 3, min, partial = TRUE, align = "left")) + mutate( + Number1 = rollapply(Number1, 3, min, partial = TRUE, align = "left"), + Number2 = rollapply(Number2, 3, min, partial = TRUE, align = "left") + ) new_th4b <- data$th4$ update_by(uby_rolling_min_tick(c("Number1", "Number2"), rev_ticks = 1, fwd_ticks = 2), by = "X") expect_equal(as.data.frame(new_th4b), as.data.frame(new_tb4b)) - + new_tb4c <- data$df4 %>% group_by(X) %>% - mutate(Number1 = rollapply(Number1, 3, min, partial = TRUE, align = "center"), - Number2 = rollapply(Number2, 3, min, partial = TRUE, align = "center")) + mutate( + Number1 = rollapply(Number1, 3, min, partial = TRUE, align = "center"), + Number2 = rollapply(Number2, 3, min, partial = TRUE, align = "center") + ) new_th4c <- data$th4$ update_by(uby_rolling_min_tick(c("Number1", "Number2"), rev_ticks = 2, fwd_ticks = 1), by = "X") expect_equal(as.data.frame(new_th4c), as.data.frame(new_tb4c)) - + new_tb5a <- data$df5 %>% group_by(Y) %>% - mutate(Number1 = rollapply(Number1, 3, min, partial = TRUE, align = "right"), - Number2 = rollapply(Number2, 3, min, partial = TRUE, align = "right")) + mutate( + Number1 = rollapply(Number1, 3, min, partial = TRUE, align = "right"), + Number2 = rollapply(Number2, 3, min, partial = TRUE, align = "right") + ) new_th5a <- data$th5$ update_by(uby_rolling_min_tick(c("Number1", "Number2"), rev_ticks = 3), by = "Y") expect_equal(as.data.frame(new_th5a), as.data.frame(new_tb5a)) - + new_tb5b <- data$df5 %>% group_by(Y) %>% - mutate(Number1 = rollapply(Number1, 3, min, partial = TRUE, align = "left"), - Number2 = rollapply(Number2, 3, min, partial = TRUE, align = "left")) + mutate( + Number1 = rollapply(Number1, 3, min, partial = TRUE, align = "left"), + Number2 = rollapply(Number2, 3, min, partial = TRUE, align = "left") + ) new_th5b <- data$th5$ update_by(uby_rolling_min_tick(c("Number1", "Number2"), rev_ticks = 1, fwd_ticks = 2), by = "Y") expect_equal(as.data.frame(new_th5b), as.data.frame(new_tb5b)) - + new_tb5c <- data$df5 %>% group_by(Y) %>% - mutate(Number1 = rollapply(Number1, 3, min, partial = TRUE, align = "center"), - Number2 = rollapply(Number2, 3, min, partial = TRUE, align = "center")) + mutate( + Number1 = rollapply(Number1, 3, min, partial = TRUE, align = "center"), + Number2 = rollapply(Number2, 3, min, partial = TRUE, align = "center") + ) new_th5c <- data$th5$ update_by(uby_rolling_min_tick(c("Number1", "Number2"), rev_ticks = 2, fwd_ticks = 1), by = "Y") expect_equal(as.data.frame(new_th5c), as.data.frame(new_tb5c)) - + new_tb6a <- rbind(data$df4, data$df5, data$df4, data$df5) %>% group_by(X, Y) %>% - mutate(Number1 = rollapply(Number1, 3, min, partial = TRUE, 
align = "right"), - Number2 = rollapply(Number2, 3, min, partial = TRUE, align = "right")) + mutate( + Number1 = rollapply(Number1, 3, min, partial = TRUE, align = "right"), + Number2 = rollapply(Number2, 3, min, partial = TRUE, align = "right") + ) new_th6a <- merge_tables(data$th4, data$th5, data$th4, data$th5)$ update_by(uby_rolling_min_tick(c("Number1", "Number2"), rev_ticks = 3), by = c("X", "Y")) expect_equal(as.data.frame(new_th6a), as.data.frame(new_tb6a)) - + new_tb6b <- rbind(data$df4, data$df5, data$df4, data$df5) %>% group_by(X, Y) %>% - mutate(Number1 = rollapply(Number1, 3, min, partial = TRUE, align = "left"), - Number2 = rollapply(Number2, 3, min, partial = TRUE, align = "left")) + mutate( + Number1 = rollapply(Number1, 3, min, partial = TRUE, align = "left"), + Number2 = rollapply(Number2, 3, min, partial = TRUE, align = "left") + ) new_th6b <- merge_tables(data$th4, data$th5, data$th4, data$th5)$ update_by(uby_rolling_min_tick(c("Number1", "Number2"), rev_ticks = 1, fwd_ticks = 2), by = c("X", "Y")) expect_equal(as.data.frame(new_th6b), as.data.frame(new_tb6b)) - + new_tb6c <- rbind(data$df4, data$df5, data$df4, data$df5) %>% group_by(X, Y) %>% - mutate(Number1 = rollapply(Number1, 3, min, partial = TRUE, align = "center"), - Number2 = rollapply(Number2, 3, min, partial = TRUE, align = "center")) + mutate( + Number1 = rollapply(Number1, 3, min, partial = TRUE, align = "center"), + Number2 = rollapply(Number2, 3, min, partial = TRUE, align = "center") + ) new_th6c <- merge_tables(data$th4, data$th5, data$th4, data$th5)$ update_by(uby_rolling_min_tick(c("Number1", "Number2"), rev_ticks = 2, fwd_ticks = 1), by = c("X", "Y")) expect_equal(as.data.frame(new_th6c), as.data.frame(new_tb6c)) - + data$client$close() }) test_that("uby_rolling_min_time behaves as expected", { data <- setup() - + new_tb1a <- head(data$df3, 500) %>% - mutate(int_col = rollapply(int_col, 9, min, partial=TRUE, align="right")) + mutate(int_col = rollapply(int_col, 9, min, partial = TRUE, align = "right")) new_th1a <- head(data$th3, 500)$ update_by(uby_rolling_min_time("time_col", "int_col", "PT8s")) expect_equal(as.data.frame(new_th1a), as.data.frame(new_tb1a)) - + new_tb1b <- head(data$df3, 500) %>% - mutate(int_col = rollapply(int_col, 9, min, partial=TRUE, align="left")) + mutate(int_col = rollapply(int_col, 9, min, partial = TRUE, align = "left")) new_th1b <- head(data$th3, 500)$ update_by(uby_rolling_min_time("time_col", "int_col", "PT0s", "PT8s")) expect_equal(as.data.frame(new_th1b), as.data.frame(new_tb1b)) - + new_tb1c <- head(data$df3, 500) %>% - mutate(int_col = rollapply(int_col, 9, min, partial=TRUE, align="center")) + mutate(int_col = rollapply(int_col, 9, min, partial = TRUE, align = "center")) new_th1c <- head(data$th3, 500)$ update_by(uby_rolling_min_time("time_col", "int_col", "PT4s", "PT4s")) expect_equal(as.data.frame(new_th1c), as.data.frame(new_tb1c)) - + new_tb2a <- head(data$df3, 500) %>% - mutate(int_col = custom_rolling_time_op(int_col, bool_col, width=9, FUN=min, partial=TRUE, align="right", na.rm=TRUE)) + mutate(int_col = custom_rolling_time_op(int_col, bool_col, width = 9, FUN = min, partial = TRUE, align = "right", na.rm = TRUE)) new_th2a <- head(data$th3, 500)$ update_by(uby_rolling_min_time("time_col", "int_col", "PT8s"), by = "bool_col") expect_equal(as.data.frame(new_th2a), as.data.frame(new_tb2a)) - + new_tb2b <- head(data$df3, 500) %>% - mutate(int_col = custom_rolling_time_op(int_col, bool_col, width=9, FUN=min, partial=TRUE, align="left", na.rm=TRUE)) + 
mutate(int_col = custom_rolling_time_op(int_col, bool_col, width = 9, FUN = min, partial = TRUE, align = "left", na.rm = TRUE)) new_th2b <- head(data$th3, 500)$ update_by(uby_rolling_min_time("time_col", "int_col", "PT0s", "PT8s"), by = "bool_col") expect_equal(as.data.frame(new_th2b), as.data.frame(new_tb2b)) - + new_tb2c <- head(data$df3, 500) %>% - mutate(int_col = custom_rolling_time_op(int_col, bool_col, width=9, FUN=min, partial=TRUE, align="center", na.rm=TRUE)) + mutate(int_col = custom_rolling_time_op(int_col, bool_col, width = 9, FUN = min, partial = TRUE, align = "center", na.rm = TRUE)) new_th2c <- head(data$th3, 500)$ update_by(uby_rolling_min_time("time_col", "int_col", "PT4s", "PT4s"), by = "bool_col") expect_equal(as.data.frame(new_th2b), as.data.frame(new_tb2b)) - + data$client$close() }) @@ -1416,22 +1504,28 @@ test_that("uby_rolling_max_tick behaves as expected", { expect_equal(as.data.frame(new_th1c), as.data.frame(new_tb1c)) new_tb2a <- data$df2 %>% - mutate(col1 = rollapply(col1, 5, max, partial = TRUE, align = "right"), - col3 = rollapply(col3, 5, max, partial = TRUE, align = "right")) + mutate( + col1 = rollapply(col1, 5, max, partial = TRUE, align = "right"), + col3 = rollapply(col3, 5, max, partial = TRUE, align = "right") + ) new_th2a <- data$th2$ update_by(uby_rolling_max_tick(c("col1", "col3"), rev_ticks = 5)) expect_equal(as.data.frame(new_th2a), as.data.frame(new_tb2a)) new_tb2b <- data$df2 %>% - mutate(col1 = rollapply(col1, 5, max, partial = TRUE, align = "left"), - col3 = rollapply(col3, 5, max, partial = TRUE, align = "left")) + mutate( + col1 = rollapply(col1, 5, max, partial = TRUE, align = "left"), + col3 = rollapply(col3, 5, max, partial = TRUE, align = "left") + ) new_th2b <- data$th2$ update_by(uby_rolling_max_tick(c("col1", "col3"), rev_ticks = 1, fwd_ticks = 4)) expect_equal(as.data.frame(new_th2b), as.data.frame(new_tb2b)) new_tb2c <- data$df2 %>% - mutate(col1 = rollapply(col1, 5, max, partial = TRUE, align = "center"), - col3 = rollapply(col3, 5, max, partial = TRUE, align = "center")) + mutate( + col1 = rollapply(col1, 5, max, partial = TRUE, align = "center"), + col3 = rollapply(col3, 5, max, partial = TRUE, align = "center") + ) new_th2c <- data$th2$ update_by(uby_rolling_max_tick(c("col1", "col3"), rev_ticks = 3, fwd_ticks = 2)) expect_equal(as.data.frame(new_th2c), as.data.frame(new_tb2c)) @@ -1459,72 +1553,90 @@ test_that("uby_rolling_max_tick behaves as expected", { new_tb4a <- data$df4 %>% group_by(X) %>% - mutate(Number1 = rollapply(Number1, 3, max, partial = TRUE, align = "right"), - Number2 = rollapply(Number2, 3, max, partial = TRUE, align = "right")) + mutate( + Number1 = rollapply(Number1, 3, max, partial = TRUE, align = "right"), + Number2 = rollapply(Number2, 3, max, partial = TRUE, align = "right") + ) new_th4a <- data$th4$ update_by(uby_rolling_max_tick(c("Number1", "Number2"), rev_ticks = 3), by = "X") expect_equal(as.data.frame(new_th4a), as.data.frame(new_tb4a)) new_tb4b <- data$df4 %>% group_by(X) %>% - mutate(Number1 = rollapply(Number1, 3, max, partial = TRUE, align = "left"), - Number2 = rollapply(Number2, 3, max, partial = TRUE, align = "left")) + mutate( + Number1 = rollapply(Number1, 3, max, partial = TRUE, align = "left"), + Number2 = rollapply(Number2, 3, max, partial = TRUE, align = "left") + ) new_th4b <- data$th4$ update_by(uby_rolling_max_tick(c("Number1", "Number2"), rev_ticks = 1, fwd_ticks = 2), by = "X") expect_equal(as.data.frame(new_th4b), as.data.frame(new_tb4b)) new_tb4c <- data$df4 %>% group_by(X) %>% 
- mutate(Number1 = rollapply(Number1, 3, max, partial = TRUE, align = "center"), - Number2 = rollapply(Number2, 3, max, partial = TRUE, align = "center")) + mutate( + Number1 = rollapply(Number1, 3, max, partial = TRUE, align = "center"), + Number2 = rollapply(Number2, 3, max, partial = TRUE, align = "center") + ) new_th4c <- data$th4$ update_by(uby_rolling_max_tick(c("Number1", "Number2"), rev_ticks = 2, fwd_ticks = 1), by = "X") expect_equal(as.data.frame(new_th4c), as.data.frame(new_tb4c)) new_tb5a <- data$df5 %>% group_by(Y) %>% - mutate(Number1 = rollapply(Number1, 3, max, partial = TRUE, align = "right"), - Number2 = rollapply(Number2, 3, max, partial = TRUE, align = "right")) + mutate( + Number1 = rollapply(Number1, 3, max, partial = TRUE, align = "right"), + Number2 = rollapply(Number2, 3, max, partial = TRUE, align = "right") + ) new_th5a <- data$th5$ update_by(uby_rolling_max_tick(c("Number1", "Number2"), rev_ticks = 3), by = "Y") expect_equal(as.data.frame(new_th5a), as.data.frame(new_tb5a)) new_tb5b <- data$df5 %>% group_by(Y) %>% - mutate(Number1 = rollapply(Number1, 3, max, partial = TRUE, align = "left"), - Number2 = rollapply(Number2, 3, max, partial = TRUE, align = "left")) + mutate( + Number1 = rollapply(Number1, 3, max, partial = TRUE, align = "left"), + Number2 = rollapply(Number2, 3, max, partial = TRUE, align = "left") + ) new_th5b <- data$th5$ update_by(uby_rolling_max_tick(c("Number1", "Number2"), rev_ticks = 1, fwd_ticks = 2), by = "Y") expect_equal(as.data.frame(new_th5b), as.data.frame(new_tb5b)) new_tb5c <- data$df5 %>% group_by(Y) %>% - mutate(Number1 = rollapply(Number1, 3, max, partial = TRUE, align = "center"), - Number2 = rollapply(Number2, 3, max, partial = TRUE, align = "center")) + mutate( + Number1 = rollapply(Number1, 3, max, partial = TRUE, align = "center"), + Number2 = rollapply(Number2, 3, max, partial = TRUE, align = "center") + ) new_th5c <- data$th5$ update_by(uby_rolling_max_tick(c("Number1", "Number2"), rev_ticks = 2, fwd_ticks = 1), by = "Y") expect_equal(as.data.frame(new_th5c), as.data.frame(new_tb5c)) new_tb6a <- rbind(data$df4, data$df5, data$df4, data$df5) %>% group_by(X, Y) %>% - mutate(Number1 = rollapply(Number1, 3, max, partial = TRUE, align = "right"), - Number2 = rollapply(Number2, 3, max, partial = TRUE, align = "right")) + mutate( + Number1 = rollapply(Number1, 3, max, partial = TRUE, align = "right"), + Number2 = rollapply(Number2, 3, max, partial = TRUE, align = "right") + ) new_th6a <- merge_tables(data$th4, data$th5, data$th4, data$th5)$ update_by(uby_rolling_max_tick(c("Number1", "Number2"), rev_ticks = 3), by = c("X", "Y")) expect_equal(as.data.frame(new_th6a), as.data.frame(new_tb6a)) new_tb6b <- rbind(data$df4, data$df5, data$df4, data$df5) %>% group_by(X, Y) %>% - mutate(Number1 = rollapply(Number1, 3, max, partial = TRUE, align = "left"), - Number2 = rollapply(Number2, 3, max, partial = TRUE, align = "left")) + mutate( + Number1 = rollapply(Number1, 3, max, partial = TRUE, align = "left"), + Number2 = rollapply(Number2, 3, max, partial = TRUE, align = "left") + ) new_th6b <- merge_tables(data$th4, data$th5, data$th4, data$th5)$ update_by(uby_rolling_max_tick(c("Number1", "Number2"), rev_ticks = 1, fwd_ticks = 2), by = c("X", "Y")) expect_equal(as.data.frame(new_th6b), as.data.frame(new_tb6b)) new_tb6c <- rbind(data$df4, data$df5, data$df4, data$df5) %>% group_by(X, Y) %>% - mutate(Number1 = rollapply(Number1, 3, max, partial = TRUE, align = "center"), - Number2 = rollapply(Number2, 3, max, partial = TRUE, align = 
"center")) + mutate( + Number1 = rollapply(Number1, 3, max, partial = TRUE, align = "center"), + Number2 = rollapply(Number2, 3, max, partial = TRUE, align = "center") + ) new_th6c <- merge_tables(data$th4, data$th5, data$th4, data$th5)$ update_by(uby_rolling_max_tick(c("Number1", "Number2"), rev_ticks = 2, fwd_ticks = 1), by = c("X", "Y")) expect_equal(as.data.frame(new_th6c), as.data.frame(new_tb6c)) @@ -1534,405 +1646,455 @@ test_that("uby_rolling_max_tick behaves as expected", { test_that("uby_rolling_max_time behaves as expected", { data <- setup() - + new_tb1a <- head(data$df3, 500) %>% - mutate(int_col = rollapply(int_col, 9, max, partial=TRUE, align="right")) + mutate(int_col = rollapply(int_col, 9, max, partial = TRUE, align = "right")) new_th1a <- head(data$th3, 500)$ update_by(uby_rolling_max_time("time_col", "int_col", "PT8s")) expect_equal(as.data.frame(new_th1a), as.data.frame(new_tb1a)) - + new_tb1b <- head(data$df3, 500) %>% - mutate(int_col = rollapply(int_col, 9, max, partial=TRUE, align="left")) + mutate(int_col = rollapply(int_col, 9, max, partial = TRUE, align = "left")) new_th1b <- head(data$th3, 500)$ update_by(uby_rolling_max_time("time_col", "int_col", "PT0s", "PT8s")) expect_equal(as.data.frame(new_th1b), as.data.frame(new_tb1b)) - + new_tb1c <- head(data$df3, 500) %>% - mutate(int_col = rollapply(int_col, 9, max, partial=TRUE, align="center")) + mutate(int_col = rollapply(int_col, 9, max, partial = TRUE, align = "center")) new_th1c <- head(data$th3, 500)$ update_by(uby_rolling_max_time("time_col", "int_col", "PT4s", "PT4s")) expect_equal(as.data.frame(new_th1c), as.data.frame(new_tb1c)) - + new_tb2a <- head(data$df3, 500) %>% - mutate(int_col = custom_rolling_time_op(int_col, bool_col, width=9, FUN=max, partial=TRUE, align="right", na.rm=TRUE)) + mutate(int_col = custom_rolling_time_op(int_col, bool_col, width = 9, FUN = max, partial = TRUE, align = "right", na.rm = TRUE)) new_th2a <- head(data$th3, 500)$ update_by(uby_rolling_max_time("time_col", "int_col", "PT8s"), by = "bool_col") expect_equal(as.data.frame(new_th2a), as.data.frame(new_tb2a)) - + new_tb2b <- head(data$df3, 500) %>% - mutate(int_col = custom_rolling_time_op(int_col, bool_col, width=9, FUN=max, partial=TRUE, align="left", na.rm=TRUE)) + mutate(int_col = custom_rolling_time_op(int_col, bool_col, width = 9, FUN = max, partial = TRUE, align = "left", na.rm = TRUE)) new_th2b <- head(data$th3, 500)$ update_by(uby_rolling_max_time("time_col", "int_col", "PT0s", "PT8s"), by = "bool_col") expect_equal(as.data.frame(new_th2b), as.data.frame(new_tb2b)) - + new_tb2c <- head(data$df3, 500) %>% - mutate(int_col = custom_rolling_time_op(int_col, bool_col, width=9, FUN=max, partial=TRUE, align="center", na.rm=TRUE)) + mutate(int_col = custom_rolling_time_op(int_col, bool_col, width = 9, FUN = max, partial = TRUE, align = "center", na.rm = TRUE)) new_th2c <- head(data$th3, 500)$ update_by(uby_rolling_max_time("time_col", "int_col", "PT4s", "PT4s"), by = "bool_col") expect_equal(as.data.frame(new_th2b), as.data.frame(new_tb2b)) - + data$client$close() }) test_that("uby_rolling_prod_tick behaves as expected", { data <- setup() - + new_tb1a <- data$df1 %>% mutate(dbl_col = rollapply(dbl_col, 3, prod, partial = TRUE, align = "right")) new_th1a <- data$th1$ update_by(uby_rolling_prod_tick("dbl_col", rev_ticks = 3)) expect_equal(as.data.frame(new_th1a), as.data.frame(new_tb1a)) - + new_tb1b <- data$df1 %>% mutate(dbl_col = rollapply(dbl_col, 3, prod, partial = TRUE, align = "left")) new_th1b <- data$th1$ 
update_by(uby_rolling_prod_tick("dbl_col", rev_ticks = 1, fwd_ticks = 2)) expect_equal(as.data.frame(new_th1b), as.data.frame(new_tb1b)) - + new_tb1c <- data$df1 %>% mutate(dbl_col = rollapply(dbl_col, 3, prod, partial = TRUE, align = "center")) new_th1c <- data$th1$ update_by(uby_rolling_prod_tick("dbl_col", rev_ticks = 2, fwd_ticks = 1)) expect_equal(as.data.frame(new_th1c), as.data.frame(new_tb1c)) - + new_tb2a <- data$df2 %>% - mutate(col1 = rollapply(col1, 5, prod, partial = TRUE, align = "right"), - col3 = rollapply(col3, 5, prod, partial = TRUE, align = "right")) + mutate( + col1 = rollapply(col1, 5, prod, partial = TRUE, align = "right"), + col3 = rollapply(col3, 5, prod, partial = TRUE, align = "right") + ) new_th2a <- data$th2$ update_by(uby_rolling_prod_tick(c("col1", "col3"), rev_ticks = 5)) expect_equal(as.data.frame(new_th2a), as.data.frame(new_tb2a)) - + new_tb2b <- data$df2 %>% - mutate(col1 = rollapply(col1, 5, prod, partial = TRUE, align = "left"), - col3 = rollapply(col3, 5, prod, partial = TRUE, align = "left")) + mutate( + col1 = rollapply(col1, 5, prod, partial = TRUE, align = "left"), + col3 = rollapply(col3, 5, prod, partial = TRUE, align = "left") + ) new_th2b <- data$th2$ update_by(uby_rolling_prod_tick(c("col1", "col3"), rev_ticks = 1, fwd_ticks = 4)) expect_equal(as.data.frame(new_th2b), as.data.frame(new_tb2b)) - + new_tb2c <- data$df2 %>% - mutate(col1 = rollapply(col1, 5, prod, partial = TRUE, align = "center"), - col3 = rollapply(col3, 5, prod, partial = TRUE, align = "center")) + mutate( + col1 = rollapply(col1, 5, prod, partial = TRUE, align = "center"), + col3 = rollapply(col3, 5, prod, partial = TRUE, align = "center") + ) new_th2c <- data$th2$ update_by(uby_rolling_prod_tick(c("col1", "col3"), rev_ticks = 3, fwd_ticks = 2)) expect_equal(as.data.frame(new_th2c), as.data.frame(new_tb2c)) - + new_tb3a <- data$df3 %>% group_by(bool_col) %>% mutate(int_col = rollapply(int_col, 9, prod, partial = TRUE, align = "right")) new_th3a <- data$th3$ update_by(uby_rolling_prod_tick("int_col", rev_ticks = 9), by = "bool_col") expect_equal(as.data.frame(new_th3a), as.data.frame(new_tb3a)) - + new_tb3b <- data$df3 %>% group_by(bool_col) %>% mutate(int_col = rollapply(int_col, 9, prod, partial = TRUE, align = "left")) new_th3b <- data$th3$ update_by(uby_rolling_prod_tick("int_col", rev_ticks = 1, fwd_ticks = 8), by = "bool_col") expect_equal(as.data.frame(new_th3b), as.data.frame(new_tb3b)) - + new_tb3c <- data$df3 %>% group_by(bool_col) %>% mutate(int_col = rollapply(int_col, 9, prod, partial = TRUE, align = "center")) new_th3c <- data$th3$ update_by(uby_rolling_prod_tick("int_col", rev_ticks = 5, fwd_ticks = 4), by = "bool_col") expect_equal(as.data.frame(new_th3c), as.data.frame(new_tb3c)) - + new_tb4a <- data$df4 %>% group_by(X) %>% - mutate(Number1 = rollapply(Number1, 3, prod, partial = TRUE, align = "right"), - Number2 = rollapply(Number2, 3, prod, partial = TRUE, align = "right")) + mutate( + Number1 = rollapply(Number1, 3, prod, partial = TRUE, align = "right"), + Number2 = rollapply(Number2, 3, prod, partial = TRUE, align = "right") + ) new_th4a <- data$th4$ update_by(uby_rolling_prod_tick(c("Number1", "Number2"), rev_ticks = 3), by = "X") expect_equal(as.data.frame(new_th4a), as.data.frame(new_tb4a)) - + new_tb4b <- data$df4 %>% group_by(X) %>% - mutate(Number1 = rollapply(Number1, 3, prod, partial = TRUE, align = "left"), - Number2 = rollapply(Number2, 3, prod, partial = TRUE, align = "left")) + mutate( + Number1 = rollapply(Number1, 3, prod, partial = TRUE, 
align = "left"), + Number2 = rollapply(Number2, 3, prod, partial = TRUE, align = "left") + ) new_th4b <- data$th4$ update_by(uby_rolling_prod_tick(c("Number1", "Number2"), rev_ticks = 1, fwd_ticks = 2), by = "X") expect_equal(as.data.frame(new_th4b), as.data.frame(new_tb4b)) - + new_tb4c <- data$df4 %>% group_by(X) %>% - mutate(Number1 = rollapply(Number1, 3, prod, partial = TRUE, align = "center"), - Number2 = rollapply(Number2, 3, prod, partial = TRUE, align = "center")) + mutate( + Number1 = rollapply(Number1, 3, prod, partial = TRUE, align = "center"), + Number2 = rollapply(Number2, 3, prod, partial = TRUE, align = "center") + ) new_th4c <- data$th4$ update_by(uby_rolling_prod_tick(c("Number1", "Number2"), rev_ticks = 2, fwd_ticks = 1), by = "X") expect_equal(as.data.frame(new_th4c), as.data.frame(new_tb4c)) - + new_tb5a <- data$df5 %>% group_by(Y) %>% - mutate(Number1 = rollapply(Number1, 3, prod, partial = TRUE, align = "right"), - Number2 = rollapply(Number2, 3, prod, partial = TRUE, align = "right")) + mutate( + Number1 = rollapply(Number1, 3, prod, partial = TRUE, align = "right"), + Number2 = rollapply(Number2, 3, prod, partial = TRUE, align = "right") + ) new_th5a <- data$th5$ update_by(uby_rolling_prod_tick(c("Number1", "Number2"), rev_ticks = 3), by = "Y") expect_equal(as.data.frame(new_th5a), as.data.frame(new_tb5a)) - + new_tb5b <- data$df5 %>% group_by(Y) %>% - mutate(Number1 = rollapply(Number1, 3, prod, partial = TRUE, align = "left"), - Number2 = rollapply(Number2, 3, prod, partial = TRUE, align = "left")) + mutate( + Number1 = rollapply(Number1, 3, prod, partial = TRUE, align = "left"), + Number2 = rollapply(Number2, 3, prod, partial = TRUE, align = "left") + ) new_th5b <- data$th5$ update_by(uby_rolling_prod_tick(c("Number1", "Number2"), rev_ticks = 1, fwd_ticks = 2), by = "Y") expect_equal(as.data.frame(new_th5b), as.data.frame(new_tb5b)) - + new_tb5c <- data$df5 %>% group_by(Y) %>% - mutate(Number1 = rollapply(Number1, 3, prod, partial = TRUE, align = "center"), - Number2 = rollapply(Number2, 3, prod, partial = TRUE, align = "center")) + mutate( + Number1 = rollapply(Number1, 3, prod, partial = TRUE, align = "center"), + Number2 = rollapply(Number2, 3, prod, partial = TRUE, align = "center") + ) new_th5c <- data$th5$ update_by(uby_rolling_prod_tick(c("Number1", "Number2"), rev_ticks = 2, fwd_ticks = 1), by = "Y") expect_equal(as.data.frame(new_th5c), as.data.frame(new_tb5c)) - + new_tb6a <- rbind(data$df4, data$df5, data$df4, data$df5) %>% group_by(X, Y) %>% - mutate(Number1 = rollapply(Number1, 3, prod, partial = TRUE, align = "right"), - Number2 = rollapply(Number2, 3, prod, partial = TRUE, align = "right")) + mutate( + Number1 = rollapply(Number1, 3, prod, partial = TRUE, align = "right"), + Number2 = rollapply(Number2, 3, prod, partial = TRUE, align = "right") + ) new_th6a <- merge_tables(data$th4, data$th5, data$th4, data$th5)$ update_by(uby_rolling_prod_tick(c("Number1", "Number2"), rev_ticks = 3), by = c("X", "Y")) expect_equal(as.data.frame(new_th6a), as.data.frame(new_tb6a)) - + new_tb6b <- rbind(data$df4, data$df5, data$df4, data$df5) %>% group_by(X, Y) %>% - mutate(Number1 = rollapply(Number1, 3, prod, partial = TRUE, align = "left"), - Number2 = rollapply(Number2, 3, prod, partial = TRUE, align = "left")) + mutate( + Number1 = rollapply(Number1, 3, prod, partial = TRUE, align = "left"), + Number2 = rollapply(Number2, 3, prod, partial = TRUE, align = "left") + ) new_th6b <- merge_tables(data$th4, data$th5, data$th4, data$th5)$ 
update_by(uby_rolling_prod_tick(c("Number1", "Number2"), rev_ticks = 1, fwd_ticks = 2), by = c("X", "Y")) expect_equal(as.data.frame(new_th6b), as.data.frame(new_tb6b)) - + new_tb6c <- rbind(data$df4, data$df5, data$df4, data$df5) %>% group_by(X, Y) %>% - mutate(Number1 = rollapply(Number1, 3, prod, partial = TRUE, align = "center"), - Number2 = rollapply(Number2, 3, prod, partial = TRUE, align = "center")) + mutate( + Number1 = rollapply(Number1, 3, prod, partial = TRUE, align = "center"), + Number2 = rollapply(Number2, 3, prod, partial = TRUE, align = "center") + ) new_th6c <- merge_tables(data$th4, data$th5, data$th4, data$th5)$ update_by(uby_rolling_prod_tick(c("Number1", "Number2"), rev_ticks = 2, fwd_ticks = 1), by = c("X", "Y")) expect_equal(as.data.frame(new_th6c), as.data.frame(new_tb6c)) - + data$client$close() }) test_that("uby_rolling_prod_time behaves as expected", { data <- setup() - + new_tb1a <- head(data$df3, 500) %>% - mutate(int_col = rollapply(int_col, 9, prod, partial=TRUE, align="right")) + mutate(int_col = rollapply(int_col, 9, prod, partial = TRUE, align = "right")) new_th1a <- head(data$th3, 500)$ update_by(uby_rolling_prod_time("time_col", "int_col", "PT8s")) expect_equal(as.data.frame(new_th1a), as.data.frame(new_tb1a)) - + new_tb1b <- head(data$df3, 500) %>% - mutate(int_col = rollapply(int_col, 9, prod, partial=TRUE, align="left")) + mutate(int_col = rollapply(int_col, 9, prod, partial = TRUE, align = "left")) new_th1b <- head(data$th3, 500)$ update_by(uby_rolling_prod_time("time_col", "int_col", "PT0s", "PT8s")) expect_equal(as.data.frame(new_th1b), as.data.frame(new_tb1b)) - + new_tb1c <- head(data$df3, 500) %>% - mutate(int_col = rollapply(int_col, 9, prod, partial=TRUE, align="center")) + mutate(int_col = rollapply(int_col, 9, prod, partial = TRUE, align = "center")) new_th1c <- head(data$th3, 500)$ update_by(uby_rolling_prod_time("time_col", "int_col", "PT4s", "PT4s")) expect_equal(as.data.frame(new_th1c), as.data.frame(new_tb1c)) - + new_tb2a <- head(data$df3, 500) %>% - mutate(int_col = custom_rolling_time_op(int_col, bool_col, width=9, FUN=prod, partial=TRUE, align="right", na.rm=TRUE)) + mutate(int_col = custom_rolling_time_op(int_col, bool_col, width = 9, FUN = prod, partial = TRUE, align = "right", na.rm = TRUE)) new_th2a <- head(data$th3, 500)$ update_by(uby_rolling_prod_time("time_col", "int_col", "PT8s"), by = "bool_col") expect_equal(as.data.frame(new_th2a), as.data.frame(new_tb2a)) - + new_tb2b <- head(data$df3, 500) %>% - mutate(int_col = custom_rolling_time_op(int_col, bool_col, width=9, FUN=prod, partial=TRUE, align="left", na.rm=TRUE)) + mutate(int_col = custom_rolling_time_op(int_col, bool_col, width = 9, FUN = prod, partial = TRUE, align = "left", na.rm = TRUE)) new_th2b <- head(data$th3, 500)$ update_by(uby_rolling_prod_time("time_col", "int_col", "PT0s", "PT8s"), by = "bool_col") expect_equal(as.data.frame(new_th2b), as.data.frame(new_tb2b)) - + new_tb2c <- head(data$df3, 500) %>% - mutate(int_col = custom_rolling_time_op(int_col, bool_col, width=9, FUN=prod, partial=TRUE, align="center", na.rm=TRUE)) + mutate(int_col = custom_rolling_time_op(int_col, bool_col, width = 9, FUN = prod, partial = TRUE, align = "center", na.rm = TRUE)) new_th2c <- head(data$th3, 500)$ update_by(uby_rolling_prod_time("time_col", "int_col", "PT4s", "PT4s"), by = "bool_col") expect_equal(as.data.frame(new_th2b), as.data.frame(new_tb2b)) - + data$client$close() }) test_that("uby_rolling_count_tick behaves as expected", { data <- setup() - + new_tb1a <- data$df1 
%>% mutate(dbl_col = rollapply(dbl_col, 3, length, partial = TRUE, align = "right")) new_th1a <- data$th1$ update_by(uby_rolling_count_tick("dbl_col", rev_ticks = 3)) expect_equal(as.data.frame(new_th1a), as.data.frame(new_tb1a)) - + new_tb1b <- data$df1 %>% mutate(dbl_col = rollapply(dbl_col, 3, length, partial = TRUE, align = "left")) new_th1b <- data$th1$ update_by(uby_rolling_count_tick("dbl_col", rev_ticks = 1, fwd_ticks = 2)) expect_equal(as.data.frame(new_th1b), as.data.frame(new_tb1b)) - + new_tb1c <- data$df1 %>% mutate(dbl_col = rollapply(dbl_col, 3, length, partial = TRUE, align = "center")) new_th1c <- data$th1$ update_by(uby_rolling_count_tick("dbl_col", rev_ticks = 2, fwd_ticks = 1)) expect_equal(as.data.frame(new_th1c), as.data.frame(new_tb1c)) - + new_tb2a <- data$df2 %>% - mutate(col1 = rollapply(col1, 5, length, partial = TRUE, align = "right"), - col3 = rollapply(col3, 5, length, partial = TRUE, align = "right")) + mutate( + col1 = rollapply(col1, 5, length, partial = TRUE, align = "right"), + col3 = rollapply(col3, 5, length, partial = TRUE, align = "right") + ) new_th2a <- data$th2$ update_by(uby_rolling_count_tick(c("col1", "col3"), rev_ticks = 5)) expect_equal(as.data.frame(new_th2a), as.data.frame(new_tb2a)) - + new_tb2b <- data$df2 %>% - mutate(col1 = rollapply(col1, 5, length, partial = TRUE, align = "left"), - col3 = rollapply(col3, 5, length, partial = TRUE, align = "left")) + mutate( + col1 = rollapply(col1, 5, length, partial = TRUE, align = "left"), + col3 = rollapply(col3, 5, length, partial = TRUE, align = "left") + ) new_th2b <- data$th2$ update_by(uby_rolling_count_tick(c("col1", "col3"), rev_ticks = 1, fwd_ticks = 4)) expect_equal(as.data.frame(new_th2b), as.data.frame(new_tb2b)) - + new_tb2c <- data$df2 %>% - mutate(col1 = rollapply(col1, 5, length, partial = TRUE, align = "center"), - col3 = rollapply(col3, 5, length, partial = TRUE, align = "center")) + mutate( + col1 = rollapply(col1, 5, length, partial = TRUE, align = "center"), + col3 = rollapply(col3, 5, length, partial = TRUE, align = "center") + ) new_th2c <- data$th2$ update_by(uby_rolling_count_tick(c("col1", "col3"), rev_ticks = 3, fwd_ticks = 2)) expect_equal(as.data.frame(new_th2c), as.data.frame(new_tb2c)) - + new_tb3a <- data$df3 %>% group_by(bool_col) %>% mutate(int_col = rollapply(int_col, 9, length, partial = TRUE, align = "right")) new_th3a <- data$th3$ update_by(uby_rolling_count_tick("int_col", rev_ticks = 9), by = "bool_col") expect_equal(as.data.frame(new_th3a), as.data.frame(new_tb3a)) - + new_tb3b <- data$df3 %>% group_by(bool_col) %>% mutate(int_col = rollapply(int_col, 9, length, partial = TRUE, align = "left")) new_th3b <- data$th3$ update_by(uby_rolling_count_tick("int_col", rev_ticks = 1, fwd_ticks = 8), by = "bool_col") expect_equal(as.data.frame(new_th3b), as.data.frame(new_tb3b)) - + new_tb3c <- data$df3 %>% group_by(bool_col) %>% mutate(int_col = rollapply(int_col, 9, length, partial = TRUE, align = "center")) new_th3c <- data$th3$ update_by(uby_rolling_count_tick("int_col", rev_ticks = 5, fwd_ticks = 4), by = "bool_col") expect_equal(as.data.frame(new_th3c), as.data.frame(new_tb3c)) - + new_tb4a <- data$df4 %>% group_by(X) %>% - mutate(Number1 = rollapply(Number1, 3, length, partial = TRUE, align = "right"), - Number2 = rollapply(Number2, 3, length, partial = TRUE, align = "right")) + mutate( + Number1 = rollapply(Number1, 3, length, partial = TRUE, align = "right"), + Number2 = rollapply(Number2, 3, length, partial = TRUE, align = "right") + ) new_th4a <- data$th4$ 
update_by(uby_rolling_count_tick(c("Number1", "Number2"), rev_ticks = 3), by = "X") expect_equal(as.data.frame(new_th4a), as.data.frame(new_tb4a)) - + new_tb4b <- data$df4 %>% group_by(X) %>% - mutate(Number1 = rollapply(Number1, 3, length, partial = TRUE, align = "left"), - Number2 = rollapply(Number2, 3, length, partial = TRUE, align = "left")) + mutate( + Number1 = rollapply(Number1, 3, length, partial = TRUE, align = "left"), + Number2 = rollapply(Number2, 3, length, partial = TRUE, align = "left") + ) new_th4b <- data$th4$ update_by(uby_rolling_count_tick(c("Number1", "Number2"), rev_ticks = 1, fwd_ticks = 2), by = "X") expect_equal(as.data.frame(new_th4b), as.data.frame(new_tb4b)) - + new_tb4c <- data$df4 %>% group_by(X) %>% - mutate(Number1 = rollapply(Number1, 3, length, partial = TRUE, align = "center"), - Number2 = rollapply(Number2, 3, length, partial = TRUE, align = "center")) + mutate( + Number1 = rollapply(Number1, 3, length, partial = TRUE, align = "center"), + Number2 = rollapply(Number2, 3, length, partial = TRUE, align = "center") + ) new_th4c <- data$th4$ update_by(uby_rolling_count_tick(c("Number1", "Number2"), rev_ticks = 2, fwd_ticks = 1), by = "X") expect_equal(as.data.frame(new_th4c), as.data.frame(new_tb4c)) - + new_tb5a <- data$df5 %>% group_by(Y) %>% - mutate(Number1 = rollapply(Number1, 3, length, partial = TRUE, align = "right"), - Number2 = rollapply(Number2, 3, length, partial = TRUE, align = "right")) + mutate( + Number1 = rollapply(Number1, 3, length, partial = TRUE, align = "right"), + Number2 = rollapply(Number2, 3, length, partial = TRUE, align = "right") + ) new_th5a <- data$th5$ update_by(uby_rolling_count_tick(c("Number1", "Number2"), rev_ticks = 3), by = "Y") expect_equal(as.data.frame(new_th5a), as.data.frame(new_tb5a)) - + new_tb5b <- data$df5 %>% group_by(Y) %>% - mutate(Number1 = rollapply(Number1, 3, length, partial = TRUE, align = "left"), - Number2 = rollapply(Number2, 3, length, partial = TRUE, align = "left")) + mutate( + Number1 = rollapply(Number1, 3, length, partial = TRUE, align = "left"), + Number2 = rollapply(Number2, 3, length, partial = TRUE, align = "left") + ) new_th5b <- data$th5$ update_by(uby_rolling_count_tick(c("Number1", "Number2"), rev_ticks = 1, fwd_ticks = 2), by = "Y") expect_equal(as.data.frame(new_th5b), as.data.frame(new_tb5b)) - + new_tb5c <- data$df5 %>% group_by(Y) %>% - mutate(Number1 = rollapply(Number1, 3, length, partial = TRUE, align = "center"), - Number2 = rollapply(Number2, 3, length, partial = TRUE, align = "center")) + mutate( + Number1 = rollapply(Number1, 3, length, partial = TRUE, align = "center"), + Number2 = rollapply(Number2, 3, length, partial = TRUE, align = "center") + ) new_th5c <- data$th5$ update_by(uby_rolling_count_tick(c("Number1", "Number2"), rev_ticks = 2, fwd_ticks = 1), by = "Y") expect_equal(as.data.frame(new_th5c), as.data.frame(new_tb5c)) - + new_tb6a <- rbind(data$df4, data$df5, data$df4, data$df5) %>% group_by(X, Y) %>% - mutate(Number1 = rollapply(Number1, 3, length, partial = TRUE, align = "right"), - Number2 = rollapply(Number2, 3, length, partial = TRUE, align = "right")) + mutate( + Number1 = rollapply(Number1, 3, length, partial = TRUE, align = "right"), + Number2 = rollapply(Number2, 3, length, partial = TRUE, align = "right") + ) new_th6a <- merge_tables(data$th4, data$th5, data$th4, data$th5)$ update_by(uby_rolling_count_tick(c("Number1", "Number2"), rev_ticks = 3), by = c("X", "Y")) expect_equal(as.data.frame(new_th6a), as.data.frame(new_tb6a)) - + new_tb6b <- 
rbind(data$df4, data$df5, data$df4, data$df5) %>% group_by(X, Y) %>% - mutate(Number1 = rollapply(Number1, 3, length, partial = TRUE, align = "left"), - Number2 = rollapply(Number2, 3, length, partial = TRUE, align = "left")) + mutate( + Number1 = rollapply(Number1, 3, length, partial = TRUE, align = "left"), + Number2 = rollapply(Number2, 3, length, partial = TRUE, align = "left") + ) new_th6b <- merge_tables(data$th4, data$th5, data$th4, data$th5)$ update_by(uby_rolling_count_tick(c("Number1", "Number2"), rev_ticks = 1, fwd_ticks = 2), by = c("X", "Y")) expect_equal(as.data.frame(new_th6b), as.data.frame(new_tb6b)) - + new_tb6c <- rbind(data$df4, data$df5, data$df4, data$df5) %>% group_by(X, Y) %>% - mutate(Number1 = rollapply(Number1, 3, length, partial = TRUE, align = "center"), - Number2 = rollapply(Number2, 3, length, partial = TRUE, align = "center")) + mutate( + Number1 = rollapply(Number1, 3, length, partial = TRUE, align = "center"), + Number2 = rollapply(Number2, 3, length, partial = TRUE, align = "center") + ) new_th6c <- merge_tables(data$th4, data$th5, data$th4, data$th5)$ update_by(uby_rolling_count_tick(c("Number1", "Number2"), rev_ticks = 2, fwd_ticks = 1), by = c("X", "Y")) expect_equal(as.data.frame(new_th6c), as.data.frame(new_tb6c)) - + data$client$close() }) test_that("uby_rolling_count_time behaves as expected", { data <- setup() - - custom_count <- function(x) {return(sum(!is.na(x)))} - + + custom_count <- function(x) { + return(sum(!is.na(x))) + } + new_tb1a <- head(data$df3, 500) %>% - mutate(int_col = rollapply(int_col, 9, length, partial=TRUE, align="right")) + mutate(int_col = rollapply(int_col, 9, length, partial = TRUE, align = "right")) new_th1a <- head(data$th3, 500)$ update_by(uby_rolling_count_time("time_col", "int_col", "PT8s")) expect_equal(as.data.frame(new_th1a), as.data.frame(new_tb1a)) - + new_tb1b <- head(data$df3, 500) %>% - mutate(int_col = rollapply(int_col, 9, length, partial=TRUE, align="left")) + mutate(int_col = rollapply(int_col, 9, length, partial = TRUE, align = "left")) new_th1b <- head(data$th3, 500)$ update_by(uby_rolling_count_time("time_col", "int_col", "PT0s", "PT8s")) expect_equal(as.data.frame(new_th1b), as.data.frame(new_tb1b)) - + new_tb1c <- head(data$df3, 500) %>% - mutate(int_col = rollapply(int_col, 9, length, partial=TRUE, align="center")) + mutate(int_col = rollapply(int_col, 9, length, partial = TRUE, align = "center")) new_th1c <- head(data$th3, 500)$ update_by(uby_rolling_count_time("time_col", "int_col", "PT4s", "PT4s")) expect_equal(as.data.frame(new_th1c), as.data.frame(new_tb1c)) - + new_tb2a <- head(data$df3, 500) %>% - mutate(int_col = custom_rolling_time_op(int_col, bool_col, width=9, FUN=custom_count, partial=TRUE, align="right")) + mutate(int_col = custom_rolling_time_op(int_col, bool_col, width = 9, FUN = custom_count, partial = TRUE, align = "right")) new_th2a <- head(data$th3, 500)$ update_by(uby_rolling_count_time("time_col", "int_col", "PT8s"), by = "bool_col") expect_equal(as.data.frame(new_th2a), as.data.frame(new_tb2a)) - + new_tb2b <- head(data$df3, 500) %>% - mutate(int_col = custom_rolling_time_op(int_col, bool_col, width=9, FUN=custom_count, partial=TRUE, align="left")) + mutate(int_col = custom_rolling_time_op(int_col, bool_col, width = 9, FUN = custom_count, partial = TRUE, align = "left")) new_th2b <- head(data$th3, 500)$ update_by(uby_rolling_count_time("time_col", "int_col", "PT0s", "PT8s"), by = "bool_col") expect_equal(as.data.frame(new_th2b), as.data.frame(new_tb2b)) - + new_tb2c <- 
head(data$df3, 500) %>% - mutate(int_col = custom_rolling_time_op(int_col, bool_col, width=9, FUN=custom_count, partial=TRUE, align="center")) + mutate(int_col = custom_rolling_time_op(int_col, bool_col, width = 9, FUN = custom_count, partial = TRUE, align = "center")) new_th2c <- head(data$th3, 500)$ update_by(uby_rolling_count_time("time_col", "int_col", "PT4s", "PT4s"), by = "bool_col") expect_equal(as.data.frame(new_th2b), as.data.frame(new_tb2b)) - + data$client$close() }) @@ -1958,22 +2120,28 @@ test_that("uby_rolling_std_tick behaves as expected", { expect_equal(as.data.frame(new_th1c), as.data.frame(new_tb1c)) new_tb2a <- data$df2 %>% - mutate(col1 = rollapply(col1, 5, sd, partial = TRUE, align = "right"), - col3 = rollapply(col3, 5, sd, partial = TRUE, align = "right")) + mutate( + col1 = rollapply(col1, 5, sd, partial = TRUE, align = "right"), + col3 = rollapply(col3, 5, sd, partial = TRUE, align = "right") + ) new_th2a <- data$th2$ update_by(uby_rolling_std_tick(c("col1", "col3"), rev_ticks = 5)) expect_equal(as.data.frame(new_th2a), as.data.frame(new_tb2a)) new_tb2b <- data$df2 %>% - mutate(col1 = rollapply(col1, 5, sd, partial = TRUE, align = "left"), - col3 = rollapply(col3, 5, sd, partial = TRUE, align = "left")) + mutate( + col1 = rollapply(col1, 5, sd, partial = TRUE, align = "left"), + col3 = rollapply(col3, 5, sd, partial = TRUE, align = "left") + ) new_th2b <- data$th2$ update_by(uby_rolling_std_tick(c("col1", "col3"), rev_ticks = 1, fwd_ticks = 4)) expect_equal(as.data.frame(new_th2b), as.data.frame(new_tb2b)) new_tb2c <- data$df2 %>% - mutate(col1 = rollapply(col1, 5, sd, partial = TRUE, align = "center"), - col3 = rollapply(col3, 5, sd, partial = TRUE, align = "center")) + mutate( + col1 = rollapply(col1, 5, sd, partial = TRUE, align = "center"), + col3 = rollapply(col3, 5, sd, partial = TRUE, align = "center") + ) new_th2c <- data$th2$ update_by(uby_rolling_std_tick(c("col1", "col3"), rev_ticks = 3, fwd_ticks = 2)) expect_equal(as.data.frame(new_th2c), as.data.frame(new_tb2c)) @@ -2001,72 +2169,90 @@ test_that("uby_rolling_std_tick behaves as expected", { new_tb4a <- data$df4 %>% group_by(X) %>% - mutate(Number1 = rollapply(Number1, 3, sd, partial = TRUE, align = "right"), - Number2 = rollapply(Number2, 3, sd, partial = TRUE, align = "right")) + mutate( + Number1 = rollapply(Number1, 3, sd, partial = TRUE, align = "right"), + Number2 = rollapply(Number2, 3, sd, partial = TRUE, align = "right") + ) new_th4a <- data$th4$ update_by(uby_rolling_std_tick(c("Number1", "Number2"), rev_ticks = 3), by = "X") expect_equal(as.data.frame(new_th4a), as.data.frame(new_tb4a)) new_tb4b <- data$df4 %>% group_by(X) %>% - mutate(Number1 = rollapply(Number1, 3, sd, partial = TRUE, align = "left"), - Number2 = rollapply(Number2, 3, sd, partial = TRUE, align = "left")) + mutate( + Number1 = rollapply(Number1, 3, sd, partial = TRUE, align = "left"), + Number2 = rollapply(Number2, 3, sd, partial = TRUE, align = "left") + ) new_th4b <- data$th4$ update_by(uby_rolling_std_tick(c("Number1", "Number2"), rev_ticks = 1, fwd_ticks = 2), by = "X") expect_equal(as.data.frame(new_th4b), as.data.frame(new_tb4b)) new_tb4c <- data$df4 %>% group_by(X) %>% - mutate(Number1 = rollapply(Number1, 3, sd, partial = TRUE, align = "center"), - Number2 = rollapply(Number2, 3, sd, partial = TRUE, align = "center")) + mutate( + Number1 = rollapply(Number1, 3, sd, partial = TRUE, align = "center"), + Number2 = rollapply(Number2, 3, sd, partial = TRUE, align = "center") + ) new_th4c <- data$th4$ 
update_by(uby_rolling_std_tick(c("Number1", "Number2"), rev_ticks = 2, fwd_ticks = 1), by = "X") expect_equal(as.data.frame(new_th4c), as.data.frame(new_tb4c)) new_tb5a <- data$df5 %>% group_by(Y) %>% - mutate(Number1 = rollapply(Number1, 3, sd, partial = TRUE, align = "right"), - Number2 = rollapply(Number2, 3, sd, partial = TRUE, align = "right")) + mutate( + Number1 = rollapply(Number1, 3, sd, partial = TRUE, align = "right"), + Number2 = rollapply(Number2, 3, sd, partial = TRUE, align = "right") + ) new_th5a <- data$th5$ update_by(uby_rolling_std_tick(c("Number1", "Number2"), rev_ticks = 3), by = "Y") expect_equal(as.data.frame(new_th5a), as.data.frame(new_tb5a)) new_tb5b <- data$df5 %>% group_by(Y) %>% - mutate(Number1 = rollapply(Number1, 3, sd, partial = TRUE, align = "left"), - Number2 = rollapply(Number2, 3, sd, partial = TRUE, align = "left")) + mutate( + Number1 = rollapply(Number1, 3, sd, partial = TRUE, align = "left"), + Number2 = rollapply(Number2, 3, sd, partial = TRUE, align = "left") + ) new_th5b <- data$th5$ update_by(uby_rolling_std_tick(c("Number1", "Number2"), rev_ticks = 1, fwd_ticks = 2), by = "Y") expect_equal(as.data.frame(new_th5b), as.data.frame(new_tb5b)) new_tb5c <- data$df5 %>% group_by(Y) %>% - mutate(Number1 = rollapply(Number1, 3, sd, partial = TRUE, align = "center"), - Number2 = rollapply(Number2, 3, sd, partial = TRUE, align = "center")) + mutate( + Number1 = rollapply(Number1, 3, sd, partial = TRUE, align = "center"), + Number2 = rollapply(Number2, 3, sd, partial = TRUE, align = "center") + ) new_th5c <- data$th5$ update_by(uby_rolling_std_tick(c("Number1", "Number2"), rev_ticks = 2, fwd_ticks = 1), by = "Y") expect_equal(as.data.frame(new_th5c), as.data.frame(new_tb5c)) new_tb6a <- rbind(data$df4, data$df5, data$df4, data$df5) %>% group_by(X, Y) %>% - mutate(Number1 = rollapply(Number1, 3, sd, partial = TRUE, align = "right"), - Number2 = rollapply(Number2, 3, sd, partial = TRUE, align = "right")) + mutate( + Number1 = rollapply(Number1, 3, sd, partial = TRUE, align = "right"), + Number2 = rollapply(Number2, 3, sd, partial = TRUE, align = "right") + ) new_th6a <- merge_tables(data$th4, data$th5, data$th4, data$th5)$ update_by(uby_rolling_std_tick(c("Number1", "Number2"), rev_ticks = 3), by = c("X", "Y")) expect_equal(as.data.frame(new_th6a), as.data.frame(new_tb6a)) new_tb6b <- rbind(data$df4, data$df5, data$df4, data$df5) %>% group_by(X, Y) %>% - mutate(Number1 = rollapply(Number1, 3, sd, partial = TRUE, align = "left"), - Number2 = rollapply(Number2, 3, sd, partial = TRUE, align = "left")) + mutate( + Number1 = rollapply(Number1, 3, sd, partial = TRUE, align = "left"), + Number2 = rollapply(Number2, 3, sd, partial = TRUE, align = "left") + ) new_th6b <- merge_tables(data$th4, data$th5, data$th4, data$th5)$ update_by(uby_rolling_std_tick(c("Number1", "Number2"), rev_ticks = 1, fwd_ticks = 2), by = c("X", "Y")) expect_equal(as.data.frame(new_th6b), as.data.frame(new_tb6b)) new_tb6c <- rbind(data$df4, data$df5, data$df4, data$df5) %>% group_by(X, Y) %>% - mutate(Number1 = rollapply(Number1, 3, sd, partial = TRUE, align = "center"), - Number2 = rollapply(Number2, 3, sd, partial = TRUE, align = "center")) + mutate( + Number1 = rollapply(Number1, 3, sd, partial = TRUE, align = "center"), + Number2 = rollapply(Number2, 3, sd, partial = TRUE, align = "center") + ) new_th6c <- merge_tables(data$th4, data$th5, data$th4, data$th5)$ update_by(uby_rolling_std_tick(c("Number1", "Number2"), rev_ticks = 2, fwd_ticks = 1), by = c("X", "Y")) 
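  # A minimal sketch of the window correspondence these rolling-window
  # comparisons assume. Deephaven tick windows include the current row, so for
  # an odd window width w = 2k + 1:
  #   rev_ticks = w                     ~  zoo::rollapply(x, w, FUN, align = "right")
  #   rev_ticks = 1, fwd_ticks = w - 1  ~  zoo::rollapply(x, w, FUN, align = "left")
  #   rev_ticks = k + 1, fwd_ticks = k  ~  zoo::rollapply(x, w, FUN, align = "center")
  # The *_time variants lean on the same idea, assuming time_col rows are spaced
  # exactly one second apart, so e.g. a "PT8s" reverse window covers the current
  # row plus the 8 preceding rows (a 9-row rollapply window); custom_rolling_time_op
  # is presumably a helper defined earlier in this file that applies FUN per group
  # under that convention.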
expect_equal(as.data.frame(new_th6c), as.data.frame(new_tb6c)) @@ -2078,37 +2264,37 @@ test_that("uby_rolling_std_time behaves as expected", { data <- setup() new_tb1a <- head(data$df3, 500) %>% - mutate(int_col = rollapply(int_col, 9, sd, partial=TRUE, align="right")) + mutate(int_col = rollapply(int_col, 9, sd, partial = TRUE, align = "right")) new_th1a <- head(data$th3, 500)$ update_by(uby_rolling_std_time("time_col", "int_col", "PT8s")) expect_equal(as.data.frame(new_th1a), as.data.frame(new_tb1a)) - + new_tb1b <- head(data$df3, 500) %>% - mutate(int_col = rollapply(int_col, 9, sd, partial=TRUE, align="left")) + mutate(int_col = rollapply(int_col, 9, sd, partial = TRUE, align = "left")) new_th1b <- head(data$th3, 500)$ update_by(uby_rolling_std_time("time_col", "int_col", "PT0s", "PT8s")) expect_equal(as.data.frame(new_th1b), as.data.frame(new_tb1b)) - + new_tb1c <- head(data$df3, 500) %>% - mutate(int_col = rollapply(int_col, 9, sd, partial=TRUE, align="center")) + mutate(int_col = rollapply(int_col, 9, sd, partial = TRUE, align = "center")) new_th1c <- head(data$th3, 500)$ update_by(uby_rolling_std_time("time_col", "int_col", "PT4s", "PT4s")) expect_equal(as.data.frame(new_th1c), as.data.frame(new_tb1c)) - + new_tb2a <- head(data$df3, 500) %>% - mutate(int_col = custom_rolling_time_op(int_col, bool_col, width=9, FUN=sd, partial=TRUE, align="right", na.rm=TRUE)) + mutate(int_col = custom_rolling_time_op(int_col, bool_col, width = 9, FUN = sd, partial = TRUE, align = "right", na.rm = TRUE)) new_th2a <- head(data$th3, 500)$ update_by(uby_rolling_std_time("time_col", "int_col", "PT8s"), by = "bool_col") expect_equal(as.data.frame(new_th2a), as.data.frame(new_tb2a)) - + new_tb2b <- head(data$df3, 500) %>% - mutate(int_col = custom_rolling_time_op(int_col, bool_col, width=9, FUN=sd, partial=TRUE, align="left", na.rm=TRUE)) + mutate(int_col = custom_rolling_time_op(int_col, bool_col, width = 9, FUN = sd, partial = TRUE, align = "left", na.rm = TRUE)) new_th2b <- head(data$th3, 500)$ update_by(uby_rolling_std_time("time_col", "int_col", "PT0s", "PT8s"), by = "bool_col") expect_equal(as.data.frame(new_th2b), as.data.frame(new_tb2b)) - + new_tb2c <- head(data$df3, 500) %>% - mutate(int_col = custom_rolling_time_op(int_col, bool_col, width=9, FUN=sd, partial=TRUE, align="center", na.rm=TRUE)) + mutate(int_col = custom_rolling_time_op(int_col, bool_col, width = 9, FUN = sd, partial = TRUE, align = "center", na.rm = TRUE)) new_th2c <- head(data$th3, 500)$ update_by(uby_rolling_std_time("time_col", "int_col", "PT4s", "PT4s"), by = "bool_col") expect_equal(as.data.frame(new_th2b), as.data.frame(new_tb2b)) @@ -2118,10 +2304,10 @@ test_that("uby_rolling_std_time behaves as expected", { test_that("uby_rolling_wavg_tick behaves as expected", { data <- setup() - + # There is not a clean analog to our grouped weighted average in R, so we create # these tables directly - + new_df1a <- data.frame( string_col = c("I", "am", "a", "string", "column"), int_col = c(0, 1, 2, 3, 4), @@ -2130,7 +2316,7 @@ test_that("uby_rolling_wavg_tick behaves as expected", { new_th1a <- data$th1$ update_by(uby_rolling_wavg_tick("int_col", "dbl_col", rev_ticks = 3)) expect_true(all.equal(as.data.frame(new_th1a), new_df1a, tolerance = 1e-4)) - + new_df1b <- data.frame( string_col = c("I", "am", "a", "string", "column"), int_col = c(0, 1, 2, 3, 4), @@ -2139,7 +2325,7 @@ test_that("uby_rolling_wavg_tick behaves as expected", { new_th1b <- data$th1$ update_by(uby_rolling_wavg_tick("int_col", "dbl_col", rev_ticks = 1, fwd_ticks = 2)) 
expect_true(all.equal(as.data.frame(new_th1b), new_df1b, tolerance = 1e-4)) - + new_df1c <- data.frame( string_col = c("I", "am", "a", "string", "column"), int_col = c(0, 1, 2, 3, 4), @@ -2148,7 +2334,7 @@ test_that("uby_rolling_wavg_tick behaves as expected", { new_th1c <- data$th1$ update_by(uby_rolling_wavg_tick("int_col", "dbl_col", rev_ticks = 2, fwd_ticks = 1)) expect_true(all.equal(as.data.frame(new_th1c), new_df1c, tolerance = 1e-4)) - + new_df4a <- data.frame( X = c("A", "B", "A", "C", "B", "A", "B", "B", "C"), Y = c("M", "N", "O", "N", "P", "M", "O", "P", "M"), @@ -2158,7 +2344,7 @@ test_that("uby_rolling_wavg_tick behaves as expected", { new_th4a <- data$th4$ update_by(uby_rolling_wavg_tick("Number1", c("Number1", "Number2"), rev_ticks = 3), by = "X") expect_true(all.equal(as.data.frame(new_th4a), new_df4a, tolerance = 1e-4)) - + new_df4b <- data.frame( X = c("A", "B", "A", "C", "B", "A", "B", "B", "C"), Y = c("M", "N", "O", "N", "P", "M", "O", "P", "M"), @@ -2168,7 +2354,7 @@ test_that("uby_rolling_wavg_tick behaves as expected", { new_th4b <- data$th4$ update_by(uby_rolling_wavg_tick("Number1", c("Number1", "Number2"), rev_ticks = 1, fwd_ticks = 2), by = "X") expect_true(all.equal(as.data.frame(new_th4b), new_df4b, tolerance = 1e-4)) - + new_df4c <- data.frame( X = c("A", "B", "A", "C", "B", "A", "B", "B", "C"), Y = c("M", "N", "O", "N", "P", "M", "O", "P", "M"), @@ -2178,7 +2364,7 @@ test_that("uby_rolling_wavg_tick behaves as expected", { new_th4c <- data$th4$ update_by(uby_rolling_wavg_tick("Number1", c("Number1", "Number2"), rev_ticks = 2, fwd_ticks = 1), by = "X") expect_true(all.equal(as.data.frame(new_th4c), new_df4c, tolerance = 1e-4)) - + new_df5a <- data.frame( X = c("B", "C", "B", "A", "A", "C", "B", "C", "B", "A"), Y = c("N", "N", "M", "P", "O", "P", "O", "N", "O", "O"), @@ -2188,7 +2374,7 @@ test_that("uby_rolling_wavg_tick behaves as expected", { new_th5a <- data$th5$ update_by(uby_rolling_wavg_tick("Number2", c("Number1", "Number2"), rev_ticks = 3), by = "Y") expect_true(all.equal(as.data.frame(new_th5a), new_df5a, tolerance = 1e-4)) - + new_df5b <- data.frame( X = c("B", "C", "B", "A", "A", "C", "B", "C", "B", "A"), Y = c("N", "N", "M", "P", "O", "P", "O", "N", "O", "O"), @@ -2198,7 +2384,7 @@ test_that("uby_rolling_wavg_tick behaves as expected", { new_th5b <- data$th5$ update_by(uby_rolling_wavg_tick("Number2", c("Number1", "Number2"), rev_ticks = 1, fwd_ticks = 2), by = "Y") expect_true(all.equal(as.data.frame(new_th5b), new_df5b, tolerance = 1e-4)) - + new_df5c <- data.frame( X = c("B", "C", "B", "A", "A", "C", "B", "C", "B", "A"), Y = c("N", "N", "M", "P", "O", "P", "O", "N", "O", "O"), @@ -2208,205 +2394,243 @@ test_that("uby_rolling_wavg_tick behaves as expected", { new_th5c <- data$th5$ update_by(uby_rolling_wavg_tick("Number2", c("Number1", "Number2"), rev_ticks = 2, fwd_ticks = 1), by = "Y") expect_true(all.equal(as.data.frame(new_th5c), new_df5c, tolerance = 1e-4)) - + new_df6a <- data.frame( - X = c("A", "B", "A", "C", "B", "A", "B", "B", "C", "B", - "C", "B", "A", "A", "C", "B", "C", "B", "A", "A", - "B", "A", "C", "B", "A", "B", "B", "C", "B", "C", - "B", "A", "A", "C", "B", "C", "B", "A"), - Y = c("M", "N", "O", "N", "P", "M", "O", "P", "M", "N", - "N", "M", "P", "O", "P", "O", "N", "O", "O", "M", - "N", "O", "N", "P", "M", "O", "P", "M", "N", "N", - "M", "P", "O", "P", "O", "N", "O", "O"), - Number1 = c(100.00000, -44.00000, 49.00000, 11.00000, -66.00000, - 83.33333, 29.00000, -97.50000, -70.00000, 451.00000, - 63.91566, 86.00000, -45.00000, 
48.04000, NA, 320.49733, - 529.44444, 274.13742, 53.93333, 90.00000, - -209.00000, 53.93333, 529.44444, -79.26316, - 75.00000, 274.13742, -166.80000, -70.00000, 121.00000, - 529.44444, 86.00000, -45.00000, 53.93333, NA, 274.13742, - 529.44444, 274.13742, 53.93333), - Number2 = c(-55.00000, 76.00000, 20.00000, 130.00000, 230.00000, - -53.33333, 73.00000, 264.87500, 214.00000, 76.00000, - 20.69880, -6.00000, 34.00000, 19.84000, NA, 47.17112, - 113.50000, 44.41438, 21.37778, -54.00000, - 76.00000, 21.37778, 113.50000, 244.68421, - -52.50000, 44.41438, 341.60000, 214.00000, 76.00000, - 113.50000, -6.00000, 34.00000, 21.37778, NA, 44.41438, - 113.50000, 44.41438, 21.37778) + X = c( + "A", "B", "A", "C", "B", "A", "B", "B", "C", "B", + "C", "B", "A", "A", "C", "B", "C", "B", "A", "A", + "B", "A", "C", "B", "A", "B", "B", "C", "B", "C", + "B", "A", "A", "C", "B", "C", "B", "A" + ), + Y = c( + "M", "N", "O", "N", "P", "M", "O", "P", "M", "N", + "N", "M", "P", "O", "P", "O", "N", "O", "O", "M", + "N", "O", "N", "P", "M", "O", "P", "M", "N", "N", + "M", "P", "O", "P", "O", "N", "O", "O" + ), + Number1 = c( + 100.00000, -44.00000, 49.00000, 11.00000, -66.00000, + 83.33333, 29.00000, -97.50000, -70.00000, 451.00000, + 63.91566, 86.00000, -45.00000, 48.04000, NA, 320.49733, + 529.44444, 274.13742, 53.93333, 90.00000, + -209.00000, 53.93333, 529.44444, -79.26316, + 75.00000, 274.13742, -166.80000, -70.00000, 121.00000, + 529.44444, 86.00000, -45.00000, 53.93333, NA, 274.13742, + 529.44444, 274.13742, 53.93333 + ), + Number2 = c( + -55.00000, 76.00000, 20.00000, 130.00000, 230.00000, + -53.33333, 73.00000, 264.87500, 214.00000, 76.00000, + 20.69880, -6.00000, 34.00000, 19.84000, NA, 47.17112, + 113.50000, 44.41438, 21.37778, -54.00000, + 76.00000, 21.37778, 113.50000, 244.68421, + -52.50000, 44.41438, 341.60000, 214.00000, 76.00000, + 113.50000, -6.00000, 34.00000, 21.37778, NA, 44.41438, + 113.50000, 44.41438, 21.37778 + ) ) new_th6a <- merge_tables(data$th4, data$th5, data$th4, data$th5)$ update_by(uby_rolling_wavg_tick("Number1", c("Number1", "Number2"), rev_ticks = 3), by = c("X", "Y")) expect_true(all.equal(as.data.frame(new_th6a), new_df6a, tolerance = 1e-4)) - + new_df6b <- data.frame( - X = c("A", "B", "A", "C", "B", "A", "B", "B", "C", "B", - "C", "B", "A", "A", "C", "B", "C", "B", "A", "A", - "B", "A", "C", "B", "A", "B", "B", "C", "B", "C", - "B", "A", "A", "C", "B", "C", "B", "A"), - Y = c("M", "N", "O", "N", "P", "M", "O", "P", "M", "N", - "N", "M", "P", "O", "P", "O", "N", "O", "O", "M", - "N", "O", "N", "P", "M", "O", "P", "M", "N", "N", - "M", "P", "O", "P", "O", "N", "O", "O"), - Number1 = c(90.00000, -209.00000, 53.93333, 529.44444, -79.26316, - 75.00000, 274.13742, -166.80000, -70.00000, 121.00000, - 529.44444, 86.00000, -45.00000, 53.93333, NA, 274.13742, - 529.44444, 274.13742, 53.93333, 83.33333, - 451.00000, 53.93333, 529.44444, -97.50000, - 50.00000, 274.13742, 18.00000, -70.00000, 55.00000, - 1344.14286, 86.00000, -45.00000, -6.50000, NA, 290.14865, - -65.00000, 99.00000, -5.00000), - Number2 = c(-54.00000, 76.00000, 21.37778, 113.50000, 244.68421, - -52.50000, 44.41438, 341.60000, 214.00000, 76.00000, - 113.50000, -6.00000, 34.00000, 21.37778, NA, 44.41438, - 113.50000, 44.41438, 21.37778, -53.33333, - 76.00000, 21.37778, 113.50000, 264.87500, - -50.00000, 44.41438, 137.00000, 214.00000, 76.00000, - 87.57143, -6.00000, 34.00000, 4.50000, NA, 42.54730, - -5.00000, 34.00000, 6.00000) + X = c( + "A", "B", "A", "C", "B", "A", "B", "B", "C", "B", + "C", "B", "A", "A", "C", "B", "C", 
"B", "A", "A", + "B", "A", "C", "B", "A", "B", "B", "C", "B", "C", + "B", "A", "A", "C", "B", "C", "B", "A" + ), + Y = c( + "M", "N", "O", "N", "P", "M", "O", "P", "M", "N", + "N", "M", "P", "O", "P", "O", "N", "O", "O", "M", + "N", "O", "N", "P", "M", "O", "P", "M", "N", "N", + "M", "P", "O", "P", "O", "N", "O", "O" + ), + Number1 = c( + 90.00000, -209.00000, 53.93333, 529.44444, -79.26316, + 75.00000, 274.13742, -166.80000, -70.00000, 121.00000, + 529.44444, 86.00000, -45.00000, 53.93333, NA, 274.13742, + 529.44444, 274.13742, 53.93333, 83.33333, + 451.00000, 53.93333, 529.44444, -97.50000, + 50.00000, 274.13742, 18.00000, -70.00000, 55.00000, + 1344.14286, 86.00000, -45.00000, -6.50000, NA, 290.14865, + -65.00000, 99.00000, -5.00000 + ), + Number2 = c( + -54.00000, 76.00000, 21.37778, 113.50000, 244.68421, + -52.50000, 44.41438, 341.60000, 214.00000, 76.00000, + 113.50000, -6.00000, 34.00000, 21.37778, NA, 44.41438, + 113.50000, 44.41438, 21.37778, -53.33333, + 76.00000, 21.37778, 113.50000, 264.87500, + -50.00000, 44.41438, 137.00000, 214.00000, 76.00000, + 87.57143, -6.00000, 34.00000, 4.50000, NA, 42.54730, + -5.00000, 34.00000, 6.00000 + ) ) new_th6b <- merge_tables(data$th4, data$th5, data$th4, data$th5)$ update_by(uby_rolling_wavg_tick("Number1", c("Number1", "Number2"), rev_ticks = 1, fwd_ticks = 2), by = c("X", "Y")) expect_true(all.equal(as.data.frame(new_th6b), new_df6b, tolerance = 1e-4)) - + new_df6c <- data.frame( - X = c("A", "B", "A", "C", "B", "A", "B", "B", "C", "B", - "C", "B", "A", "A", "C", "B", "C", "B", "A", "A", - "B", "A", "C", "B", "A", "B", "B", "C", "B", "C", - "B", "A", "A", "C", "B", "C", "B", "A"), - Y = c("M", "N", "O", "N", "P", "M", "O", "P", "M", "N", - "N", "M", "P", "O", "P", "O", "N", "O", "O", "M", - "N", "O", "N", "P", "M", "O", "P", "M", "N", "N", - "M", "P", "O", "P", "O", "N", "O", "O"), - Number1 = c(83.33333, 451.00000, 48.04000, 63.91566, -97.50000, - 90.00000, 320.49733, -79.26316, -70.00000, -209.00000, - 529.44444, 86.00000, -45.00000, 53.93333, NA, 274.13742, - 529.44444, 274.13742, 53.93333, 75.00000, - 121.00000, 53.93333, 529.44444, -166.80000, - 83.33333, 274.13742, -97.50000, -70.00000, 451.00000, - 529.44444, 86.00000, -45.00000, 53.93333, NA, 274.13742, - 1344.14286, 290.14865, -6.50000), - Number2 = c(-53.33333, 76.00000, 19.84000, 20.69880, 264.87500, - -54.00000, 47.17112, 244.68421, 214.00000, 76.00000, - 113.50000, -6.00000, 34.00000, 21.37778, NA, 44.41438, - 113.50000, 44.41438, 21.37778, -52.50000, - 76.00000, 21.37778, 113.50000, 341.60000, - -53.33333, 44.41438, 264.87500, 214.00000, 76.00000, - 113.50000, -6.00000, 34.00000, 21.37778, NA, 44.41438, - 87.57143, 42.54730, 4.50000) + X = c( + "A", "B", "A", "C", "B", "A", "B", "B", "C", "B", + "C", "B", "A", "A", "C", "B", "C", "B", "A", "A", + "B", "A", "C", "B", "A", "B", "B", "C", "B", "C", + "B", "A", "A", "C", "B", "C", "B", "A" + ), + Y = c( + "M", "N", "O", "N", "P", "M", "O", "P", "M", "N", + "N", "M", "P", "O", "P", "O", "N", "O", "O", "M", + "N", "O", "N", "P", "M", "O", "P", "M", "N", "N", + "M", "P", "O", "P", "O", "N", "O", "O" + ), + Number1 = c( + 83.33333, 451.00000, 48.04000, 63.91566, -97.50000, + 90.00000, 320.49733, -79.26316, -70.00000, -209.00000, + 529.44444, 86.00000, -45.00000, 53.93333, NA, 274.13742, + 529.44444, 274.13742, 53.93333, 75.00000, + 121.00000, 53.93333, 529.44444, -166.80000, + 83.33333, 274.13742, -97.50000, -70.00000, 451.00000, + 529.44444, 86.00000, -45.00000, 53.93333, NA, 274.13742, + 1344.14286, 290.14865, -6.50000 + ), + 
Number2 = c( + -53.33333, 76.00000, 19.84000, 20.69880, 264.87500, + -54.00000, 47.17112, 244.68421, 214.00000, 76.00000, + 113.50000, -6.00000, 34.00000, 21.37778, NA, 44.41438, + 113.50000, 44.41438, 21.37778, -52.50000, + 76.00000, 21.37778, 113.50000, 341.60000, + -53.33333, 44.41438, 264.87500, 214.00000, 76.00000, + 113.50000, -6.00000, 34.00000, 21.37778, NA, 44.41438, + 87.57143, 42.54730, 4.50000 + ) ) new_th6c <- merge_tables(data$th4, data$th5, data$th4, data$th5)$ update_by(uby_rolling_wavg_tick("Number1", c("Number1", "Number2"), rev_ticks = 2, fwd_ticks = 1), by = c("X", "Y")) expect_true(all.equal(as.data.frame(new_th6c), new_df6c, tolerance = 1e-4)) - + data$client$close() }) test_that("uby_rolling_wavg_time behaves as expected", { data <- setup() - + # Need to append a weight column to the df and th data$deterministic_df3 <- data$deterministic_df3 %>% mutate(weight_col = sqrt(int_col)) data$deterministic_th3 <- data$deterministic_th3$ update("weight_col = sqrt(int_col)") - + base_df <- data.frame( time_col = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.001), by = "1 sec")[1:50], - bool_col = c(TRUE, TRUE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, TRUE, - TRUE, TRUE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, TRUE, - TRUE, TRUE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, TRUE, - TRUE, TRUE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, TRUE, - TRUE, TRUE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, TRUE) + bool_col = c( + TRUE, TRUE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, TRUE, + TRUE, TRUE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, TRUE, + TRUE, TRUE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, TRUE, + TRUE, TRUE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, TRUE, + TRUE, TRUE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, TRUE + ) ) - - new_df1a <- cbind(base_df, "int_col" = c(1.000000, 1.585786, 2.176557, 2.769907, 3.364806, 3.960724, - 4.557357, 5.154516, 5.752074, 6.599146, 7.501993, 8.433335, - 9.381805, 10.341536, 11.309121, 12.282429, 13.260044, 14.240990, - 15.224566, 16.210258, 17.197678, 18.186530, 19.176580, 20.167643, - 21.159573, 22.152247, 23.145567, 24.139451, 25.133831, 26.128647, - 27.123851, 28.119401, 29.115260, 30.111398, 31.107787, 32.104403, - 33.101225, 34.098236, 35.095418, 36.092758, 37.090243, 38.087860, - 39.085600, 40.083454, 41.081413, 42.079469, 43.077616, 44.075848, - 45.074159, 46.072543)) + + new_df1a <- cbind(base_df, "int_col" = c( + 1.000000, 1.585786, 2.176557, 2.769907, 3.364806, 3.960724, + 4.557357, 5.154516, 5.752074, 6.599146, 7.501993, 8.433335, + 9.381805, 10.341536, 11.309121, 12.282429, 13.260044, 14.240990, + 15.224566, 16.210258, 17.197678, 18.186530, 19.176580, 20.167643, + 21.159573, 22.152247, 23.145567, 24.139451, 25.133831, 26.128647, + 27.123851, 28.119401, 29.115260, 30.111398, 31.107787, 32.104403, + 33.101225, 34.098236, 35.095418, 36.092758, 37.090243, 38.087860, + 39.085600, 40.083454, 41.081413, 42.079469, 43.077616, 44.075848, + 45.074159, 46.072543 + )) new_th1a <- head(data$deterministic_th3, 50)$ update_by(uby_rolling_wavg_time("time_col", "weight_col", "int_col", "PT8s"))$ drop_columns("weight_col") expect_equal(as.data.frame(new_th1a), new_df1a) - - new_df1b <- cbind(base_df, "int_col" = c(5.752074, 6.599146, 7.501993, 8.433335, 9.381805, 10.341536, - 11.309121, 12.282429, 13.260044, 14.240990, 15.224566, - 16.210258, 17.197678, 18.186530, 19.176580, 20.167643, - 21.159573, 22.152247, 23.145567, 24.139451, 25.133831, - 26.128647, 27.123851, 28.119401, 
29.115260, 30.111398, - 31.107787, 32.104403, 33.101225, 34.098236, 35.095418, - 36.092758, 37.090243, 38.087860, 39.085600, 40.083454, - 41.081413, 42.079469, 43.077616, 44.075848, 45.074159, - 46.072543, 46.556499, 47.042580, 47.530715, 48.020839, - 48.512889, 49.006803, 49.502525, 50.000000)) + + new_df1b <- cbind(base_df, "int_col" = c( + 5.752074, 6.599146, 7.501993, 8.433335, 9.381805, 10.341536, + 11.309121, 12.282429, 13.260044, 14.240990, 15.224566, + 16.210258, 17.197678, 18.186530, 19.176580, 20.167643, + 21.159573, 22.152247, 23.145567, 24.139451, 25.133831, + 26.128647, 27.123851, 28.119401, 29.115260, 30.111398, + 31.107787, 32.104403, 33.101225, 34.098236, 35.095418, + 36.092758, 37.090243, 38.087860, 39.085600, 40.083454, + 41.081413, 42.079469, 43.077616, 44.075848, 45.074159, + 46.072543, 46.556499, 47.042580, 47.530715, 48.020839, + 48.512889, 49.006803, 49.502525, 50.000000 + )) new_th1b <- head(data$deterministic_th3, 50)$ update_by(uby_rolling_wavg_time("time_col", "weight_col", "int_col", "PT0s", "PT8s"))$ drop_columns("weight_col") expect_equal(as.data.frame(new_th1b), new_df1b) - - new_df1c <- cbind(base_df, "int_col" = c(3.364806, 3.960724, 4.557357, 5.154516, 5.752074, 6.599146, - 7.501993, 8.433335, 9.381805, 10.341536, 11.309121, - 12.282429, 13.260044, 14.240990, 15.224566, 16.210258, - 17.197678, 18.186530, 19.176580, 20.167643, 21.159573, - 22.152247, 23.145567, 24.139451, 25.133831, 26.128647, - 27.123851, 28.119401, 29.115260, 30.111398, 31.107787, - 32.104403, 33.101225, 34.098236, 35.095418, 36.092758, - 37.090243, 38.087860, 39.085600, 40.083454, 41.081413, - 42.079469, 43.077616, 44.075848, 45.074159, 46.072543, - 46.556499, 47.042580, 47.530715, 48.020839)) + + new_df1c <- cbind(base_df, "int_col" = c( + 3.364806, 3.960724, 4.557357, 5.154516, 5.752074, 6.599146, + 7.501993, 8.433335, 9.381805, 10.341536, 11.309121, + 12.282429, 13.260044, 14.240990, 15.224566, 16.210258, + 17.197678, 18.186530, 19.176580, 20.167643, 21.159573, + 22.152247, 23.145567, 24.139451, 25.133831, 26.128647, + 27.123851, 28.119401, 29.115260, 30.111398, 31.107787, + 32.104403, 33.101225, 34.098236, 35.095418, 36.092758, + 37.090243, 38.087860, 39.085600, 40.083454, 41.081413, + 42.079469, 43.077616, 44.075848, 45.074159, 46.072543, + 46.556499, 47.042580, 47.530715, 48.020839 + )) new_th1c <- head(data$deterministic_th3, 50)$ update_by(uby_rolling_wavg_time("time_col", "weight_col", "int_col", "PT4s", "PT4s"))$ drop_columns("weight_col") expect_equal(as.data.frame(new_th1c), new_df1c) - - new_df2a <- cbind(base_df, "int_col" = c(1.000000, 1.585786, 3.000000, 3.535898, 3.227496, 4.512320, - 4.595515, 5.607180, 6.454681, 6.782586, 8.609158, 9.401493, - 9.357245, 10.469018, 11.316537, 12.394463, 13.259445, - 15.370444, 16.163520, 16.268778, 18.407602, 19.182620, - 19.169030, 20.230497, 21.162927, 22.211264, 23.146719, - 25.322767, 26.100235, 26.164183, 28.351280, 29.118634, - 29.111044, 30.153311, 31.109992, 32.144589, 33.102407, - 35.302156, 36.072311, 36.118326, 38.324651, 39.087902, - 39.082723, 40.114920, 41.083063, 42.109955, 43.078675, - 45.290649, 46.056564, 46.092521)) + + new_df2a <- cbind(base_df, "int_col" = c( + 1.000000, 1.585786, 3.000000, 3.535898, 3.227496, 4.512320, + 4.595515, 5.607180, 6.454681, 6.782586, 8.609158, 9.401493, + 9.357245, 10.469018, 11.316537, 12.394463, 13.259445, + 15.370444, 16.163520, 16.268778, 18.407602, 19.182620, + 19.169030, 20.230497, 21.162927, 22.211264, 23.146719, + 25.322767, 26.100235, 26.164183, 28.351280, 29.118634, + 29.111044, 
30.153311, 31.109992, 32.144589, 33.102407, + 35.302156, 36.072311, 36.118326, 38.324651, 39.087902, + 39.082723, 40.114920, 41.083063, 42.109955, 43.078675, + 45.290649, 46.056564, 46.092521 + )) new_th2a <- head(data$deterministic_th3, 50)$ update_by(uby_rolling_wavg_time("time_col", "weight_col", "int_col", "PT8s"), by = "bool_col")$ drop_columns("weight_col") expect_equal(as.data.frame(new_th2a), new_df2a) - - new_df2b <- cbind(base_df, "int_col" = c(4.595515, 6.782586, 6.454681, 7.036869, 9.401493, 10.469018, - 11.316537, 12.394463, 13.260793, 13.259445, 13.956975, - 16.268778, 16.163520, 16.861439, 19.182620, 20.230497, - 21.162927, 22.211264, 23.144128, 23.146719, 23.869532, - 26.164183, 26.100235, 26.819386, 29.118634, 30.153311, - 31.109992, 32.144589, 33.099748, 33.102407, 33.834106, - 36.118326, 36.072311, 36.800397, 39.087902, 40.114920, - 41.083063, 42.109955, 43.076293, 43.078675, 43.814892, - 46.092521, 46.056564, 46.789573, 47.377845, 47.683028, - 48.523201, 48.502577, 49.000000, 50.000000)) + + new_df2b <- cbind(base_df, "int_col" = c( + 4.595515, 6.782586, 6.454681, 7.036869, 9.401493, 10.469018, + 11.316537, 12.394463, 13.260793, 13.259445, 13.956975, + 16.268778, 16.163520, 16.861439, 19.182620, 20.230497, + 21.162927, 22.211264, 23.144128, 23.146719, 23.869532, + 26.164183, 26.100235, 26.819386, 29.118634, 30.153311, + 31.109992, 32.144589, 33.099748, 33.102407, 33.834106, + 36.118326, 36.072311, 36.800397, 39.087902, 40.114920, + 41.083063, 42.109955, 43.076293, 43.078675, 43.814892, + 46.092521, 46.056564, 46.789573, 47.377845, 47.683028, + 48.523201, 48.502577, 49.000000, 50.000000 + )) new_th2b <- head(data$deterministic_th3, 50)$ update_by(uby_rolling_wavg_time("time_col", "weight_col", "int_col", "PT0s", "PT8s"), by = "bool_col")$ drop_columns("weight_col") expect_equal(as.data.frame(new_th2b), new_df2b) - - new_df2c <- cbind(base_df, "int_col" = c(3.227496, 3.227496, 4.512320, 5.607180, 4.595515, 6.454681, - 8.609158, 7.036869, 9.357245, 10.183304, 11.316537, 12.143151, - 13.260793, 15.370444, 13.956975, 16.163520, 18.407602, 16.861439, - 19.169030, 20.089214, 21.162927, 22.078589, 23.144128, 25.322767, - 23.869532, 26.100235, 28.351280, 26.819386, 29.111044, 30.059047, - 31.109992, 32.054206, 33.099748, 35.302156, 33.834106, 36.072311, - 38.324651, 36.800397, 39.082723, 40.044139, 41.083063, 42.041378, - 43.076293, 45.290649, 43.814892, 46.056564, 47.377845, 46.789573, - 47.683028, 48.523201)) + + new_df2c <- cbind(base_df, "int_col" = c( + 3.227496, 3.227496, 4.512320, 5.607180, 4.595515, 6.454681, + 8.609158, 7.036869, 9.357245, 10.183304, 11.316537, 12.143151, + 13.260793, 15.370444, 13.956975, 16.163520, 18.407602, 16.861439, + 19.169030, 20.089214, 21.162927, 22.078589, 23.144128, 25.322767, + 23.869532, 26.100235, 28.351280, 26.819386, 29.111044, 30.059047, + 31.109992, 32.054206, 33.099748, 35.302156, 33.834106, 36.072311, + 38.324651, 36.800397, 39.082723, 40.044139, 41.083063, 42.041378, + 43.076293, 45.290649, 43.814892, 46.056564, 47.377845, 46.789573, + 47.683028, 48.523201 + )) new_th2c <- head(data$deterministic_th3, 50)$ update_by(uby_rolling_wavg_time("time_col", "weight_col", "int_col", "PT4s", "PT4s"), by = "bool_col")$ drop_columns("weight_col") expect_equal(as.data.frame(new_th2c), new_df2c) - + data$client$close() }) diff --git a/R/rdeephaven/inst/tests/testthat/test_update_by_ops_wrappers.R b/R/rdeephaven/inst/tests/testthat/test_update_by_ops_wrappers.R index 4fb7baed427..250f25078c8 100644 --- 
a/R/rdeephaven/inst/tests/testthat/test_update_by_ops_wrappers.R +++ b/R/rdeephaven/inst/tests/testthat/test_update_by_ops_wrappers.R @@ -1470,4 +1470,4 @@ test_that("uby_rolling_wavg_time fails nicely when 'fwd_time' is a bad type", { uby_rolling_wavg_time("PT0s", "wcol", "col", "PT0s", c("Many", "strings")), "'fwd_time' must be a single string. Got a vector of length 2." ) -}) \ No newline at end of file +}) diff --git a/R/rdeephaven/man/AggBy.Rd b/R/rdeephaven/man/AggBy.Rd deleted file mode 100644 index c9bd2a63f5a..00000000000 --- a/R/rdeephaven/man/AggBy.Rd +++ /dev/null @@ -1,101 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/agg_ops_wrapper.R -\docType{class} -\name{AggBy} -\alias{AggBy} -\title{Aggregations in Deephaven} -\description{ -Table aggregations are a quintessential feature of Deephaven. You can apply as many aggregations as -needed to static tables \emph{or} streaming tables, and if the parent tables are streaming, the resulting aggregated -tables will update alongside their parent tables. It is also very easy to perform \emph{grouped} aggregations, which -allow you to aggregate tables on a per-group basis. -} -\section{Apply aggregations to a table}{ - -There are two methods for performing aggregations on a table, \code{agg_by()} and \code{agg_all_by()}. \code{agg_by()} allows you to -perform many aggregations on specified columns, while \code{agg_all_by()} allows you to perform a single aggregation to -every non-grouping column in the table. Both methods have an optional \code{by} parameter that is used to specify grouping columns. -Here are some details on each method: -\itemize{ -\item \code{TableHandle$agg_by(aggs, by)}: Creates a new table containing grouping columns and grouped data. -The resulting grouped data is defined by the aggregation(s) specified. -\item \code{TableHandle$agg_all_by(agg, by)}: Creates a new table containing grouping columns and grouped data. -The resulting grouped data is defined by the aggregation specified. This method applies the aggregation to all -non-grouping columns of the table, so it can only accept one aggregation at a time. -} - -The \code{agg_by()} and \code{agg_all_by()} methods themselves do not know anything about the columns on which you want to -perform aggregations. Rather, the desired columns are passed to individual \code{agg} functions, enabling you to apply -various kinds of aggregations to different columns or groups of columns as needed. -} - -\section{\code{agg} functions}{ - -\code{agg} functions are used to perform aggregation calculations on grouped data by passing them to \code{agg_by()} or -\code{agg_all_by()}. These functions are \emph{generators}, meaning they return \emph{functions} that the Deephaven engine knows -how to interpret. We call the functions that they return \code{\link{AggOp}}s. These \code{AggOp}s are not R-level functions, -but Deephaven-specific data types that perform all of the intensive calculations. 
Here is a list of all \code{agg} functions -available in Deephaven: -\itemize{ -\item \code{\link[=agg_first]{agg_first()}} -\item \code{\link[=agg_last]{agg_last()}} -\item \code{\link[=agg_min]{agg_min()}} -\item \code{\link[=agg_max]{agg_max()}} -\item \code{\link[=agg_sum]{agg_sum()}} -\item \code{\link[=agg_abs_sum]{agg_abs_sum()}} -\item \code{\link[=agg_avg]{agg_avg()}} -\item \code{\link[=agg_w_avg]{agg_w_avg()}} -\item \code{\link[=agg_median]{agg_median()}} -\item \code{\link[=agg_var]{agg_var()}} -\item \code{\link[=agg_std]{agg_std()}} -\item \code{\link[=agg_percentile]{agg_percentile()}} -\item \code{\link[=agg_count]{agg_count()}} -} - -For more details on each aggregation function, click on one of the methods above or see the reference documentation -by running \code{?agg_first}, \code{?agg_last}, etc. -} - -\examples{ -\dontrun{ -library(rdeephaven) - -# connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") - -# create data frame, push to server, retrieve TableHandle -df <- data.frame( - X = c("A", "B", "A", "C", "B", "A", "B", "B", "C"), - Y = c("M", "N", "O", "N", "P", "M", "O", "P", "M"), - Number1 = c(100, -44, 49, 11, -66, 50, 29, 18, -70), - Number2 = c(-55, 76, 20, 130, 230, -50, 73, 137, 214) -) -th <- client$import_table(df) - -# get first and last elements of each column -th1 <- th$ - agg_by(agg_first(c("XFirst = X", "YFirst = Y", "Number1First = Number1", "Number2First = Number2")), - agg_last(c("XLast = X", "YLast = Y", "Number1Last = Number1", "Number2Last = Number2"))) - -# compute mean and standard deviation of Number1 and Number2, grouped by X -th2 <- th$ - agg_by( - c(agg_avg(c("Number1Avg = Number1", "Number2Avg = Number2")), - agg_std(c("Number1Std = Number1", "Number2Std = Number2"))), - by="X") - -# compute maximum of all non-grouping columns, grouped by X and Y -th3 <- th$ - agg_all_by(agg_max(), by=c("X", "Y")) - -# compute minimum and maximum of Number1 and Number2 respectively grouped by Y -th4 <- th$ - agg_by( - c(agg_min("Number1Min = Number1"), - agg_max("Number2Max = Number2")), - by="Y") - -client$close() -} - -} diff --git a/R/rdeephaven/man/AggOp.Rd b/R/rdeephaven/man/AggOp.Rd index 761ae21a628..166797cb209 100644 --- a/R/rdeephaven/man/AggOp.Rd +++ b/R/rdeephaven/man/AggOp.Rd @@ -5,9 +5,10 @@ \alias{AggOp} \title{Deephaven AggOps} \description{ -An \code{AggOp} is the return type of one of Deephaven's \code{\link[=AggBy]{agg}} functions. It is a function that performs the +An \code{AggOp} is the return type of one of Deephaven's \code{agg} functions. It is a function that performs the computation specified by the \code{agg} function. These are intended to be passed directly to \code{agg_by()} or \code{agg_all_by()}, -and should never be instantiated directly be user code. +and should never be instantiated directly be user code. For more information, see the +vignette on \code{agg} functions with \code{vignette("agg_by")}. If multiple tables have the same schema and the same aggregations need to be applied to each table, saving these objects directly in a variable may be useful to avoid having to re-create them each time: @@ -19,10 +20,7 @@ result1 <- th1$agg_by(aggregations, by="Group") result2 <- th2$agg_by(aggregations, by="Group") }\if{html}{\out{}} -In this example, \code{aggregations} would be a vector of two \code{AggOp}s that can be reused in multiple calls to \code{agg_by()}. 
-} -\details{ -Name AggOp +In this example, \code{aggregations} would be a vector of two AggOps that can be reused in multiple calls to \code{agg_by()}. } \section{Methods}{ \subsection{Public methods}{ diff --git a/R/rdeephaven/man/Client.Rd b/R/rdeephaven/man/Client.Rd index 132fdb46dfb..9602e5ec5e1 100644 --- a/R/rdeephaven/man/Client.Rd +++ b/R/rdeephaven/man/Client.Rd @@ -13,7 +13,7 @@ import data to and export data from the server, and run queries on the server. library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create a data frame and push it to the server, retrieve a reference to it as a TableHandle df <- data.frame( diff --git a/R/rdeephaven/man/TableHandle.Rd b/R/rdeephaven/man/TableHandle.Rd index 8d86943811b..9f7694a3c34 100644 --- a/R/rdeephaven/man/TableHandle.Rd +++ b/R/rdeephaven/man/TableHandle.Rd @@ -60,12 +60,12 @@ th2$bind_to_variable("t2") library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create a data frame, push it to the server, and retrieve a TableHandle referencing the new table df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:50], - boolCol = sample(c(TRUE,FALSE), 50, TRUE), + boolCol = sample(c(TRUE, FALSE), 50, TRUE), col1 = sample(1000, size = 50, replace = TRUE), col2 = sample(1000, size = 50, replace = TRUE), col3 = 1:50 diff --git a/R/rdeephaven/man/UpdateBy.Rd b/R/rdeephaven/man/UpdateBy.Rd deleted file mode 100644 index fcfccdc1c5b..00000000000 --- a/R/rdeephaven/man/UpdateBy.Rd +++ /dev/null @@ -1,110 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/update_by_ops_wrapper.R -\docType{class} -\name{UpdateBy} -\alias{UpdateBy} -\title{Deephaven's UpdateBy Operations} -\description{ -Deephaven's \code{update_by()} table method and suite of \code{uby} functions enable cumulative and moving calculations -on static \emph{and} streaming tables. Complex operations like cumulative minima and maxima, exponential moving averages, -and rolling standard deviations are all possible and effortless to execute. As always in Deephaven, -the results of these calculations will continue to update as their parent tables are updated. Additionally, it's easy -to group data by one or more columns, enabling complex group-wise calculations with a single line of code. -} -\section{Applying UpdateBy operations to a table}{ - -The table method \code{update_by()} is the entry point for UpdateBy operations. It takes two arguments: the first is an -\code{\link{UpdateByOp}} or a list of \code{UpdateByOp}s denoting the calculations to perform on specific columns of the -table. Then, it takes a column name or a list of column names that define the groups on which to perform the calculations. -If you don't want grouped calculations, omit this argument. - -The \code{update_by()} method itself does not know anything about the columns on which you want to perform calculations. -Rather, the desired columns are passed to individual \code{uby} functions, enabling a massive amount of flexibility. -} - -\section{\code{uby} functions}{ - -\code{uby} functions are the workers that actually execute the complex UpdateBy calculations. 
These functions are -\emph{generators}, meaning they return \emph{functions} that the Deephaven engine knows how to interpret. We call the functions -that they return \code{\link{UpdateByOp}}s. These \code{UpdateByOp}s are not R-level functions, but Deephaven-specific -data types that perform all of the intensive calculations. Here is a list of all \code{uby} functions available in Deephaven: -\itemize{ -\item \code{\link[=uby_cum_min]{uby_cum_min()}} -\item \code{\link[=uby_cum_max]{uby_cum_max()}} -\item \code{\link[=uby_cum_sum]{uby_cum_sum()}} -\item \code{\link[=uby_cum_prod]{uby_cum_prod()}} -\item \code{\link[=uby_forward_fill]{uby_forward_fill()}} -\item \code{\link[=uby_delta]{uby_delta()}} -\item \code{\link[=uby_emmin_tick]{uby_emmin_tick()}} -\item \code{\link[=uby_emmin_time]{uby_emmin_time()}} -\item \code{\link[=uby_emmax_tick]{uby_emmax_tick()}} -\item \code{\link[=uby_emmax_time]{uby_emmax_time()}} -\item \code{\link[=uby_ems_tick]{uby_ems_tick()}} -\item \code{\link[=uby_ems_time]{uby_ems_time()}} -\item \code{\link[=uby_ema_tick]{uby_ema_tick()}} -\item \code{\link[=uby_ema_time]{uby_ema_time()}} -\item \code{\link[=uby_emstd_tick]{uby_emstd_tick()}} -\item \code{\link[=uby_emstd_time]{uby_emstd_time()}} -\item \code{\link[=uby_rolling_count_tick]{uby_rolling_count_tick()}} -\item \code{\link[=uby_rolling_count_time]{uby_rolling_count_time()}} -\item \code{\link[=uby_rolling_group_tick]{uby_rolling_group_tick()}} -\item \code{\link[=uby_rolling_group_time]{uby_rolling_group_time()}} -\item \code{\link[=uby_rolling_min_tick]{uby_rolling_min_tick()}} -\item \code{\link[=uby_rolling_min_time]{uby_rolling_min_time()}} -\item \code{\link[=uby_rolling_max_tick]{uby_rolling_max_tick()}} -\item \code{\link[=uby_rolling_max_time]{uby_rolling_max_time()}} -\item \code{\link[=uby_rolling_sum_tick]{uby_rolling_sum_tick()}} -\item \code{\link[=uby_rolling_sum_time]{uby_rolling_sum_time()}} -\item \code{\link[=uby_rolling_prod_tick]{uby_rolling_prod_tick()}} -\item \code{\link[=uby_rolling_prod_time]{uby_rolling_prod_time()}} -\item \code{\link[=uby_rolling_avg_tick]{uby_rolling_avg_tick()}} -\item \code{\link[=uby_rolling_avg_time]{uby_rolling_avg_time()}} -\item \code{\link[=uby_rolling_wavg_tick]{uby_rolling_wavg_tick()}} -\item \code{\link[=uby_rolling_wavg_time]{uby_rolling_wavg_time()}} -\item \code{\link[=uby_rolling_std_tick]{uby_rolling_std_tick()}} -\item \code{\link[=uby_rolling_std_time]{uby_rolling_std_time()}} -} - -For more details on each aggregation function, click on one of the methods above or see the reference documentation -by running \code{?uby_cum_min}, \code{?uby_delta}, etc. 
-} - -\examples{ -\dontrun{ -library(rdeephaven) - -# connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") - -# create data frame, push to server, retrieve TableHandle -df <- data.frame( - timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), - col1 = sample(10000, size = 500, replace = TRUE), - col2 = sample(10000, size = 500, replace = TRUE), - col3 = 1:500 -) -th <- client$import_table(df) - -# compute 10-row exponential weighted moving average of col1 and col2, grouped by boolCol -th1 <- th$ - update_by(uby_ema_tick(decay_ticks=10, cols=c("col1Ema = col1", "col2Ema = col2")), by="boolCol") - -# compute rolling 10-second weighted average and standard deviation of col1 and col2, weighted by col3 -th2 <- th$ - update_by( - c(uby_rolling_wavg_time(ts_col="timeCol", wcol="col3", cols=c("col1WAvg = col1", "col2WAvg = col2"), rev_time="PT10s"), - uby_rolling_std_time(ts_col="timeCol", cols=c("col1Std = col1", "col2Std = col2"), rev_time="PT10s"))) - -# compute cumulative minimum and maximum of col1 and col2 respectively, and the rolling 20-row sum of col3, grouped by boolCol -th3 <- th$ - update_by( - c(uby_cum_min(cols="col1"), - uby_cum_max(cols="col2"), - uby_rolling_sum_tick(cols="col3", rev_ticks=20)), - by="boolCol") - -client$close() -} - -} diff --git a/R/rdeephaven/man/UpdateByOp.Rd b/R/rdeephaven/man/UpdateByOp.Rd index b4612690cf8..db1f1a5b597 100644 --- a/R/rdeephaven/man/UpdateByOp.Rd +++ b/R/rdeephaven/man/UpdateByOp.Rd @@ -7,7 +7,8 @@ \description{ An \code{UpdateByOp} is the return type of one of Deephaven's \code{\link[=UpdateBy]{uby}} functions. It is a function that performs the computation specified by the \code{uby} function. These are intended to be passed directly to \code{update_by()}, -and should never be instantiated directly be user code. +and should never be instantiated directly be user code. For more information, see the vignette on +\code{uby} functions with \code{vignette("update_by")}. If multiple tables have the same schema and the same UpdateBy operations need to be applied to each table, saving these objects directly in a variable may be useful to avoid having to re-create them each time: diff --git a/R/rdeephaven/man/agg_abs_sum.Rd b/R/rdeephaven/man/agg_abs_sum.Rd index a5c74e02526..3688270cd29 100644 --- a/R/rdeephaven/man/agg_abs_sum.Rd +++ b/R/rdeephaven/man/agg_abs_sum.Rd @@ -8,7 +8,7 @@ Default is to aggregate all non-grouping columns, which is only valid in the \code{agg_all_by()} operation.} } \value{ -\code{AggOp} to be used in a call to \code{agg_by()} or \code{agg_all_by()}. +\code{\link{AggOp}} to be used in a call to \code{agg_by()} or \code{agg_all_by()}. } \description{ Creates an Absolute Sum aggregation that computes the absolute sum of each column in \code{cols} for each aggregation group. @@ -23,13 +23,16 @@ This function, like other Deephaven \code{agg} functions, is a generator functio function called an \code{\link{AggOp}} intended to be used in a call to \code{agg_by()} or \code{agg_all_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of an \code{agg} function can otherwise seem unexpected. + +For more information, see the vignette on \code{agg} functions by running +\code{vignette("agg_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( @@ -46,11 +49,11 @@ th1 <- th$ # compute absolute sum of Number1 and Number2 grouped by X th2 <- th$ - agg_by(agg_abs_sum(c("Number1", "Number2")), by="X") + agg_by(agg_abs_sum(c("Number1", "Number2")), by = "X") # compute absolute sum of Number1 and Number2 grouped by X and Y th3 <- th$ - agg_by(agg_abs_sum(c("Number1", "Number2")), by=c("X", "Y")) + agg_by(agg_abs_sum(c("Number1", "Number2")), by = c("X", "Y")) client$close() } diff --git a/R/rdeephaven/man/agg_avg.Rd b/R/rdeephaven/man/agg_avg.Rd index 21711b256f7..156d9f0f646 100644 --- a/R/rdeephaven/man/agg_avg.Rd +++ b/R/rdeephaven/man/agg_avg.Rd @@ -8,7 +8,7 @@ Default is to aggregate all non-grouping columns, which is only valid in the \code{agg_all_by()} operation.} } \value{ -\code{AggOp} to be used in a call to \code{agg_by()} or \code{agg_all_by()}. +\code{\link{AggOp}} to be used in a call to \code{agg_by()} or \code{agg_all_by()}. } \description{ Creates an Average aggregation that computes the average of each column in \code{cols} for each aggregation group. @@ -23,13 +23,16 @@ This function, like other Deephaven \code{agg} functions, is a generator functio function called an \code{\link{AggOp}} intended to be used in a call to \code{agg_by()} or \code{agg_all_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of an \code{agg} function can otherwise seem unexpected. + +For more information, see the vignette on \code{agg} functions by running +\code{vignette("agg_by")}. } \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( @@ -46,11 +49,11 @@ th1 <- th$ # compute average of Number1 and Number2 grouped by X th2 <- th$ - agg_by(agg_avg(c("Number1", "Number2")), by="X") + agg_by(agg_avg(c("Number1", "Number2")), by = "X") # compute average of Number1 and Number2 grouped by X and Y th3 <- th$ - agg_by(agg_avg(c("Number1", "Number2")), by=c("X", "Y")) + agg_by(agg_avg(c("Number1", "Number2")), by = c("X", "Y")) client$close() } diff --git a/R/rdeephaven/man/agg_count.Rd b/R/rdeephaven/man/agg_count.Rd index 785545726bd..ea02848cf0e 100644 --- a/R/rdeephaven/man/agg_count.Rd +++ b/R/rdeephaven/man/agg_count.Rd @@ -7,7 +7,7 @@ \item{col}{String denoting the name of the new column to hold the counts of each aggregation group.} } \value{ -\code{AggOp} to be used in a call to \code{agg_by()}. +\code{\link{AggOp}} to be used in a call to \code{agg_by()}. } \description{ Creates a Count aggregation that counts the number of rows in each aggregation group. @@ -23,6 +23,9 @@ function called an \code{\link{AggOp}} intended to be used in a call to \code{ag typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of an \code{agg} function can otherwise seem unexpected. +For more information, see the vignette on \code{agg} functions by running +\code{vignette("agg_by")}. 
+ Note that this operation is not supported in \code{agg_all_by()}. } \examples{ @@ -30,7 +33,7 @@ Note that this operation is not supported in \code{agg_all_by()}. library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( @@ -43,11 +46,11 @@ th <- client$import_table(df) # count number of elements in each group when grouped by X, name resulting column "count" th1 <- th$ - agg_by(agg_count("count"), by="X") + agg_by(agg_count("count"), by = "X") # count number of elements in each group when grouped by X and Y, name resulting column "CountingCol" th2 <- th$ - agg_by(agg_count("CountingCol"), by=c("X", "Y")) + agg_by(agg_count("CountingCol"), by = c("X", "Y")) client$close() } diff --git a/R/rdeephaven/man/agg_first.Rd b/R/rdeephaven/man/agg_first.Rd index fb914fcfcfa..2e2f6ffcff3 100644 --- a/R/rdeephaven/man/agg_first.Rd +++ b/R/rdeephaven/man/agg_first.Rd @@ -8,7 +8,7 @@ Default is to aggregate all non-grouping columns, which is only valid in the \code{agg_all_by()} operation.} } \value{ -\code{AggOp} to be used in a call to \code{agg_by()} or \code{agg_all_by()}. +\code{\link{AggOp}} to be used in a call to \code{agg_by()} or \code{agg_all_by()}. } \description{ Creates a First aggregation that computes the first value of each column in \code{cols} for each aggregation group. @@ -23,13 +23,16 @@ This function, like other Deephaven \code{agg} functions, is a generator functio function called an \code{\link{AggOp}} intended to be used in a call to \code{agg_by()} or \code{agg_all_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of an \code{agg} function can otherwise seem unexpected. + +For more information, see the vignette on \code{agg} functions by running +\code{vignette("agg_by")}. } \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( @@ -46,11 +49,11 @@ th1 <- th$ # get first elements of Y, Number1, and Number2 grouped by X th2 <- th$ - agg_by(agg_first(c("Y", "Number1", "Number2")), by="X") + agg_by(agg_first(c("Y", "Number1", "Number2")), by = "X") # get first elements of Number1 and Number2 grouped by X and Y th3 <- th - agg_by(agg_first(c("Number1", "Number2")), by=c("X", "Y")) +agg_by(agg_first(c("Number1", "Number2")), by = c("X", "Y")) client$close() } diff --git a/R/rdeephaven/man/agg_last.Rd b/R/rdeephaven/man/agg_last.Rd index 0cc4d5032ba..a96876670e9 100644 --- a/R/rdeephaven/man/agg_last.Rd +++ b/R/rdeephaven/man/agg_last.Rd @@ -8,7 +8,7 @@ Default is to aggregate all non-grouping columns, which is only valid in the \code{agg_all_by()} operation.} } \value{ -\code{AggOp} to be used in a call to \code{agg_by()} or \code{agg_all_by()}. +\code{\link{AggOp}} to be used in a call to \code{agg_by()} or \code{agg_all_by()}. } \description{ Creates a Last aggregation that computes the last value of each column in \code{cols} for each aggregation group. 
@@ -23,13 +23,16 @@ This function, like other Deephaven \code{agg} functions, is a generator functio function called an \code{\link{AggOp}} intended to be used in a call to \code{agg_by()} or \code{agg_all_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of an \code{agg} function can otherwise seem unexpected. + +For more information, see the vignette on \code{agg} functions by running +\code{vignette("agg_by")}. } \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( @@ -46,11 +49,11 @@ th1 <- th$ # get last elements of Y, Number1, and Number2 grouped by X th2 <- th$ - agg_by(agg_last(c("Y", "Number1", "Number2")), by="X") + agg_by(agg_last(c("Y", "Number1", "Number2")), by = "X") # get last elements of Number1 and Number2 grouped by X and Y th3 <- th$ - agg_by(agg_last(c("Number1", "Number2")), by=c("X", "Y")) + agg_by(agg_last(c("Number1", "Number2")), by = c("X", "Y")) client$close() } diff --git a/R/rdeephaven/man/agg_max.Rd b/R/rdeephaven/man/agg_max.Rd index c7bb7ebb13c..5a092d276cc 100644 --- a/R/rdeephaven/man/agg_max.Rd +++ b/R/rdeephaven/man/agg_max.Rd @@ -8,7 +8,7 @@ Default is to aggregate all non-grouping columns, which is only valid in the \code{agg_all_by()} operation.} } \value{ -\code{AggOp} to be used in a call to \code{agg_by()} or \code{agg_all_by()}. +\code{\link{AggOp}} to be used in a call to \code{agg_by()} or \code{agg_all_by()}. } \description{ Creates a Maximum aggregation that computes the maximum of each column in \code{cols} for each aggregation group. @@ -23,13 +23,16 @@ This function, like other Deephaven \code{agg} functions, is a generator functio function called an \code{\link{AggOp}} intended to be used in a call to \code{agg_by()} or \code{agg_all_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of an \code{agg} function can otherwise seem unexpected. + +For more information, see the vignette on \code{agg} functions by running +\code{vignette("agg_by")}. } \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( @@ -46,11 +49,11 @@ th1 <- th$ # get maximum elements of Number1 and Number2 grouped by X th2 <- th$ - agg_by(agg_max(c("Number1", "Number2")), by="X") + agg_by(agg_max(c("Number1", "Number2")), by = "X") # get maximum elements of Number1 and Number2 grouped by X and Y th3 <- th$ - agg_by(agg_max(c("Number1", "Number2")), by=c("X", "Y")) + agg_by(agg_max(c("Number1", "Number2")), by = c("X", "Y")) client$close() } diff --git a/R/rdeephaven/man/agg_median.Rd b/R/rdeephaven/man/agg_median.Rd index b64da3a26d2..3c08b9d4c02 100644 --- a/R/rdeephaven/man/agg_median.Rd +++ b/R/rdeephaven/man/agg_median.Rd @@ -8,7 +8,7 @@ Default is to aggregate all non-grouping columns, which is only valid in the \code{agg_all_by()} operation.} } \value{ -\code{AggOp} to be used in a call to \code{agg_by()} or \code{agg_all_by()}. 
+\code{\link{AggOp}} to be used in a call to \code{agg_by()} or \code{agg_all_by()}. } \description{ Creates a Median aggregation that computes the median of each column in \code{cols} for each aggregation group. @@ -23,13 +23,16 @@ This function, like other Deephaven \code{agg} functions, is a generator functio function called an \code{\link{AggOp}} intended to be used in a call to \code{agg_by()} or \code{agg_all_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of an \code{agg} function can otherwise seem unexpected. + +For more information, see the vignette on \code{agg} functions by running +\code{vignette("agg_by")}. } \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( @@ -46,11 +49,11 @@ th1 <- th$ # compute median of Number1 and Number2 grouped by X th2 <- th$ - agg_by(agg_median(c("Number1", "Number2")), by="X") + agg_by(agg_median(c("Number1", "Number2")), by = "X") # compute median of Number1 and Number2 grouped by X and Y th3 <- th$ - agg_by(agg_median(c("Number1", "Number2")), by=c("X", "Y")) + agg_by(agg_median(c("Number1", "Number2")), by = c("X", "Y")) client$close() } diff --git a/R/rdeephaven/man/agg_min.Rd b/R/rdeephaven/man/agg_min.Rd index 4d66d14fd78..fd9b603a0d2 100644 --- a/R/rdeephaven/man/agg_min.Rd +++ b/R/rdeephaven/man/agg_min.Rd @@ -8,7 +8,7 @@ Default is to aggregate all non-grouping columns, which is only valid in the \code{agg_all_by()} operation.} } \value{ -\code{AggOp} to be used in a call to \code{agg_by()} or \code{agg_all_by()}. +\code{\link{AggOp}} to be used in a call to \code{agg_by()} or \code{agg_all_by()}. } \description{ Creates a Minimum aggregation that computes the minimum of each column in \code{cols} for each aggregation group. @@ -23,13 +23,16 @@ This function, like other Deephaven \code{agg} functions, is a generator functio function called an \code{\link{AggOp}} intended to be used in a call to \code{agg_by()} or \code{agg_all_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of an \code{agg} function can otherwise seem unexpected. + +For more information, see the vignette on \code{agg} functions by running +\code{vignette("agg_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( @@ -46,11 +49,11 @@ th1 <- th$ # get minimum elements of Number1 and Number2 grouped by X th2 <- th$ - agg_by(agg_min(c("Number1", "Number2")), by="X") + agg_by(agg_min(c("Number1", "Number2")), by = "X") # get minimum elements of Number1 and Number2 grouped by X and Y th3 <- th$ - agg_by(agg_min(c("Number1", "Number2")), by=c("X", "Y")) + agg_by(agg_min(c("Number1", "Number2")), by = c("X", "Y")) client$close() } diff --git a/R/rdeephaven/man/agg_percentile.Rd b/R/rdeephaven/man/agg_percentile.Rd index 84f1f63d31a..4c76455d447 100644 --- a/R/rdeephaven/man/agg_percentile.Rd +++ b/R/rdeephaven/man/agg_percentile.Rd @@ -10,7 +10,7 @@ Default is to aggregate all non-grouping columns, which is only valid in the \code{agg_all_by()} operation.} } \value{ -\code{AggOp} to be used in a call to \code{agg_by()} or \code{agg_all_by()}. +\code{\link{AggOp}} to be used in a call to \code{agg_by()} or \code{agg_all_by()}. } \description{ Creates a Percentile aggregation that computes the given percentile of each column in \code{cols} for each aggregation group. @@ -25,13 +25,16 @@ This function, like other Deephaven \code{agg} functions, is a generator functio function called an \code{\link{AggOp}} intended to be used in a call to \code{agg_by()} or \code{agg_all_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of an \code{agg} function can otherwise seem unexpected. + +For more information, see the vignette on \code{agg} functions by running +\code{vignette("agg_by")}. } \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( @@ -44,15 +47,15 @@ th <- client$import_table(df) # compute 20th percentile of Number1 and Number2 th1 <- th$ - agg_by(agg_percentile(percentile=0.2, cols=c("Number1", "Number2"))) + agg_by(agg_percentile(percentile = 0.2, cols = c("Number1", "Number2"))) # compute 50th percentile of Number1 and Number2 grouped by X th2 <- th$ - agg_by(agg_percentile(percentile=0.5, cols=c("Number1", "Number2")), by="X") + agg_by(agg_percentile(percentile = 0.5, cols = c("Number1", "Number2")), by = "X") # compute 75th percentile of Number1 and Number2 grouped by X and Y th3 <- th$ - agg_by(agg_percentile(percentile=0.75, cols=c("Number1", "Number2")), by=c("X", "Y")) + agg_by(agg_percentile(percentile = 0.75, cols = c("Number1", "Number2")), by = c("X", "Y")) client$close() } diff --git a/R/rdeephaven/man/agg_std.Rd b/R/rdeephaven/man/agg_std.Rd index 843e1b78ff6..5101145707d 100644 --- a/R/rdeephaven/man/agg_std.Rd +++ b/R/rdeephaven/man/agg_std.Rd @@ -8,7 +8,7 @@ Default is to aggregate all non-grouping columns, which is only valid in the \code{agg_all_by()} operation.} } \value{ -\code{AggOp} to be used in a call to \code{agg_by()} or \code{agg_all_by()}. +\code{\link{AggOp}} to be used in a call to \code{agg_by()} or \code{agg_all_by()}. 
} \description{ Creates a Standard Deviation aggregation that computes the standard deviation of each column in \code{cols}, for each aggregation group. @@ -23,13 +23,16 @@ This function, like other Deephaven \code{agg} functions, is a generator functio function called an \code{\link{AggOp}} intended to be used in a call to \code{agg_by()} or \code{agg_all_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of an \code{agg} function can otherwise seem unexpected. + +For more information, see the vignette on \code{agg} functions by running +\code{vignette("agg_by")}. } \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( @@ -46,11 +49,11 @@ th1 <- th$ # compute standard deviation of Number1 and Number2 grouped by X th2 <- th$ - agg_by(agg_std(c("Number1", "Number2")), by="X") + agg_by(agg_std(c("Number1", "Number2")), by = "X") # compute standard deviation of Number1 and Number2 grouped by X and Y th3 <- th$ - agg_by(agg_std(c("Number1", "Number2")), by=c("X", "Y")) + agg_by(agg_std(c("Number1", "Number2")), by = c("X", "Y")) client$close() } diff --git a/R/rdeephaven/man/agg_sum.Rd b/R/rdeephaven/man/agg_sum.Rd index 76c270a6bf0..ac2de464289 100644 --- a/R/rdeephaven/man/agg_sum.Rd +++ b/R/rdeephaven/man/agg_sum.Rd @@ -8,7 +8,7 @@ Default is to aggregate all non-grouping columns, which is only valid in the \code{agg_all_by()} operation.} } \value{ -\code{AggOp} to be used in a call to \code{agg_by()} or \code{agg_all_by()}. +\code{\link{AggOp}} to be used in a call to \code{agg_by()} or \code{agg_all_by()}. } \description{ Creates a Sum aggregation that computes the sum of each column in \code{cols} for each aggregation group. @@ -23,13 +23,16 @@ This function, like other Deephaven \code{agg} functions, is a generator functio function called an \code{\link{AggOp}} intended to be used in a call to \code{agg_by()} or \code{agg_all_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of an \code{agg} function can otherwise seem unexpected. + +For more information, see the vignette on \code{agg} functions by running +\code{vignette("agg_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( @@ -46,11 +49,11 @@ th1 <- th$ # compute sum of Number1 and Number2 grouped by X th2 <- th$ - agg_by(agg_sum(c("Number1", "Number2")), by="X") + agg_by(agg_sum(c("Number1", "Number2")), by = "X") # compute sum of Number1 and Number2 grouped by X and Y th3 <- th$ - agg_by(agg_sum(c("Number1", "Number2")), by=c("X", "Y")) + agg_by(agg_sum(c("Number1", "Number2")), by = c("X", "Y")) client$close() } diff --git a/R/rdeephaven/man/agg_var.Rd b/R/rdeephaven/man/agg_var.Rd index 5f8c7ee9537..94d0b59c283 100644 --- a/R/rdeephaven/man/agg_var.Rd +++ b/R/rdeephaven/man/agg_var.Rd @@ -8,7 +8,7 @@ Default is to aggregate all non-grouping columns, which is only valid in the \code{agg_all_by()} operation.} } \value{ -\code{AggOp} to be used in a call to \code{agg_by()} or \code{agg_all_by()}. +\code{\link{AggOp}} to be used in a call to \code{agg_by()} or \code{agg_all_by()}. } \description{ Creates a Variance aggregation that computes the variance of each column in \code{cols} for each aggregation group. @@ -23,13 +23,16 @@ This function, like other Deephaven \code{agg} functions, is a generator functio function called an \code{\link{AggOp}} intended to be used in a call to \code{agg_by()} or \code{agg_all_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of an \code{agg} function can otherwise seem unexpected. + +For more information, see the vignette on \code{agg} functions by running +\code{vignette("agg_by")}. } \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( @@ -46,11 +49,11 @@ th1 <- th$ # compute variance of Number1 and Number2 grouped by X th2 <- th$ - agg_by(agg_var(c("Number1", "Number2")), by="X") + agg_by(agg_var(c("Number1", "Number2")), by = "X") # compute variance of Number1 and Number2 grouped by X and Y th3 <- th$ - agg_by(agg_var(c("Number1", "Number2")), by=c("X", "Y")) + agg_by(agg_var(c("Number1", "Number2")), by = c("X", "Y")) client$close() } diff --git a/R/rdeephaven/man/agg_w_avg.Rd b/R/rdeephaven/man/agg_w_avg.Rd index d9a27eec52c..575f3fba44c 100644 --- a/R/rdeephaven/man/agg_w_avg.Rd +++ b/R/rdeephaven/man/agg_w_avg.Rd @@ -10,7 +10,7 @@ Default is to aggregate all non-grouping columns, which is only valid in the \code{agg_all_by()} operation.} } \value{ -\code{AggOp} to be used in a call to \code{agg_by()} or \code{agg_all_by()}. +\code{\link{AggOp}} to be used in a call to \code{agg_by()} or \code{agg_all_by()}. } \description{ Creates a Weighted Average aggregation that computes the weighted average of each column in \code{cols} for each aggregation group. @@ -25,13 +25,16 @@ This function, like other Deephaven \code{agg} functions, is a generator functio function called an \code{\link{AggOp}} intended to be used in a call to \code{agg_by()} or \code{agg_all_by()}. This detail is typically hidden from the user. 
However, it is important to understand this detail for debugging purposes, as the output of an \code{agg} function can otherwise seem unexpected. + +For more information, see the vignette on \code{agg} functions by running +\code{vignette("agg_by")}. } \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( @@ -44,15 +47,15 @@ th <- client$import_table(df) # compute weighted average of Number1, weighted by Number2 th1 <- th$ - agg_by(agg_w_avg(wcol="Number2", cols="Number1")) + agg_by(agg_w_avg(wcol = "Number2", cols = "Number1")) # compute weighted average of Number1, weighted by Number2, grouped by X th2 <- th$ - agg_by(agg_w_avg(wcol="Number2", cols="Number1", by="X")) + agg_by(agg_w_avg(wcol = "Number2", cols = "Number1", by = "X")) # compute weighted average of Number1, weighted by Number2, grouped by X and Y th3 <- th$ - agg_by(agg_w_avg(wcol="Number2", cols="Number1", by=c("X", "Y"))) + agg_by(agg_w_avg(wcol = "Number2", cols = "Number1", by = c("X", "Y"))) client$close() } diff --git a/R/rdeephaven/man/rdeephaven.Rd b/R/rdeephaven/man/rdeephaven.Rd deleted file mode 100644 index 96dd3c1f33a..00000000000 --- a/R/rdeephaven/man/rdeephaven.Rd +++ /dev/null @@ -1,101 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/exports.R -\name{rdeephaven} -\alias{rdeephaven} -\title{The Deephaven Community R Client} -\description{ -The Deephaven Community R Client provides an R interface to Deephaven's powerful real-time data engine, \href{https://deephaven.io/community/}{\emph{Deephaven Core}}. -To use this package, you must have a Deephaven server running and be able to connect to it. For more information on -how to set up a Deephaven server, see the documentation \href{https://deephaven.io/core/docs/tutorials/quickstart/}{here}. -} -\section{Building blocks of the Deephaven R Client}{ - -There are two primary R classes that make up the Deephaven R Client, the \code{\link{Client}} class and the -\code{\link{TableHandle}} class. The \code{Client} class is used to establish a connection to the Deephaven server with -its constructor \code{Client$new()}, and to send server requests, such as running a script via \code{run_script()}, or pushing -local data to the server via \code{import_table()}. Basic usage of the \code{Client} class may look something like this: - -\if{html}{\out{
}}\preformatted{library(rdeephaven) - -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") -}\if{html}{\out{
}} - -Many of these server requests end up creating or modifying tables that live on the server. To keep track of these -tables, the R client retrieves references to them, and wraps these references in \code{TableHandle} objects. These -TableHandles have a host of methods that mirror server-side table operations, such as \code{head()}, \code{tail()}, \code{update()}, -and so on. So, you can typically use TableHandles \emph{as if} they are tables themselves, and all of the corresponding -methods that you call on them will be executed on the server. Here is a simple example of pushing data to the server, -retrieving a TableHandle to the resulting table, and applying some basic table operations to the table: - -\if{html}{\out{
}}\preformatted{df1 <- data.frame(x=1:10, y=11:20) -th1 <- client$import_table(df1) - -th2 <- th1$ - update("z = x + y")$ - where("z \% 4 == 0") -}\if{html}{\out{
}} - -TableHandles also support common functional methods for converting server-side Deephaven tables to R objects stored in -local memory such as \code{as.data.frame()}, \code{as_tibble()}, and \code{as_arrow_table()}. Here's an example of converting the -table created above to an R data frame and verifying that other functional methods work as expected: - -\if{html}{\out{
}}\preformatted{df2 <- as.data.frame(th2) - -print(nrow(th2) == nrow(df2)) -print(ncol(th2) == ncol(df2)) -print(dim(th2) == dim(df2)) -print(all(as.data.frame(head(th2, 2)) == head(df2, 2))) -print(all(as.data.frame(tail(th2, 2)) == tail(df2, 2))) -}\if{html}{\out{
}} - -For more information on these classes and all of their methods, see the reference documentation for \code{\link{Client}} -and \code{\link{TableHandle}} by clicking on their class names, or by running \code{?Client} or \code{?TableHandle}. -} - -\section{Real-time data analysis}{ - -Since TableHandles are references to tables living on the Deephaven server, they may refer to streaming tables, or -tables that are receiving new data periodically (typically once per second). Here's a simple example of creating a -table that adds a new row every second: - -\if{html}{\out{
}}\preformatted{th3 <- client$time_table("PT1s")$ - update(c("X = ii", "Y = sin(X)")) -}\if{html}{\out{
}} - -R objects like data frames or Dplyr tibbles do not have this streaming property - they are always static objects -stored in memory. However, a TableHandle referring to a streaming table may be converted to a data frame or tibble at -any time, and the resulting object will be a snapshot of the table at the time of conversion. This means that you can -use the Deephaven R Client to perform real-time data analysis on streaming data! Here, we make a simple plot of the -ticking table, and call it three times to demonstrate the dynamic nature of the table: - -\if{html}{\out{
}}\preformatted{plot(as.data.frame(th3)$X, as.data.frame(th3)$Y, type="l") -Sys.sleep(5) -plot(as.data.frame(th3)$X, as.data.frame(th3)$Y, type="l") -Sys.sleep(5) -plot(as.data.frame(th3)$X, as.data.frame(th3)$Y, type="l") -}\if{html}{\out{
}} - -There are performance and memory considerations when pulling data from the server, so it is best to use the provided -TableHandle methods to perform as much of your analysis as possible on the server, and to only pull the data when -something \emph{must} be done in R, like plotting or writing to a local file. -} - -\section{Powerful table operations}{ - -Much of the power of Deephaven's suite of table operations is achieved through the use of the \code{\link[=UpdateBy]{update_by()}} -and \code{\link[=AggBy]{agg_by()}} methods. These table methods are important enough to warrant their own documentation pages, accessible -by clicking on their names, or by running \code{?UpdateBy} or \code{?AggBy}. These methods come with their own suites of functions, -prefixed with \code{agg_} and \code{uby_} respectively, that are discoverable from their documentation pages. Running \code{ls("package:rdeephaven")} -will reveal that most of the functions included in this package are for these methods, so it is important to get acquainted -with them. -} - -\section{Getting help}{ - -While we've done our best to provide good documentation for this package, you may find you need more help than what -this documentation has to offer. Please visit the official Deephaven Community Core \href{https://deephaven.io/core/docs/tutorials/quickstart/}{documentation} -to learn more about Deephaven and to find comprehensive examples. Additionally, feel free to reach out to us on -the Deephaven \href{https://deephaven.io/slack}{Community Slack channel} with any questions. -We hope you find real-time data analysis in R to be as easy as possible. -} - diff --git a/R/rdeephaven/man/uby_cum_max.Rd b/R/rdeephaven/man/uby_cum_max.Rd index dfd209e3f38..2b940d46414 100644 --- a/R/rdeephaven/man/uby_cum_max.Rd +++ b/R/rdeephaven/man/uby_cum_max.Rd @@ -23,18 +23,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -47,12 +50,12 @@ th1 <- th$ # compute cumulative maximum of col1 and col2, grouped by boolCol th2 <- th$ - update_by(uby_cum_max(c("col1CumMax = col1", "col2CumMax = col2")), by="boolCol") + update_by(uby_cum_max(c("col1CumMax = col1", "col2CumMax = col2")), by = "boolCol") # compute cumulative maximum of col1 and col2, grouped by boolCol and parity of col3 th3 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_cum_max(c("col1CumMax = col1", "col2CumMax = col2")), by=c("boolCol", "col3Parity")) + update_by(uby_cum_max(c("col1CumMax = col1", "col2CumMax = col2")), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_cum_min.Rd b/R/rdeephaven/man/uby_cum_min.Rd index c8b0c7d8040..ed576f7b170 100644 --- a/R/rdeephaven/man/uby_cum_min.Rd +++ b/R/rdeephaven/man/uby_cum_min.Rd @@ -23,18 +23,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -47,12 +50,12 @@ th1 <- th$ # compute cumulative minimum of col1 and col2, grouped by boolCol th2 <- th$ - update_by(uby_cum_min(c("col1CumMin = col1", "col2CumMin = col2")), by="boolCol") + update_by(uby_cum_min(c("col1CumMin = col1", "col2CumMin = col2")), by = "boolCol") # compute cumulative minimum of col1 and col2, grouped by boolCol and parity of col3 th3 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_cum_min(c("col1CumMin = col1", "col2CumMin = col2")), by=c("boolCol", "col3Parity")) + update_by(uby_cum_min(c("col1CumMin = col1", "col2CumMin = col2")), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_cum_prod.Rd b/R/rdeephaven/man/uby_cum_prod.Rd index 872b9c59e18..e3710a95a5c 100644 --- a/R/rdeephaven/man/uby_cum_prod.Rd +++ b/R/rdeephaven/man/uby_cum_prod.Rd @@ -23,18 +23,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -47,12 +50,12 @@ th1 <- th$ # compute cumulative product of col1 and col2, grouped by boolCol th2 <- th$ - update_by(uby_cum_prod(c("col1CumProd = col1", "col2CumProd = col2")), by="boolCol") + update_by(uby_cum_prod(c("col1CumProd = col1", "col2CumProd = col2")), by = "boolCol") # compute cumulative product of col1 and col2, grouped by boolCol and parity of col3 th3 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_cum_prod(c("col1CumProd = col1", "col2CumProd = col2")), by=c("boolCol", "col3Parity")) + update_by(uby_cum_prod(c("col1CumProd = col1", "col2CumProd = col2")), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_cum_sum.Rd b/R/rdeephaven/man/uby_cum_sum.Rd index d12ebde5eae..12d7e4f97ab 100644 --- a/R/rdeephaven/man/uby_cum_sum.Rd +++ b/R/rdeephaven/man/uby_cum_sum.Rd @@ -23,18 +23,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -47,12 +50,12 @@ th1 <- th$ # compute cumulative sum of col1 and col2, grouped by boolCol th2 <- th$ - update_by(uby_cum_sum(c("col1CumSum = col1", "col2CumSum = col2")), by="boolCol") + update_by(uby_cum_sum(c("col1CumSum = col1", "col2CumSum = col2")), by = "boolCol") # compute cumulative sum of col1 and col2, grouped by boolCol and parity of col3 th3 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_cum_sum(c("col1CumSum = col1", "col2CumSum = col2")), by=c("boolCol", "col3Parity")) + update_by(uby_cum_sum(c("col1CumSum = col1", "col2CumSum = col2")), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_delta.Rd b/R/rdeephaven/man/uby_delta.Rd index 2679b3886a7..427d493ceed 100644 --- a/R/rdeephaven/man/uby_delta.Rd +++ b/R/rdeephaven/man/uby_delta.Rd @@ -34,18 +34,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -58,12 +61,12 @@ th1 <- th$ # compute consecutive differences of col1 and col2, grouped by boolCol th2 <- th$ - update_by(uby_delta(c("col1Delta = col1", "col2Delta = col2")), by="boolCol") + update_by(uby_delta(c("col1Delta = col1", "col2Delta = col2")), by = "boolCol") # compute consecutive differences of col1 and col2, grouped by boolCol and parity of col3 th3 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_delta(c("col1Delta = col1", "col2Delta = col2")), by=c("boolCol", "col3Parity")) + update_by(uby_delta(c("col1Delta = col1", "col2Delta = col2")), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_ema_tick.Rd b/R/rdeephaven/man/uby_ema_tick.Rd index 9d680ddcc3a..ecbf3ff37b2 100644 --- a/R/rdeephaven/man/uby_ema_tick.Rd +++ b/R/rdeephaven/man/uby_ema_tick.Rd @@ -40,18 +40,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -60,16 +63,16 @@ th <- client$import_table(df) # compute 10-row exponential moving average of col1 and col2 th1 <- th$ - update_by(uby_ema_tick(decay_ticks=10, cols=c("col1Ema = col1", "col2Ema = col2"))) + update_by(uby_ema_tick(decay_ticks = 10, cols = c("col1Ema = col1", "col2Ema = col2"))) # compute 5-row exponential moving average of col1 and col2, grouped by boolCol th2 <- th$ - update_by(uby_ema_tick(decay_ticks=5, cols=c("col1Ema = col1", "col2Ema = col2")), by="boolCol") + update_by(uby_ema_tick(decay_ticks = 5, cols = c("col1Ema = col1", "col2Ema = col2")), by = "boolCol") # compute 20-row exponential moving average of col1 and col2, grouped by boolCol and parity of col3 th3 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_ema_tick(decay_ticks=20, cols=c("col1Ema = col1", "col2Ema = col2")), by=c("boolCol", "col3Parity")) + update_by(uby_ema_tick(decay_ticks = 20, cols = c("col1Ema = col1", "col2Ema = col2")), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_ema_time.Rd b/R/rdeephaven/man/uby_ema_time.Rd index 862f1be69c3..e86dca08f4a 100644 --- a/R/rdeephaven/man/uby_ema_time.Rd +++ b/R/rdeephaven/man/uby_ema_time.Rd @@ -43,18 +43,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -63,16 +66,16 @@ th <- client$import_table(df) # compute 10-second exponential moving average of col1 and col2 th1 <- th$ - update_by(uby_ema_time(ts_col="timeCol", decay_time="PT10s", cols=c("col1Ema = col1", "col2Ema = col2"))) + update_by(uby_ema_time(ts_col = "timeCol", decay_time = "PT10s", cols = c("col1Ema = col1", "col2Ema = col2"))) # compute 5-second exponential moving average of col1 and col2, grouped by boolCol th2 <- th$ - update_by(uby_ema_time(ts_col="timeCol", decay_time="PT5s", cols=c("col1Ema = col1", "col2Ema = col2")), by="boolCol") + update_by(uby_ema_time(ts_col = "timeCol", decay_time = "PT5s", cols = c("col1Ema = col1", "col2Ema = col2")), by = "boolCol") # compute 20-second exponential moving average of col1 and col2, grouped by boolCol and parity of col3 th3 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_ema_time(ts_col="timeCol", decay_time="PT20s", cols=c("col1Ema = col1", "col2Ema = col2")), by=c("boolCol", "col3Parity")) + update_by(uby_ema_time(ts_col = "timeCol", decay_time = "PT20s", cols = c("col1Ema = col1", "col2Ema = col2")), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_emmax_tick.Rd b/R/rdeephaven/man/uby_emmax_tick.Rd index db344db1c35..9f7f960ffde 100644 --- a/R/rdeephaven/man/uby_emmax_tick.Rd +++ b/R/rdeephaven/man/uby_emmax_tick.Rd @@ -40,18 +40,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -60,16 +63,16 @@ th <- client$import_table(df) # compute 10-row exponential moving maximum of col1 and col2 th1 <- th$ - update_by(uby_emmax_tick(decay_ticks=10, cols=c("col1Emmax = col1", "col2Emmax = col2"))) + update_by(uby_emmax_tick(decay_ticks = 10, cols = c("col1Emmax = col1", "col2Emmax = col2"))) # compute 5-row exponential moving maximum of col1 and col2, grouped by boolCol th2 <- th$ - update_by(uby_emmax_tick(decay_ticks=5, cols=c("col1Emmax = col1", "col2Emmax = col2")), by="boolCol") + update_by(uby_emmax_tick(decay_ticks = 5, cols = c("col1Emmax = col1", "col2Emmax = col2")), by = "boolCol") # compute 20-row exponential moving maximum of col1 and col2, grouped by boolCol and parity of col3 th3 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_emmax_tick(decay_ticks=20, cols=c("col1Emmax = col1", "col2Emmax = col2")), by=c("boolCol", "col3Parity")) + update_by(uby_emmax_tick(decay_ticks = 20, cols = c("col1Emmax = col1", "col2Emmax = col2")), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_emmax_time.Rd b/R/rdeephaven/man/uby_emmax_time.Rd index 412c5e3239a..c845c45039b 100644 --- a/R/rdeephaven/man/uby_emmax_time.Rd +++ b/R/rdeephaven/man/uby_emmax_time.Rd @@ -41,18 +41,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -61,16 +64,16 @@ th <- client$import_table(df) # compute 10-second exponential moving maximum of col1 and col2 th1 <- th$ - update_by(uby_emmax_time(ts_col="timeCol", decay_time="PT10s", cols=c("col1Emmax = col1", "col2Emmax = col2"))) + update_by(uby_emmax_time(ts_col = "timeCol", decay_time = "PT10s", cols = c("col1Emmax = col1", "col2Emmax = col2"))) # compute 5-second exponential moving maximum of col1 and col2, grouped by boolCol th2 <- th$ - update_by(uby_emmax_time(ts_col="timeCol", decay_time="PT5s", cols=c("col1Emmax = col1", "col2Emmax = col2")), by="boolCol") + update_by(uby_emmax_time(ts_col = "timeCol", decay_time = "PT5s", cols = c("col1Emmax = col1", "col2Emmax = col2")), by = "boolCol") # compute 20-second exponential moving maximum of col1 and col2, grouped by boolCol and parity of col3 th3 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_emmax_time(ts_col="timeCol", decay_time="PT20s", cols=c("col1Emmax = col1", "col2Emmax = col2")), by=c("boolCol", "col3Parity")) + update_by(uby_emmax_time(ts_col = "timeCol", decay_time = "PT20s", cols = c("col1Emmax = col1", "col2Emmax = col2")), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_emmin_tick.Rd b/R/rdeephaven/man/uby_emmin_tick.Rd index 0c9b8c9390c..20055b7891b 100644 --- a/R/rdeephaven/man/uby_emmin_tick.Rd +++ b/R/rdeephaven/man/uby_emmin_tick.Rd @@ -40,18 +40,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -60,16 +63,16 @@ th <- client$import_table(df) # compute 10-row exponential moving minimum of col1 and col2 th1 <- th$ - update_by(uby_emmin_tick(decay_ticks=10, cols=c("col1Emmin = col1", "col2Emmin = col2"))) + update_by(uby_emmin_tick(decay_ticks = 10, cols = c("col1Emmin = col1", "col2Emmin = col2"))) # compute 5-row exponential moving minimum of col1 and col2, grouped by boolCol th2 <- th$ - update_by(uby_emmin_tick(decay_ticks=5, cols=c("col1Emmin = col1", "col2Emmin = col2")), by="boolCol") + update_by(uby_emmin_tick(decay_ticks = 5, cols = c("col1Emmin = col1", "col2Emmin = col2")), by = "boolCol") # compute 20-row exponential moving minimum of col1 and col2, grouped by boolCol and parity of col3 th3 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_emmin_tick(decay_ticks=20, cols=c("col1Emmin = col1", "col2Emmin = col2")), by=c("boolCol", "col3Parity")) + update_by(uby_emmin_tick(decay_ticks = 20, cols = c("col1Emmin = col1", "col2Emmin = col2")), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_emmin_time.Rd b/R/rdeephaven/man/uby_emmin_time.Rd index d1ee01a441b..ca01ed2117b 100644 --- a/R/rdeephaven/man/uby_emmin_time.Rd +++ b/R/rdeephaven/man/uby_emmin_time.Rd @@ -41,18 +41,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -61,16 +64,16 @@ th <- client$import_table(df) # compute 10-second exponential moving minimum of col1 and col2 th1 <- th$ - update_by(uby_emmin_time(ts_col="timeCol", decay_time="PT10s", cols=c("col1Emmin = col1", "col2Emmin = col2"))) + update_by(uby_emmin_time(ts_col = "timeCol", decay_time = "PT10s", cols = c("col1Emmin = col1", "col2Emmin = col2"))) # compute 5-second exponential moving minimum of col1 and col2, grouped by boolCol th2 <- th$ - update_by(uby_emmin_time(ts_col="timeCol", decay_time="PT5s", cols=c("col1Emmin = col1", "col2Emmin = col2")), by="boolCol") + update_by(uby_emmin_time(ts_col = "timeCol", decay_time = "PT5s", cols = c("col1Emmin = col1", "col2Emmin = col2")), by = "boolCol") # compute 20-second exponential moving minimum of col1 and col2, grouped by boolCol and parity of col3 th3 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_emmin_time(ts_col="timeCol", decay_time="PT20s", cols=c("col1Emmin = col1", "col2Emmin = col2")), by=c("boolCol", "col3Parity")) + update_by(uby_emmin_time(ts_col = "timeCol", decay_time = "PT20s", cols = c("col1Emmin = col1", "col2Emmin = col2")), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_ems_tick.Rd b/R/rdeephaven/man/uby_ems_tick.Rd index 7e5a45e8c79..11536a6ac25 100644 --- a/R/rdeephaven/man/uby_ems_tick.Rd +++ b/R/rdeephaven/man/uby_ems_tick.Rd @@ -40,18 +40,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -60,16 +63,16 @@ th <- client$import_table(df) # compute 10-row exponential moving sum of col1 and col2 th1 <- th$ - update_by(uby_ems_tick(decay_ticks=10, cols=c("col1Ems = col1", "col2Ems = col2"))) + update_by(uby_ems_tick(decay_ticks = 10, cols = c("col1Ems = col1", "col2Ems = col2"))) # compute 5-row exponential moving sum of col1 and col2, grouped by boolCol th2 <- th$ - update_by(uby_ems_tick(decay_ticks=5, cols=c("col1Ems = col1", "col2Ems = col2")), by="boolCol") + update_by(uby_ems_tick(decay_ticks = 5, cols = c("col1Ems = col1", "col2Ems = col2")), by = "boolCol") # compute 20-row exponential moving sum of col1 and col2, grouped by boolCol and parity of col3 th3 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_ems_tick(decay_ticks=20, cols=c("col1Ems = col1", "col2Ems = col2")), by=c("boolCol", "col3Parity")) + update_by(uby_ems_tick(decay_ticks = 20, cols = c("col1Ems = col1", "col2Ems = col2")), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_ems_time.Rd b/R/rdeephaven/man/uby_ems_time.Rd index 12788d09e62..823cb67fe19 100644 --- a/R/rdeephaven/man/uby_ems_time.Rd +++ b/R/rdeephaven/man/uby_ems_time.Rd @@ -41,18 +41,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -61,16 +64,16 @@ th <- client$import_table(df) # compute 10-second exponential moving sum of col1 and col2 th1 <- th$ - update_by(uby_ems_time(ts_col="timeCol", decay_time="PT10s", cols=c("col1Ems = col1", "col2Ems = col2"))) + update_by(uby_ems_time(ts_col = "timeCol", decay_time = "PT10s", cols = c("col1Ems = col1", "col2Ems = col2"))) # compute 5-second exponential moving sum of col1 and col2, grouped by boolCol th2 <- th$ - update_by(uby_ems_time(ts_col="timeCol", decay_time="PT5s", cols=c("col1Ems = col1", "col2Ems = col2")), by="boolCol") + update_by(uby_ems_time(ts_col = "timeCol", decay_time = "PT5s", cols = c("col1Ems = col1", "col2Ems = col2")), by = "boolCol") # compute 20-second exponential moving sum of col1 and col2, grouped by boolCol and parity of col3 th3 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_ems_time(ts_col="timeCol", decay_time="PT20s", cols=c("col1Ems = col1", "col2Ems = col2")), by=c("boolCol", "col3Parity")) + update_by(uby_ems_time(ts_col = "timeCol", decay_time = "PT20s", cols = c("col1Ems = col1", "col2Ems = col2")), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_emstd_tick.Rd b/R/rdeephaven/man/uby_emstd_tick.Rd index 294eadbc46d..50e45858d35 100644 --- a/R/rdeephaven/man/uby_emstd_tick.Rd +++ b/R/rdeephaven/man/uby_emstd_tick.Rd @@ -45,18 +45,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -65,16 +68,16 @@ th <- client$import_table(df) # compute 10-row exponential moving standard deviation of col1 and col2 th1 <- th$ - update_by(uby_emstd_tick(decay_ticks=10, cols=c("col1Emstd = col1", "col2Emstd = col2"))) + update_by(uby_emstd_tick(decay_ticks = 10, cols = c("col1Emstd = col1", "col2Emstd = col2"))) # compute 5-row exponential moving standard deviation of col1 and col2, grouped by boolCol th2 <- th$ - update_by(uby_emstd_tick(decay_ticks=5, cols=c("col1Emstd = col1", "col2Emstd = col2")), by="boolCol") + update_by(uby_emstd_tick(decay_ticks = 5, cols = c("col1Emstd = col1", "col2Emstd = col2")), by = "boolCol") # compute 20-row exponential moving standard deviation of col1 and col2, grouped by boolCol and parity of col3 th3 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_emstd_tick(decay_ticks=20, cols=c("col1Emstd = col1", "col2Emstd = col2")), by=c("boolCol", "col3Parity")) + update_by(uby_emstd_tick(decay_ticks = 20, cols = c("col1Emstd = col1", "col2Emstd = col2")), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_emstd_time.Rd b/R/rdeephaven/man/uby_emstd_time.Rd index e166d18869d..af605cacafa 100644 --- a/R/rdeephaven/man/uby_emstd_time.Rd +++ b/R/rdeephaven/man/uby_emstd_time.Rd @@ -46,18 +46,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -66,16 +69,16 @@ th <- client$import_table(df) # compute 10-second exponential moving standard deviation of col1 and col2 th1 <- th$ - update_by(uby_emstd_time(ts_col="timeCol", decay_time="PT10s", cols=c("col1Emstd = col1", "col2Emstd = col2"))) + update_by(uby_emstd_time(ts_col = "timeCol", decay_time = "PT10s", cols = c("col1Emstd = col1", "col2Emstd = col2"))) # compute 5-second exponential moving standard deviation of col1 and col2, grouped by boolCol th2 <- th$ - update_by(uby_emstd_time(ts_col="timeCol", decay_time="PT5s", cols=c("col1Emstd = col1", "col2Emstd = col2")), by="boolCol") + update_by(uby_emstd_time(ts_col = "timeCol", decay_time = "PT5s", cols = c("col1Emstd = col1", "col2Emstd = col2")), by = "boolCol") # compute 20-second exponential moving standard deviation of col1 and col2, grouped by boolCol and parity of col3 th3 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_emstd_time(ts_col="timeCol", decay_time="PT20s", cols=c("col1Emstd = col1", "col2Emstd = col2")), by=c("boolCol", "col3Parity")) + update_by(uby_emstd_time(ts_col = "timeCol", decay_time = "PT20s", cols = c("col1Emstd = col1", "col2Emstd = col2")), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_forward_fill.Rd b/R/rdeephaven/man/uby_forward_fill.Rd index 71b074e89da..473941fafd9 100644 --- a/R/rdeephaven/man/uby_forward_fill.Rd +++ b/R/rdeephaven/man/uby_forward_fill.Rd @@ -24,18 +24,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = replace(sample(10000, size = 500, replace = TRUE), sample(500, 100), NA), col2 = replace(sample(10000, size = 500, replace = TRUE), sample(500, 100), NA), col3 = replace(1:500, sample(500, 100), NA) @@ -48,13 +51,13 @@ th1 <- th$ # forward fill col1 and col2, grouped by boolCol th2 <- th$ - update_by(uby_forward_fill(c("col1", "col2")), by="boolCol") + update_by(uby_forward_fill(c("col1", "col2")), by = "boolCol") # forward fill col3, compute parity of col3, and forward fill col1 and col2, grouped by boolCol and parity of col3 th3 <- th$ update_by(uby_forward_fill("col3"))$ update("col3Parity = col3 \% 2")$ - update_by(uby_forward_fill(c("col1", "col2")), by=c("boolCol", "col3Parity")) + update_by(uby_forward_fill(c("col1", "col2")), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_rolling_avg_tick.Rd b/R/rdeephaven/man/uby_rolling_avg_tick.Rd index 04171aaa17e..131fff4e472 100644 --- a/R/rdeephaven/man/uby_rolling_avg_tick.Rd +++ b/R/rdeephaven/man/uby_rolling_avg_tick.Rd @@ -44,18 +44,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -64,16 +67,16 @@ th <- client$import_table(df) # compute rolling average of col1 and col2, using the previous 5 rows and current row th1 <- th$ - update_by(uby_rolling_avg_tick(cols=c("col1RollAvg = col1", "col2RollAvg = col2"), rev_ticks=6)) + update_by(uby_rolling_avg_tick(cols = c("col1RollAvg = col1", "col2RollAvg = col2"), rev_ticks = 6)) # compute rolling average of col1 and col2, grouped by boolCol, using previous 5 rows, current row, and following 5 rows th2 <- th$ - update_by(uby_rolling_avg_tick(cols=c("col1RollAvg = col1", "col2RollAvg = col2"), rev_ticks=6, fwd_ticks=5)), by="boolCol") + update_by(uby_rolling_avg_tick(cols = c("col1RollAvg = col1", "col2RollAvg = col2"), rev_ticks = 6, fwd_ticks = 5), by = "boolCol") # compute rolling average of col1 and col2, grouped by boolCol and parity of col3, using current row and following 10 rows th3 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_rolling_avg_tick(cols=c("col1RollAvg = col1", "col2RollAvg = col2"), rev_ticks=1, fwd_ticks=10)), by=c("boolCol", "col3Parity")) + update_by(uby_rolling_avg_tick(cols = c("col1RollAvg = col1", "col2RollAvg = col2"), rev_ticks = 1, fwd_ticks = 10), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_rolling_avg_time.Rd b/R/rdeephaven/man/uby_rolling_avg_time.Rd index af048148d88..1361aa53b3f 100644 --- a/R/rdeephaven/man/uby_rolling_avg_time.Rd +++ b/R/rdeephaven/man/uby_rolling_avg_time.Rd @@ -43,18 +43,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -63,16 +66,16 @@ th <- client$import_table(df) # compute rolling average of col1 and col2, using the previous 5 seconds th1 <- th$ - update_by(uby_rolling_avg_time(ts_col="timeCol", cols=c("col1RollAvg = col1", "col2RollAvg = col2"), rev_time="PT5s")) + update_by(uby_rolling_avg_time(ts_col = "timeCol", cols = c("col1RollAvg = col1", "col2RollAvg = col2"), rev_time = "PT5s")) # compute rolling average of col1 and col2, grouped by boolCol, using previous 5 seconds, and following 5 seconds th2 <- th$ - update_by(uby_rolling_avg_time(ts_col="timeCol", cols=c("col1RollAvg = col1", "col2RollAvg = col2"), rev_time="PT5s", fwd_ticks="PT5s")), by="boolCol") + update_by(uby_rolling_avg_time(ts_col = "timeCol", cols = c("col1RollAvg = col1", "col2RollAvg = col2"), rev_time = "PT5s", fwd_ticks = "PT5s"), by = "boolCol") # compute rolling average of col1 and col2, grouped by boolCol and parity of col3, using following 10 seconds th3 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_rolling_avg_time(ts_col="timeCol", cols=c("col1RollAvg = col1", "col2RollAvg = col2"), rev_time="PT0s", fwd_time="PT10s")), by=c("boolCol", "col3Parity")) + update_by(uby_rolling_avg_time(ts_col = "timeCol", cols = c("col1RollAvg = col1", "col2RollAvg = col2"), rev_time = "PT0s", fwd_time = "PT10s"), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_rolling_count_tick.Rd b/R/rdeephaven/man/uby_rolling_count_tick.Rd index f319e99b895..f97ec165ed2 100644 --- a/R/rdeephaven/man/uby_rolling_count_tick.Rd +++ b/R/rdeephaven/man/uby_rolling_count_tick.Rd @@ -44,18 +44,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -64,16 +67,16 @@ th <- client$import_table(df) # compute rolling count of col1 and col2, using the previous 5 rows and current row th1 <- th$ - update_by(uby_rolling_count_tick(cols=c("col1RollCount = col1", "col2RollCount = col2"), rev_ticks=6)) + update_by(uby_rolling_count_tick(cols = c("col1RollCount = col1", "col2RollCount = col2"), rev_ticks = 6)) # compute rolling count of col1 and col2, grouped by boolCol, using previous 5 rows, current row, and following 5 rows th2 <- th$ - update_by(uby_rolling_count_tick(cols=c("col1RollCount = col1", "col2RollCount = col2"), rev_ticks=6, fwd_ticks=5), by="boolCol") + update_by(uby_rolling_count_tick(cols = c("col1RollCount = col1", "col2RollCount = col2"), rev_ticks = 6, fwd_ticks = 5), by = "boolCol") # compute rolling count of col1 and col2, grouped by boolCol and parity of col3, using current row and following 10 rows th3 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_rolling_count_tick(cols=c("col1RollCount = col1", "col2RollCount = col2"), rev_ticks=1, fwd_ticks=10), by=c("boolCol", "col3Parity")) + update_by(uby_rolling_count_tick(cols = c("col1RollCount = col1", "col2RollCount = col2"), rev_ticks = 1, fwd_ticks = 10), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_rolling_count_time.Rd b/R/rdeephaven/man/uby_rolling_count_time.Rd index ecaef05be59..b3b55749155 100644 --- a/R/rdeephaven/man/uby_rolling_count_time.Rd +++ b/R/rdeephaven/man/uby_rolling_count_time.Rd @@ -43,18 +43,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -63,16 +66,16 @@ th <- client$import_table(df) # compute rolling count of col1 and col2, using the previous 5 seconds th1 <- th$ - update_by(uby_rolling_count_time(ts_col="timeCol", cols=c("col1RollCount = col1", "col2RollCount = col2"), rev_time="PT5s")) + update_by(uby_rolling_count_time(ts_col = "timeCol", cols = c("col1RollCount = col1", "col2RollCount = col2"), rev_time = "PT5s")) # compute rolling count of col1 and col2, grouped by boolCol, using previous 5 seconds, and following 5 seconds th2 <- th$ - update_by(uby_rolling_count_time(ts_col="timeCol", cols=c("col1RollCount = col1", "col2RollCount = col2"), rev_time="PT5s", fwd_ticks="PT5s"), by="boolCol") + update_by(uby_rolling_count_time(ts_col = "timeCol", cols = c("col1RollCount = col1", "col2RollCount = col2"), rev_time = "PT5s", fwd_ticks = "PT5s"), by = "boolCol") # compute rolling count of col1 and col2, grouped by boolCol and parity of col3, using following 10 seconds th3 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_rolling_count_time(ts_col="timeCol", cols=c("col1RollCount = col1", "col2RollCount = col2"), rev_time="PT0s", fwd_time="PT10s"), by=c("boolCol", "col3Parity")) + update_by(uby_rolling_count_time(ts_col = "timeCol", cols = c("col1RollCount = col1", "col2RollCount = col2"), rev_time = "PT0s", fwd_time = "PT10s"), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_rolling_group_tick.Rd b/R/rdeephaven/man/uby_rolling_group_tick.Rd index fde8d3aacfe..c036ef80405 100644 --- a/R/rdeephaven/man/uby_rolling_group_tick.Rd +++ b/R/rdeephaven/man/uby_rolling_group_tick.Rd @@ -44,18 +44,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -64,12 +67,12 @@ th <- client$import_table(df) # compute rolling group of col1 and col2, grouped by boolCol, using previous 5 rows, current row, and following 5 rows th1 <- th$ - update_by(uby_rolling_group_tick(cols=c("col1RollGroup = col1", "col2RollGroup = col2"), rev_ticks=6, fwd_ticks=5)), by="boolCol") + update_by(uby_rolling_group_tick(cols = c("col1RollGroup = col1", "col2RollGroup = col2"), rev_ticks = 6, fwd_ticks = 5), by = "boolCol") # compute rolling group of col1 and col2, grouped by boolCol and parity of col3, using current row and following 10 rows th2 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_rolling_group_tick(cols=c("col1RollGroup = col1", "col2RollGroup = col2"), rev_ticks=1, fwd_ticks=10)), by=c("boolCol", "col3Parity")) + update_by(uby_rolling_group_tick(cols = c("col1RollGroup = col1", "col2RollGroup = col2"), rev_ticks = 1, fwd_ticks = 10), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_rolling_group_time.Rd b/R/rdeephaven/man/uby_rolling_group_time.Rd index 74b447c1816..62d578bdc2c 100644 --- a/R/rdeephaven/man/uby_rolling_group_time.Rd +++ b/R/rdeephaven/man/uby_rolling_group_time.Rd @@ -43,18 +43,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -63,12 +66,12 @@ th <- client$import_table(df) # compute rolling group of col1 and col2, grouped by boolCol, using previous 5 seconds, and following 5 seconds th1 <- th$ - update_by(uby_rolling_group_time(ts_col="timeCol", cols=c("col1RollGroup = col1", "col2RollGroup = col2"), rev_time="PT5s", fwd_ticks="PT5s")), by="boolCol") + update_by(uby_rolling_group_time(ts_col = "timeCol", cols = c("col1RollGroup = col1", "col2RollGroup = col2"), rev_time = "PT5s", fwd_ticks = "PT5s"), by = "boolCol") # compute rolling group of col1 and col2, grouped by boolCol and parity of col3, using following 10 seconds th2 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_rolling_group_time(ts_col="timeCol", cols=c("col1RollGroup = col1", "col2RollGroup = col2"), rev_time="PT0s", fwd_time="PT10s")), by=c("boolCol", "col3Parity")) + update_by(uby_rolling_group_time(ts_col = "timeCol", cols = c("col1RollGroup = col1", "col2RollGroup = col2"), rev_time = "PT0s", fwd_time = "PT10s"), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_rolling_max_tick.Rd b/R/rdeephaven/man/uby_rolling_max_tick.Rd index cd90d3f9578..21c0d1a5ae0 100644 --- a/R/rdeephaven/man/uby_rolling_max_tick.Rd +++ b/R/rdeephaven/man/uby_rolling_max_tick.Rd @@ -44,18 +44,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -64,16 +67,16 @@ th <- client$import_table(df) # compute rolling maximum of col1 and col2, using the previous 5 rows and current row th1 <- th$ - update_by(uby_rolling_max_tick(cols=c("col1RollMax = col1", "col2RollMax = col2"), rev_ticks=6)) + update_by(uby_rolling_max_tick(cols = c("col1RollMax = col1", "col2RollMax = col2"), rev_ticks = 6)) # compute rolling maximum of col1 and col2, grouped by boolCol, using previous 5 rows, current row, and following 5 rows th2 <- th$ - update_by(uby_rolling_max_tick(cols=c("col1RollMax = col1", "col2RollMax = col2"), rev_ticks=6, fwd_ticks=5)), by="boolCol") + update_by(uby_rolling_max_tick(cols = c("col1RollMax = col1", "col2RollMax = col2"), rev_ticks = 6, fwd_ticks = 5), by = "boolCol") # compute rolling maximum of col1 and col2, grouped by boolCol and parity of col3, using current row and following 10 rows th3 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_rolling_max_tick(cols=c("col1RollMax = col1", "col2RollMax = col2"), rev_ticks=1, fwd_ticks=10)), by=c("boolCol", "col3Parity")) + update_by(uby_rolling_max_tick(cols = c("col1RollMax = col1", "col2RollMax = col2"), rev_ticks = 1, fwd_ticks = 10), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_rolling_max_time.Rd b/R/rdeephaven/man/uby_rolling_max_time.Rd index d259bc28db8..71ae9e7389d 100644 --- a/R/rdeephaven/man/uby_rolling_max_time.Rd +++ b/R/rdeephaven/man/uby_rolling_max_time.Rd @@ -43,18 +43,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -63,16 +66,16 @@ th <- client$import_table(df) # compute rolling maximum of col1 and col2, using the previous 5 seconds th1 <- th$ - update_by(uby_rolling_max_time(ts_col="timeCol", cols=c("col1RollMax = col1", "col2RollMax = col2"), rev_time="PT5s")) + update_by(uby_rolling_max_time(ts_col = "timeCol", cols = c("col1RollMax = col1", "col2RollMax = col2"), rev_time = "PT5s")) # compute rolling maximum of col1 and col2, grouped by boolCol, using previous 5 seconds, and following 5 seconds th2 <- th$ - update_by(uby_rolling_max_time(ts_col="timeCol", cols=c("col1RollMax = col1", "col2RollMax = col2"), rev_time="PT5s", fwd_ticks="PT5s")), by="boolCol") + update_by(uby_rolling_max_time(ts_col = "timeCol", cols = c("col1RollMax = col1", "col2RollMax = col2"), rev_time = "PT5s", fwd_ticks = "PT5s"), by = "boolCol") # compute rolling maximum of col1 and col2, grouped by boolCol and parity of col3, using following 10 seconds th3 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_rolling_max_time(ts_col="timeCol", cols=c("col1RollMax = col1", "col2RollMax = col2"), rev_time="PT0s", fwd_time="PT10s")), by=c("boolCol", "col3Parity")) + update_by(uby_rolling_max_time(ts_col = "timeCol", cols = c("col1RollMax = col1", "col2RollMax = col2"), rev_time = "PT0s", fwd_time = "PT10s"), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_rolling_min_tick.Rd b/R/rdeephaven/man/uby_rolling_min_tick.Rd index fb659d40d48..f765c36004a 100644 --- a/R/rdeephaven/man/uby_rolling_min_tick.Rd +++ b/R/rdeephaven/man/uby_rolling_min_tick.Rd @@ -44,18 +44,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -64,16 +67,16 @@ th <- client$import_table(df) # compute rolling minimum of col1 and col2, using the previous 5 rows and current row th1 <- th$ - update_by(uby_rolling_min_tick(cols=c("col1RollMin = col1", "col2RollMin = col2"), rev_ticks=6)) + update_by(uby_rolling_min_tick(cols = c("col1RollMin = col1", "col2RollMin = col2"), rev_ticks = 6)) # compute rolling minimum of col1 and col2, grouped by boolCol, using previous 5 rows, current row, and following 5 rows th2 <- th$ - update_by(uby_rolling_min_tick(cols=c("col1RollMin = col1", "col2RollMin = col2"), rev_ticks=6, fwd_ticks=5)), by="boolCol") + update_by(uby_rolling_min_tick(cols = c("col1RollMin = col1", "col2RollMin = col2"), rev_ticks = 6, fwd_ticks = 5), by = "boolCol") # compute rolling minimum of col1 and col2, grouped by boolCol and parity of col3, using current row and following 10 rows th3 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_rolling_min_tick(cols=c("col1RollMin = col1", "col2RollMin = col2"), rev_ticks=1, fwd_ticks=10)), by=c("boolCol", "col3Parity")) + update_by(uby_rolling_min_tick(cols = c("col1RollMin = col1", "col2RollMin = col2"), rev_ticks = 1, fwd_ticks = 10), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_rolling_min_time.Rd b/R/rdeephaven/man/uby_rolling_min_time.Rd index 1fb73cefae3..49a12e1a989 100644 --- a/R/rdeephaven/man/uby_rolling_min_time.Rd +++ b/R/rdeephaven/man/uby_rolling_min_time.Rd @@ -43,18 +43,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -63,16 +66,16 @@ th <- client$import_table(df) # compute rolling minimum of col1 and col2, using the previous 5 seconds th1 <- th$ - update_by(uby_rolling_min_time(ts_col="timeCol", cols=c("col1RollMin = col1", "col2RollMin = col2"), rev_time="PT5s")) + update_by(uby_rolling_min_time(ts_col = "timeCol", cols = c("col1RollMin = col1", "col2RollMin = col2"), rev_time = "PT5s")) # compute rolling minimum of col1 and col2, grouped by boolCol, using previous 5 seconds, and following 5 seconds th2 <- th$ - update_by(uby_rolling_min_time(ts_col="timeCol", cols=c("col1RollMin = col1", "col2RollMin = col2"), rev_time="PT5s", fwd_ticks="PT5s")), by="boolCol") + update_by(uby_rolling_min_time(ts_col = "timeCol", cols = c("col1RollMin = col1", "col2RollMin = col2"), rev_time = "PT5s", fwd_ticks = "PT5s"), by = "boolCol") # compute rolling minimum of col1 and col2, grouped by boolCol and parity of col3, using following 10 seconds th3 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_rolling_min_time(ts_col="timeCol", cols=c("col1RollMin = col1", "col2RollMin = col2"), rev_time="PT0s", fwd_time="PT10s")), by=c("boolCol", "col3Parity")) + update_by(uby_rolling_min_time(ts_col = "timeCol", cols = c("col1RollMin = col1", "col2RollMin = col2"), rev_time = "PT0s", fwd_time = "PT10s"), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_rolling_prod_tick.Rd b/R/rdeephaven/man/uby_rolling_prod_tick.Rd index 40afb461f45..f5b02b59e8b 100644 --- a/R/rdeephaven/man/uby_rolling_prod_tick.Rd +++ b/R/rdeephaven/man/uby_rolling_prod_tick.Rd @@ -44,18 +44,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -64,16 +67,16 @@ th <- client$import_table(df) # compute rolling product of col1 and col2, using the previous 5 rows and current row th1 <- th$ - update_by(uby_rolling_prod_tick(cols=c("col1RollProd = col1", "col2RollProd = col2"), rev_ticks=6)) + update_by(uby_rolling_prod_tick(cols = c("col1RollProd = col1", "col2RollProd = col2"), rev_ticks = 6)) # compute rolling product of col1 and col2, grouped by boolCol, using previous 5 rows, current row, and following 5 rows th2 <- th$ - update_by(uby_rolling_prod_tick(cols=c("col1RollProd = col1", "col2RollProd = col2"), rev_ticks=6, fwd_ticks=5)), by="boolCol") + update_by(uby_rolling_prod_tick(cols = c("col1RollProd = col1", "col2RollProd = col2"), rev_ticks = 6, fwd_ticks = 5), by = "boolCol") # compute rolling product of col1 and col2, grouped by boolCol and parity of col3, using current row and following 10 rows th3 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_rolling_prod_tick(cols=c("col1RollProd = col1", "col2RollProd = col2"), rev_ticks=1, fwd_ticks=10)), by=c("boolCol", "col3Parity")) + update_by(uby_rolling_prod_tick(cols = c("col1RollProd = col1", "col2RollProd = col2"), rev_ticks = 1, fwd_ticks = 10), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_rolling_prod_time.Rd b/R/rdeephaven/man/uby_rolling_prod_time.Rd index 84af4a1e354..d0dfe2f92d3 100644 --- a/R/rdeephaven/man/uby_rolling_prod_time.Rd +++ b/R/rdeephaven/man/uby_rolling_prod_time.Rd @@ -43,18 +43,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -63,16 +66,16 @@ th <- client$import_table(df) # compute rolling product of col1 and col2, using the previous 5 seconds th1 <- th$ - update_by(uby_rolling_prod_time(ts_col="timeCol", cols=c("col1RollProd = col1", "col2RollProd = col2"), rev_time="PT5s")) + update_by(uby_rolling_prod_time(ts_col = "timeCol", cols = c("col1RollProd = col1", "col2RollProd = col2"), rev_time = "PT5s")) # compute rolling product of col1 and col2, grouped by boolCol, using previous 5 seconds, and following 5 seconds th2 <- th$ - update_by(uby_rolling_prod_time(ts_col="timeCol", cols=c("col1RollProd = col1", "col2RollProd = col2"), rev_time="PT5s", fwd_ticks="PT5s")), by="boolCol") + update_by(uby_rolling_prod_time(ts_col = "timeCol", cols = c("col1RollProd = col1", "col2RollProd = col2"), rev_time = "PT5s", fwd_ticks = "PT5s"), by = "boolCol") # compute rolling product of col1 and col2, grouped by boolCol and parity of col3, using following 10 seconds th3 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_rolling_prod_time(ts_col="timeCol", cols=c("col1RollProd = col1", "col2RollProd = col2"), rev_time="PT0s", fwd_time="PT10s")), by=c("boolCol", "col3Parity")) + update_by(uby_rolling_prod_time(ts_col = "timeCol", cols = c("col1RollProd = col1", "col2RollProd = col2"), rev_time = "PT0s", fwd_time = "PT10s"), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_rolling_std_tick.Rd b/R/rdeephaven/man/uby_rolling_std_tick.Rd index b41c629f069..889d7047a8f 100644 --- a/R/rdeephaven/man/uby_rolling_std_tick.Rd +++ b/R/rdeephaven/man/uby_rolling_std_tick.Rd @@ -44,18 +44,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -64,16 +67,16 @@ th <- client$import_table(df) # compute rolling standard deviation of col1 and col2, using the previous 5 rows and current row th1 <- th$ - update_by(uby_rolling_std_tick(cols=c("col1RollStd = col1", "col2RollStd = col2"), rev_ticks=6)) + update_by(uby_rolling_std_tick(cols = c("col1RollStd = col1", "col2RollStd = col2"), rev_ticks = 6)) # compute rolling standard deviation of col1 and col2, grouped by boolCol, using previous 5 rows, current row, and following 5 rows th2 <- th$ - update_by(uby_rolling_std_tick(cols=c("col1RollStd = col1", "col2RollStd = col2"), rev_ticks=6, fwd_ticks=5), by="boolCol") + update_by(uby_rolling_std_tick(cols = c("col1RollStd = col1", "col2RollStd = col2"), rev_ticks = 6, fwd_ticks = 5), by = "boolCol") # compute rolling standard deviation of col1 and col2, grouped by boolCol and parity of col3, using current row and following 10 rows th3 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_rolling_std_tick(cols=c("col1RollStd = col1", "col2RollStd = col2"), rev_ticks=1, fwd_ticks=10), by=c("boolCol", "col3Parity")) + update_by(uby_rolling_std_tick(cols = c("col1RollStd = col1", "col2RollStd = col2"), rev_ticks = 1, fwd_ticks = 10), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_rolling_std_time.Rd b/R/rdeephaven/man/uby_rolling_std_time.Rd index 4dab0224e20..88d1a0e105b 100644 --- a/R/rdeephaven/man/uby_rolling_std_time.Rd +++ b/R/rdeephaven/man/uby_rolling_std_time.Rd @@ -43,18 +43,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -63,16 +66,16 @@ th <- client$import_table(df) # compute rolling standard deviation of col1 and col2, using the previous 5 seconds th1 <- th$ - update_by(uby_rolling_std_time(ts_col="timeCol", cols=c("col1RollStd = col1", "col2RollStd = col2"), rev_time="PT5s")) + update_by(uby_rolling_std_time(ts_col = "timeCol", cols = c("col1RollStd = col1", "col2RollStd = col2"), rev_time = "PT5s")) # compute rolling standard deviation of col1 and col2, grouped by boolCol, using previous 5 seconds, and following 5 seconds th2 <- th$ - update_by(uby_rolling_std_time(ts_col="timeCol", cols=c("col1RollStd = col1", "col2RollStd = col2"), rev_time="PT5s", fwd_ticks="PT5s"), by="boolCol") + update_by(uby_rolling_std_time(ts_col = "timeCol", cols = c("col1RollStd = col1", "col2RollStd = col2"), rev_time = "PT5s", fwd_ticks = "PT5s"), by = "boolCol") # compute rolling standard deviation of col1 and col2, grouped by boolCol and parity of col3, using following 10 seconds th3 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_rolling_std_time(ts_col="timeCol", cols=c("col1RollStd = col1", "col2RollStd = col2"), rev_time="PT0s", fwd_time="PT10s"), by=c("boolCol", "col3Parity")) + update_by(uby_rolling_std_time(ts_col = "timeCol", cols = c("col1RollStd = col1", "col2RollStd = col2"), rev_time = "PT0s", fwd_time = "PT10s"), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_rolling_sum_tick.Rd b/R/rdeephaven/man/uby_rolling_sum_tick.Rd index 31df03a4118..ccf14d79d31 100644 --- a/R/rdeephaven/man/uby_rolling_sum_tick.Rd +++ b/R/rdeephaven/man/uby_rolling_sum_tick.Rd @@ -44,18 +44,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -64,16 +67,16 @@ th <- client$import_table(df) # compute rolling sum of col1 and col2, using the previous 5 rows and current row th1 <- th$ - update_by(uby_rolling_sum_tick(cols=c("col1RollSum = col1", "col2RollSum = col2"), rev_ticks=6)) + update_by(uby_rolling_sum_tick(cols = c("col1RollSum = col1", "col2RollSum = col2"), rev_ticks = 6)) # compute rolling sum of col1 and col2, grouped by boolCol, using previous 5 rows, current row, and following 5 rows th2 <- th$ - update_by(uby_rolling_sum_tick(cols=c("col1RollSum = col1", "col2RollSum = col2"), rev_ticks=6, fwd_ticks=5)), by="boolCol") + update_by(uby_rolling_sum_tick(cols = c("col1RollSum = col1", "col2RollSum = col2"), rev_ticks = 6, fwd_ticks = 5), by = "boolCol") # compute rolling sum of col1 and col2, grouped by boolCol and parity of col3, using current row and following 10 rows th3 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_rolling_sum_tick(cols=c("col1RollSum = col1", "col2RollSum = col2"), rev_ticks=1, fwd_ticks=10)), by=c("boolCol", "col3Parity")) + update_by(uby_rolling_sum_tick(cols = c("col1RollSum = col1", "col2RollSum = col2"), rev_ticks = 1, fwd_ticks = 10), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_rolling_sum_time.Rd b/R/rdeephaven/man/uby_rolling_sum_time.Rd index b8e028ebc5b..1111e4dd923 100644 --- a/R/rdeephaven/man/uby_rolling_sum_time.Rd +++ b/R/rdeephaven/man/uby_rolling_sum_time.Rd @@ -43,18 +43,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -63,16 +66,16 @@ th <- client$import_table(df) # compute rolling sum of col1 and col2, using the previous 5 seconds th1 <- th$ - update_by(uby_rolling_sum_time(ts_col="timeCol", cols=c("col1RollSum = col1", "col2RollSum = col2"), rev_time="PT5s")) + update_by(uby_rolling_sum_time(ts_col = "timeCol", cols = c("col1RollSum = col1", "col2RollSum = col2"), rev_time = "PT5s")) # compute rolling sum of col1 and col2, grouped by boolCol, using previous 5 seconds, and following 5 seconds th2 <- th$ - update_by(uby_rolling_sum_time(ts_col="timeCol", cols=c("col1RollSum = col1", "col2RollSum = col2"), rev_time="PT5s", fwd_ticks="PT5s")), by="boolCol") + update_by(uby_rolling_sum_time(ts_col = "timeCol", cols = c("col1RollSum = col1", "col2RollSum = col2"), rev_time = "PT5s", fwd_ticks = "PT5s"), by = "boolCol") # compute rolling sum of col1 and col2, grouped by boolCol and parity of col3, using following 10 seconds th3 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_rolling_sum_time(ts_col="timeCol", cols=c("col1RollSum = col1", "col2RollSum = col2"), rev_time="PT0s", fwd_time="PT10s")), by=c("boolCol", "col3Parity")) + update_by(uby_rolling_sum_time(ts_col = "timeCol", cols = c("col1RollSum = col1", "col2RollSum = col2"), rev_time = "PT0s", fwd_time = "PT10s"), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_rolling_wavg_tick.Rd b/R/rdeephaven/man/uby_rolling_wavg_tick.Rd index b4c2df54117..98c72197788 100644 --- a/R/rdeephaven/man/uby_rolling_wavg_tick.Rd +++ b/R/rdeephaven/man/uby_rolling_wavg_tick.Rd @@ -46,18 +46,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -66,16 +69,16 @@ th <- client$import_table(df) # compute rolling weighted average of col1 and col2, weighted by col3, using the previous 5 rows and current row th1 <- th$ - update_by(uby_rolling_wavg_tick(wcol="col3", cols=c("col1RollWAvg = col1", "col2RollWAvg = col2"), rev_ticks=6)) + update_by(uby_rolling_wavg_tick(wcol = "col3", cols = c("col1RollWAvg = col1", "col2RollWAvg = col2"), rev_ticks = 6)) # compute rolling weighted average of col1 and col2, weighted by col3, grouped by boolCol, using previous 5 rows, current row, and following 5 rows th2 <- th$ - update_by(uby_rolling_wavg_tick(wcol="col3", cols=c("col1RollWAvg = col1", "col2RollWAvg = col2"), rev_ticks=6, fwd_ticks=5), by="boolCol") + update_by(uby_rolling_wavg_tick(wcol = "col3", cols = c("col1RollWAvg = col1", "col2RollWAvg = col2"), rev_ticks = 6, fwd_ticks = 5), by = "boolCol") # compute rolling weighted average of col1 and col2, weighted by col3, grouped by boolCol and parity of col3, using current row and following 10 rows th3 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_rolling_wavg_tick(wcol="col3", cols=c("col1RollWAvg = col1", "col2RollWAvg = col2"), rev_ticks=1, fwd_ticks=10), by=c("boolCol", "col3Parity")) + update_by(uby_rolling_wavg_tick(wcol = "col3", cols = c("col1RollWAvg = col1", "col2RollWAvg = col2"), rev_ticks = 1, fwd_ticks = 10), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/man/uby_rolling_wavg_time.Rd b/R/rdeephaven/man/uby_rolling_wavg_time.Rd index a8dcc5f98ae..09cb4269eda 100644 --- a/R/rdeephaven/man/uby_rolling_wavg_time.Rd +++ b/R/rdeephaven/man/uby_rolling_wavg_time.Rd @@ -45,18 +45,21 @@ This function, like other Deephaven \code{uby} functions, is a generator functio function called an \code{\link{UpdateByOp}} intended to be used in a call to \code{update_by()}. This detail is typically hidden from the user. However, it is important to understand this detail for debugging purposes, as the output of a \code{uby} function can otherwise seem unexpected. + +For more information, see the vignette on \code{uby} functions by running +\code{vignette("update_by")}. 
} \examples{ \dontrun{ library(rdeephaven) # connecting to Deephaven server -client <- Client$new("localhost:10000", auth_type="psk", auth_token="my_secret_token") +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") # create data frame, push to server, retrieve TableHandle df <- data.frame( timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], - boolCol = sample(c(TRUE,FALSE), 500, TRUE), + boolCol = sample(c(TRUE, FALSE), 500, TRUE), col1 = sample(10000, size = 500, replace = TRUE), col2 = sample(10000, size = 500, replace = TRUE), col3 = 1:500 @@ -65,16 +68,16 @@ th <- client$import_table(df) # compute rolling weighted average of col1 and col2, weighted by col3, using the previous 5 seconds th1 <- th$ - update_by(uby_rolling_wavg_time(ts_col="timeCol", wcol="col3", cols=c("col1RollWAvg = col1", "col2RollWAvg = col2"), rev_time="PT5s")) + update_by(uby_rolling_wavg_time(ts_col = "timeCol", wcol = "col3", cols = c("col1RollWAvg = col1", "col2RollWAvg = col2"), rev_time = "PT5s")) # compute rolling weighted average of col1 and col2, weighted by col3, grouped by boolCol, using previous 5 seconds, and following 5 seconds th2 <- th$ - update_by(uby_rolling_wavg_time(ts_col="timeCol", wcol="col3", cols=c("col1RollWAvg = col1", "col2RollWAvg = col2"), rev_time="PT5s", fwd_ticks="PT5s"), by="boolCol") + update_by(uby_rolling_wavg_time(ts_col = "timeCol", wcol = "col3", cols = c("col1RollWAvg = col1", "col2RollWAvg = col2"), rev_time = "PT5s", fwd_ticks = "PT5s"), by = "boolCol") # compute rolling weighted average of col1 and col2, weighted by col3, grouped by boolCol and parity of col3, using following 10 seconds th3 <- th$ update("col3Parity = col3 \% 2")$ - update_by(uby_rolling_wavg_time(ts_col="timeCol", wcol="col3", cols=c("col1RollWAvg = col1", "col2RollWAvg = col2"), rev_time="PT0s", fwd_time="PT10s"), by=c("boolCol", "col3Parity")) + update_by(uby_rolling_wavg_time(ts_col = "timeCol", wcol = "col3", cols = c("col1RollWAvg = col1", "col2RollWAvg = col2"), rev_time = "PT0s", fwd_time = "PT10s"), by = c("boolCol", "col3Parity")) client$close() } diff --git a/R/rdeephaven/pkgdown/_pkgdown.yml b/R/rdeephaven/pkgdown/_pkgdown.yml new file mode 100644 index 00000000000..14c104a8548 --- /dev/null +++ b/R/rdeephaven/pkgdown/_pkgdown.yml @@ -0,0 +1,47 @@ +--- +url: https://deephaven.io/core/rdoc/ + +template: + bootstrap: 5 + bootswatch: cosmo + +navbar: + structure: + left: [reference, articles] + right: [search, github] + components: + articles: + text: Articles + menu: + - text: Overview + href: articles/rdeephaven.html + - text: Aggregations with agg_by + href: articles/agg_by.html + - text: Moving Operations with update_by + href: articles/update_by.html + +reference: + - title: Core API + contents: + - Client + - TableHandle + - AggOp + - UpdateByOp + + - title: AggBy Operations + contents: + - starts_with("agg_") + + - title: UpdateBy Operations + contents: + - starts_with("uby_") + + - title: Utility Functions + contents: + - merge_tables + - op_control + + - title: Other Data Types + contents: + - OperationControl + diff --git a/R/rdeephaven/vignettes/.gitignore b/R/rdeephaven/vignettes/.gitignore new file mode 100644 index 00000000000..097b241637d --- /dev/null +++ b/R/rdeephaven/vignettes/.gitignore @@ -0,0 +1,2 @@ +*.html +*.R diff --git a/R/rdeephaven/vignettes/agg_by.Rmd b/R/rdeephaven/vignettes/agg_by.Rmd new file mode 100644 index 00000000000..0fd5bab3f38 --- /dev/null +++ 
b/R/rdeephaven/vignettes/agg_by.Rmd @@ -0,0 +1,120 @@ +--- +title: "Aggregations with agg_by" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{agg_by} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +Table aggregations are a quintessential feature of Deephaven. You can apply as +many aggregations as needed to static tables _or_ streaming tables, and if the +parent tables are streaming, the resulting aggregated tables will update +alongside their parent tables. It is also very easy to perform _grouped_ +aggregations, which allow you to aggregate tables on a per-group basis. + +## Applying aggregations to a table + +There are two methods for performing aggregations on a table, `agg_by()` and +`agg_all_by()`. `agg_by()` allows you to perform many aggregations on specified +columns, while `agg_all_by()` allows you to perform a single aggregation to +every non-grouping column in the table. Both methods have an optional `by` +parameter that is used to specify grouping columns. Here are some details on +each method: + +- `TableHandle$agg_by(aggs, by)`: Creates a new table containing grouping + columns and grouped data. + The resulting grouped data is defined by the aggregation(s) specified. +- `TableHandle$agg_all_by(agg, by)`: Creates a new table containing grouping + columns and grouped data. The resulting grouped data is defined by the + aggregation specified. This method applies the aggregation to all non-grouping + columns of the table, so it can only accept one aggregation at a time. + +The `agg_by()` and `agg_all_by()` methods themselves do not know anything about +the columns on which you want to perform aggregations. Rather, the desired +columns are passed to individual `agg` functions, enabling you to apply various +kinds of aggregations to different columns or groups of columns as needed. + +## `agg` functions + +`agg` functions are used to perform aggregation calculations on grouped data by +passing them to `agg_by()` or `agg_all_by()`. These functions are _generators_, +meaning they return _functions_ that the Deephaven engine knows how to +interpret. We call the functions that they return `AggOp`s. See `?AggOp` for +more information. These AggOps are not R-level functions, but +Deephaven-specific data types that perform all of the intensive calculations. +Here is a list of all `agg` functions available in Deephaven: + +- `agg_first()` +- `agg_last()` +- `agg_min()` +- `agg_max()` +- `agg_sum()` +- `agg_abs_sum()` +- `agg_avg()` +- `agg_w_avg()` +- `agg_median()` +- `agg_var()` +- `agg_std()` +- `agg_percentile()` +- `agg_count()` + +For more details on each aggregation function, see the reference documentation +by running `?agg_first`, `?agg_last`, etc. 
+ +## An Example +```{r, eval=FALSE} +library(rdeephaven) + +# connecting to Deephaven server +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") + +# create data frame, push to server, retrieve TableHandle +df <- data.frame( + X = c("A", "B", "A", "C", "B", "A", "B", "B", "C"), + Y = c("M", "N", "O", "N", "P", "M", "O", "P", "M"), + Number1 = c(100, -44, 49, 11, -66, 50, 29, 18, -70), + Number2 = c(-55, 76, 20, 130, 230, -50, 73, 137, 214) +) +th <- client$import_table(df) + +# get first and last elements of each column +th1 <- th$ + agg_by( + agg_first(c("XFirst = X", "YFirst = Y", "Number1First = Number1", "Number2First = Number2")), + agg_last(c("XLast = X", "YLast = Y", "Number1Last = Number1", "Number2Last = Number2")) +) + +# compute mean and standard deviation of Number1 and Number2, grouped by X +th2 <- th$ + agg_by( + c( + agg_avg(c("Number1Avg = Number1", "Number2Avg = Number2")), + agg_std(c("Number1Std = Number1", "Number2Std = Number2")) + ), + by = "X" +) + +# compute maximum of all non-grouping columns, grouped by X and Y +th3 <- th$ + agg_all_by(agg_max(), by = c("X", "Y")) + +# compute minimum and maximum of Number1 and Number2 respectively grouped by Y +th4 <- th$ + agg_by( + c( + agg_min("Number1Min = Number1"), + agg_max("Number2Max = Number2") + ), + by = "Y" +) + +client$close() +``` diff --git a/R/rdeephaven/vignettes/rdeephaven.Rmd b/R/rdeephaven/vignettes/rdeephaven.Rmd new file mode 100644 index 00000000000..b19df258a6c --- /dev/null +++ b/R/rdeephaven/vignettes/rdeephaven.Rmd @@ -0,0 +1,130 @@ +--- +title: "Overview of the Deephaven Core R Client" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{rdeephaven} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +The Deephaven Core R Client provides an R interface to Deephaven's powerful +real-time data engine, [_Deephaven Core_](https://deephaven.io/community/). +To use this package, you must have a Deephaven server running and be able to +connect to it. For more information on how to set up a Deephaven server, see the +documentation [here](https://deephaven.io/core/docs/tutorials/quickstart/). + +## Building blocks of the Deephaven R Client + +There are two primary R classes that make up the Deephaven R Client, the `Client` +class and the `TableHandle` class. See `?Client` and `?TableHandle` for more +information. The Client class is used to establish a connection to the +Deephaven server with its constructor `Client$new()`, and to send server +requests, such as running a script via `run_script()`, or pushing local data to +the server via `import_table()`. Basic usage of the Client class may look +something like this: + +```{r, eval=FALSE} +library(rdeephaven) +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") +``` + +Many of these server requests end up creating or modifying tables that live on +the server. To keep track of these tables, the R client retrieves references to +them, and wraps these references in TableHandle objects. These TableHandles +have a host of methods that mirror server-side table operations, such as +`head()`, `tail()`, `update_by()`, and so on. So, you can typically use +TableHandles _as if_ they are tables themselves, and all of the corresponding +methods that you call on them will be executed on the server. 
Here is a simple +example of pushing data to the server, retrieving a TableHandle to the resulting +table, and applying some basic table operations to the table: + +```{r, eval=FALSE} +df1 <- data.frame(x = 1:10, y = 11:20) +th1 <- client$import_table(df1) + +th2 <- th1$ + update("z = x + y")$ + where("z % 4 == 0") +``` + +TableHandles also support common functional methods for converting server-side +Deephaven tables to R objects stored in local memory such as `as.data.frame()`, +`as_tibble()`, and `as_arrow_table()`. Here's an example of converting the table +created above to an R data frame and verifying that other functional methods +work as expected: + +```{r, eval=FALSE} +df2 <- as.data.frame(th2) + +print(nrow(th2) == nrow(df2)) +print(ncol(th2) == ncol(df2)) +print(dim(th2) == dim(df2)) +print(all(as.data.frame(head(th2, 2)) == head(df2, 2))) +print(all(as.data.frame(tail(th2, 2)) == tail(df2, 2))) +``` + +For more information on these classes and all of their methods, see the +reference documentation by running `?Client` or `?TableHandle`. + +## Real-time data analysis + +Since TableHandles are references to tables living on the Deephaven server, they +may refer to streaming tables, or tables that are receiving new data +periodically(typically once per second). Here's a simple example of creating a +table that adds a new row every second: + +```{r, eval=FALSE} +th3 <- client$time_table("PT1s")$ + update(c("X = ii", "Y = sin(X)")) +``` + +R objects like data frames or Dplyr tibbles do not have this streaming property +- they are always static objects stored in memory. However, a TableHandle +referring to a streaming table may be converted to a data frame or tibble at any +time, and the resulting object will be a snapshot of the table at the time of +conversion. This means that you can use the Deephaven R Client to perform +real-time data analysis on streaming data! Here, we make a simple plot of the +ticking table, and call it three times to demonstrate the dynamic nature of the +table: + +```{r, eval=FALSE} +plot(as.data.frame(th3)$X, as.data.frame(th3)$Y, type = "l") +Sys.sleep(5) +plot(as.data.frame(th3)$X, as.data.frame(th3)$Y, type = "l") +Sys.sleep(5) +plot(as.data.frame(th3)$X, as.data.frame(th3)$Y, type = "l") +``` + +There are performance and memory considerations when pulling data from the +server, so it is best to use the provided TableHandle methods to perform as much +of your analysis as possible on the server, and to only pull the data when +something _must_ be done in R, like plotting or writing to a local file. + +## Powerful table operations + +Much of the power of Deephaven's suite of table operations is achieved through +the use of the `update_by()` and `agg_by()` methods. These table methods are +important enough to warrant their own documentation pages, accessible by running +`vignette("update_by")` or `vignette("agg_by")`. These methods +come with their own suites of functions, prefixed with `agg_` and `uby_` +respectively, that are listed in their documentation pages. Running +`ls("package:rdeephaven")` will reveal that most of the functions included in +this package are for these methods, so it is important to get acquainted with +them. + +## Getting help +While we've done our best to provide good documentation for this package, you +may find you need more help than what this documentation has to offer. 
Please +visit the official Deephaven Community Core +[documentation](https://deephaven.io/core/docs/tutorials/quickstart/) to learn +more about Deephaven and to find comprehensive examples. Additionally, feel free +to reach out to us on the Deephaven +[Community Slack channel](https://deephaven.io/slack) with any questions. We +hope you find real-time data analysis in R to be as easy as possible. diff --git a/R/rdeephaven/vignettes/update_by.Rmd b/R/rdeephaven/vignettes/update_by.Rmd new file mode 100644 index 00000000000..2704b46fed0 --- /dev/null +++ b/R/rdeephaven/vignettes/update_by.Rmd @@ -0,0 +1,133 @@ +--- +title: "Moving Operations with update_by" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{update_by} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +Deephaven's `update_by()` table method and suite of `uby` functions enable +cumulative and moving calculations on static _and_ streaming tables. Complex +operations like cumulative minima and maxima, exponential moving averages, and +rolling standard deviations are all possible and effortless to execute. As +always in Deephaven, the results of these calculations will continue to update +as their parent tables are updated. Additionally, it's easy to group data by one +or more columns, enabling complex group-wise calculations with a single line of +code. + +## Applying UpdateBy operations to a table + +The table method `update_by()` is the entry point for UpdateBy operations. It +takes two arguments: the first is an `UpdateByOp` or a list of +`UpdateByOp`s denoting the calculations to perform on specific columns of the +table. Then, it takes a column name or a list of column names that define the +groups on which to perform the calculations. If you don't want grouped +calculations, omit this argument. + +To learn more about UpdateByOps, see the reference documentation with +`?UpdateByOp`. + +The `update_by()` method itself does not know anything about the columns on +which you want to perform calculations. Rather, the desired columns are passed +to individual `uby` functions, enabling a massive amount of flexibility. + +## `uby` functions + +`uby` functions are the workers that actually execute the complex UpdateBy +calculations. These functions are _generators_, meaning they return _functions_ +that the Deephaven engine knows how to interpret. We call the functions they +return `UpdateByOp`s. See `?UpdateByOp` for more information. These UpdateByOps +are not R-level functions, but Deephaven-specific data types that perform all of +the intensive calculations. 
Here is a list of all `uby` functions available in +Deephaven: + +- `uby_cum_min()` +- `uby_cum_max()` +- `uby_cum_sum()` +- `uby_cum_prod()` +- `uby_forward_fill()` +- `uby_delta()` +- `uby_emmin_tick()` +- `uby_emmin_time()` +- `uby_emmax_tick()` +- `uby_emmax_time()` +- `uby_ems_tick()` +- `uby_ems_time()` +- `uby_ema_tick()` +- `uby_ema_time()` +- `uby_emstd_tick()` +- `uby_emstd_time()` +- `uby_rolling_count_tick()` +- `uby_rolling_count_time()` +- `uby_rolling_group_tick()` +- `uby_rolling_group_time()` +- `uby_rolling_min_tick()` +- `uby_rolling_min_time()` +- `uby_rolling_max_tick()` +- `uby_rolling_max_time()` +- `uby_rolling_sum_tick()` +- `uby_rolling_sum_time()` +- `uby_rolling_prod_tick()` +- `uby_rolling_prod_time()` +- `uby_rolling_avg_tick()` +- `uby_rolling_avg_time()` +- `uby_rolling_wavg_tick()` +- `uby_rolling_wavg_time()` +- `uby_rolling_std_tick()` +- `uby_rolling_std_time()` + +For more details on each aggregation function, see the reference documentation +by running `?uby_cum_min`, `?uby_delta`, etc. + +## An Example +```{r, eval=FALSE} +library(rdeephaven) + +# connecting to Deephaven server +client <- Client$new("localhost:10000", auth_type = "psk", auth_token = "my_secret_token") + +# create data frame, push to server, retrieve TableHandle +df <- data.frame( + timeCol = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 0.01), by = "1 sec")[1:500], + boolCol = sample(c(TRUE, FALSE), 500, TRUE), + col1 = sample(10000, size = 500, replace = TRUE), + col2 = sample(10000, size = 500, replace = TRUE), + col3 = 1:500 +) +th <- client$import_table(df) + +# compute 10-row exponential weighted moving average of col1 and col2, grouped by boolCol +th1 <- th$ + update_by(uby_ema_tick(decay_ticks = 10, cols = c("col1Ema = col1", "col2Ema = col2")), by = "boolCol") + +# compute rolling 10-second weighted average and standard deviation of col1 and col2, weighted by col3 +th2 <- th$ + update_by( + c( + uby_rolling_wavg_time(ts_col = "timeCol", wcol = "col3", cols = c("col1WAvg = col1", "col2WAvg = col2"), rev_time = "PT10s"), + uby_rolling_std_time(ts_col = "timeCol", cols = c("col1Std = col1", "col2Std = col2"), rev_time = "PT10s") + ) +) + +# compute cumulative minimum and maximum of col1 and col2 respectively, and the rolling 20-row sum of col3, grouped by boolCol +th3 <- th$ + update_by( + c( + uby_cum_min(cols = "col1"), + uby_cum_max(cols = "col2"), + uby_rolling_sum_tick(cols = "col3", rev_ticks = 20) + ), + by = "boolCol" +) + +client$close() +} +``` \ No newline at end of file diff --git a/docker/registry/cpp-clients-multi-base/gradle.properties b/docker/registry/cpp-clients-multi-base/gradle.properties index 11c131793a6..0e9b923c556 100644 --- a/docker/registry/cpp-clients-multi-base/gradle.properties +++ b/docker/registry/cpp-clients-multi-base/gradle.properties @@ -1,4 +1,4 @@ io.deephaven.project.ProjectType=DOCKER_REGISTRY deephaven.registry.imageName=ghcr.io/deephaven/cpp-clients-multi-base:latest -deephaven.registry.imageId=ghcr.io/deephaven/cpp-clients-multi-base@sha256:e9d3cbd9cf5f95162e559a8e4b78fec20ddbc559048d8b0167db161f8b748d55 +deephaven.registry.imageId=ghcr.io/deephaven/cpp-clients-multi-base@sha256:f4797606e1a2e06349289f0cd9a73f310d37f4509b10b1637ec62589a8d05b12 deephaven.registry.platform=linux/amd64 From 953d0f2e6748eafc6b99d668a3ef51f26f346a16 Mon Sep 17 00:00:00 2001 From: Jianfeng Mao <4297243+jmao-denver@users.noreply.github.com> Date: Thu, 16 Nov 2023 20:02:14 -0700 Subject: [PATCH 33/41] Apply query scope ctx on formula aggregation 
(#4839) * Apply query scope ctx on formula aggregation * Add more tests --- py/server/deephaven/agg.py | 4 ++ py/server/deephaven/table.py | 66 ++++++++++++++++++++++---------- py/server/tests/test_pt_proxy.py | 18 +++++++++ py/server/tests/test_table.py | 46 +++++++++++++++++++++- 4 files changed, 112 insertions(+), 22 deletions(-) diff --git a/py/server/deephaven/agg.py b/py/server/deephaven/agg.py index 7e4c4293f73..09e985b5330 100644 --- a/py/server/deephaven/agg.py +++ b/py/server/deephaven/agg.py @@ -44,6 +44,10 @@ def j_agg_spec(self): raise DHError(message="unsupported aggregation operation.") return self._j_agg_spec + @property + def is_formula(self): + return isinstance(self._j_agg_spec, jpy.get_type("io.deephaven.api.agg.spec.AggSpecFormula")) + def sum_(cols: Union[str, List[str]] = None) -> Aggregation: """Creates a Sum aggregation. diff --git a/py/server/deephaven/table.py b/py/server/deephaven/table.py index 1a55c58f823..e46348667b9 100644 --- a/py/server/deephaven/table.py +++ b/py/server/deephaven/table.py @@ -563,6 +563,15 @@ def _query_scope_ctx(): yield +def _query_scope_agg_ctx(aggs: Sequence[Aggregation]) -> contextlib.AbstractContextManager: + has_agg_formula = any([agg.is_formula for agg in aggs]) + if has_agg_formula: + cm = _query_scope_ctx() + else: + cm = contextlib.nullcontext() + return cm + + class SortDirection(Enum): """An enum defining the sorting orders.""" DESCENDING = auto() @@ -1961,13 +1970,16 @@ def agg_by(self, aggs: Union[Aggregation, Sequence[Aggregation]], by: Union[str, if not by and initial_groups: raise ValueError("missing group-by column names when initial_groups is provided.") j_agg_list = j_array_list([agg.j_aggregation for agg in aggs]) - if not by: - return Table(j_table=self.j_table.aggBy(j_agg_list, preserve_empty)) - else: - j_column_name_list = j_array_list([_JColumnName.of(col) for col in by]) - initial_groups = unwrap(initial_groups) - return Table( - j_table=self.j_table.aggBy(j_agg_list, preserve_empty, initial_groups, j_column_name_list)) + + cm = _query_scope_agg_ctx(aggs) + with cm: + if not by: + return Table(j_table=self.j_table.aggBy(j_agg_list, preserve_empty)) + else: + j_column_name_list = j_array_list([_JColumnName.of(col) for col in by]) + initial_groups = unwrap(initial_groups) + return Table( + j_table=self.j_table.aggBy(j_agg_list, preserve_empty, initial_groups, j_column_name_list)) except Exception as e: raise DHError(e, "table agg_by operation failed.") from e @@ -2004,8 +2016,11 @@ def partitioned_agg_by(self, aggs: Union[Aggregation, Sequence[Aggregation]], by = to_sequence(by) j_agg_list = j_array_list([agg.j_aggregation for agg in aggs]) initial_groups = unwrap(initial_groups) - return PartitionedTable( - j_partitioned_table=self.j_table.partitionedAggBy(j_agg_list, preserve_empty, initial_groups, *by)) + + cm = _query_scope_agg_ctx(aggs) + with cm: + return PartitionedTable( + j_partitioned_table=self.j_table.partitionedAggBy(j_agg_list, preserve_empty, initial_groups, *by)) except Exception as e: raise DHError(e, "table partitioned_agg_by operation failed.") from e @@ -2028,7 +2043,9 @@ def agg_all_by(self, agg: Aggregation, by: Union[str, Sequence[str]] = None) -> """ try: by = to_sequence(by) - return Table(j_table=self.j_table.aggAllBy(agg.j_agg_spec, *by)) + cm = _query_scope_agg_ctx([agg]) + with cm: + return Table(j_table=self.j_table.aggAllBy(agg.j_agg_spec, *by)) except Exception as e: raise DHError(e, "table agg_all_by operation failed.") from e @@ -2276,12 +2293,15 @@ def rollup(self, aggs: 
Union[Aggregation, Sequence[Aggregation]], by: Union[str, aggs = to_sequence(aggs) by = to_sequence(by) j_agg_list = j_array_list([agg.j_aggregation for agg in aggs]) - if not by: - return RollupTable(j_rollup_table=self.j_table.rollup(j_agg_list, include_constituents), aggs=aggs, - include_constituents=include_constituents, by=by) - else: - return RollupTable(j_rollup_table=self.j_table.rollup(j_agg_list, include_constituents, by), - aggs=aggs, include_constituents=include_constituents, by=by) + + cm = _query_scope_agg_ctx(aggs) + with cm: + if not by: + return RollupTable(j_rollup_table=self.j_table.rollup(j_agg_list, include_constituents), aggs=aggs, + include_constituents=include_constituents, by=by) + else: + return RollupTable(j_rollup_table=self.j_table.rollup(j_agg_list, include_constituents, by), + aggs=aggs, include_constituents=include_constituents, by=by) except Exception as e: raise DHError(e, "table rollup operation failed.") from e @@ -3299,8 +3319,11 @@ def agg_by(self, aggs: Union[Aggregation, Sequence[Aggregation]], aggs = to_sequence(aggs) by = to_sequence(by) j_agg_list = j_array_list([agg.j_aggregation for agg in aggs]) - with auto_locking_ctx(self): - return PartitionedTableProxy(j_pt_proxy=self.j_pt_proxy.aggBy(j_agg_list, *by)) + + cm = _query_scope_agg_ctx(aggs) + with cm: + with auto_locking_ctx(self): + return PartitionedTableProxy(j_pt_proxy=self.j_pt_proxy.aggBy(j_agg_list, *by)) except Exception as e: raise DHError(e, "agg_by operation on the PartitionedTableProxy failed.") from e @@ -3324,8 +3347,11 @@ def agg_all_by(self, agg: Aggregation, by: Union[str, Sequence[str]] = None) -> """ try: by = to_sequence(by) - with auto_locking_ctx(self): - return PartitionedTableProxy(j_pt_proxy=self.j_pt_proxy.aggAllBy(agg.j_agg_spec, *by)) + + cm = _query_scope_agg_ctx([agg]) + with cm: + with auto_locking_ctx(self): + return PartitionedTableProxy(j_pt_proxy=self.j_pt_proxy.aggAllBy(agg.j_agg_spec, *by)) except Exception as e: raise DHError(e, "agg_all_by operation on the PartitionedTableProxy failed.") from e diff --git a/py/server/tests/test_pt_proxy.py b/py/server/tests/test_pt_proxy.py index fcb802f1a76..99c7414aa55 100644 --- a/py/server/tests/test_pt_proxy.py +++ b/py/server/tests/test_pt_proxy.py @@ -345,6 +345,24 @@ def local_fn() -> str: self.assertIsNotNone(inner_func("param str")) + @unittest.skip("https://github.com/deephaven/deephaven-core/issues/4847") + def test_agg_formula_scope(self): + with self.subTest("agg_by_formula"): + def agg_by_formula(): + def my_fn(vals): + import deephaven.dtypes as dht + return dht.array(dht.double, [i + 2 for i in vals]) + + t = empty_table(1000).update_view(["A=i%2", "B=A+3"]) + pt_proxy = t.partition_by("A").proxy() + rlt_pt_proxy = pt_proxy.agg_by([formula("(double[])my_fn(each)", formula_param='each', cols=['C=B']), + median("B")], + by='A') + return rlt_pt_proxy + + ptp = agg_by_formula() + self.assertIsNotNone(ptp) + def global_fn() -> str: return "global str" diff --git a/py/server/tests/test_table.py b/py/server/tests/test_table.py index 816c4e74f99..334d9415a3c 100644 --- a/py/server/tests/test_table.py +++ b/py/server/tests/test_table.py @@ -943,7 +943,7 @@ def make_pairs_3(tid, a, b): def test_callable_attrs_in_query(self): input_cols = [ - datetime_col(name="DTCol", data=[1,10000000]), + datetime_col(name="DTCol", data=[1, 10000000]), ] test_table = new_table(cols=input_cols) rt = test_table.update("Year = (int)year(DTCol, timeZone(`ET`))") @@ -1025,7 +1025,7 @@ def test_agg_with_options(self): unique(cols=["ua = 
a", "ub = b"], include_nulls=True, non_unique_sentinel=-1), count_distinct(cols=["csa = a", "csb = b"], count_nulls=True), distinct(cols=["da = a", "db = b"], include_nulls=True), - ] + ] rt = test_table.agg_by(aggs=aggs, by=["c"]) self.assertEqual(rt.size, test_table.select_distinct(["c"]).size) @@ -1063,6 +1063,48 @@ def test_agg_count_and_partition_error(self): t.agg_by(aggs=[partition(["A"])], by=["B"]) self.assertIn("string value", str(cm.exception)) + def test_agg_formula_scope(self): + with self.subTest("agg_by_formula"): + def agg_by_formula(): + def my_fn(vals): + import deephaven.dtypes as dht + return dht.array(dht.double, [i + 2 for i in vals]) + + t = empty_table(1000).update_view(["A=i%2", "B=A+3"]) + t = t.agg_by([formula("(double[])my_fn(each)", formula_param='each', cols=['C=B']), median("B")], + by='A') + return t + + t = agg_by_formula() + self.assertIsNotNone(t) + + with self.subTest("agg_all_by_formula"): + def agg_all_by_formula(): + def my_fn(vals): + import deephaven.dtypes as dht + return dht.array(dht.double, [i + 2 for i in vals]) + + t = empty_table(1000).update_view(["A=i%2", "B=A+3"]) + t = t.agg_all_by(formula("(double[])my_fn(each)", formula_param='each', cols=['C=B']), by='A') + return t + + t = agg_all_by_formula() + self.assertIsNotNone(t) + + with self.subTest("partitioned_by_formula"): + def partitioned_by_formula(): + def my_fn(vals): + import deephaven.dtypes as dht + return dht.array(dht.double, [i + 2 for i in vals]) + + t = empty_table(10).update(["grp_id=(int)(i/5)", "var=(int)i", "weights=(double)1.0/(i+1)"]) + t = t.partitioned_agg_by(aggs=formula("(double[])my_fn(each)", formula_param='each', + cols=['C=weights']), by="grp_id") + return t + + t = partitioned_by_formula() + self.assertIsNotNone(t) + if __name__ == "__main__": unittest.main() From 53049b816d899570fcf6bc4867ad2a934e9d2d4d Mon Sep 17 00:00:00 2001 From: Devin Smith Date: Fri, 17 Nov 2023 08:19:45 -0800 Subject: [PATCH 34/41] Add topic, partition, and timestamp column kafka publishing support (#4771) This adds optional configuration for publishing a kafka record with the topic, partition, and/or timestamp as specified by their respective columns. Additionally, this also adds the ability to specify a default partition. 
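For illustration only (a sketch, not part of the change itself): with the new `topic_col`, `partition_col`, and `timestamp_col` parameters added to the Python `produce` wrapper below, per-row record routing might be wired up roughly as follows. The broker address and the table/column names (`Topic`, `Partition`, `Price`) are invented for this sketch; the call shape follows the updated `produce` signature in this patch.

```python
from deephaven import kafka_producer as pk, new_table
from deephaven.column import string_col, int_col, double_col
from deephaven.stream.kafka.producer import KeyValueSpec

# Hypothetical table whose rows carry their own destination topic and partition.
orders = new_table(cols=[
    string_col("Topic", ["orders_a", "orders_b", None]),
    int_col("Partition", [0, 1, 0]),
    double_col("Price", [10.0, 10.5, 11.0]),
])

# Rows with a null Topic fall back to the default topic "orders"; each record's
# partition is taken from the Partition column.
cancel = pk.produce(
    orders,
    {"bootstrap.servers": "localhost:9092"},  # assumed broker address
    "orders",
    key_spec=KeyValueSpec.IGNORE,
    value_spec=pk.simple_spec("Price"),
    topic_col="Topic",
    partition_col="Partition",
)

cancel()  # the returned callback stops publishing and releases resources
```

The Java path goes through the new `KafkaPublishOptions` fields (`topicColumn`, `partitionColumn`, `timestampColumn`), as exercised by the added `KafkaPublishOptionsTest`.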
Fixes #4767 --- .../engine/table/ColumnDefinition.java | 24 ++ .../deephaven/engine/table/ColumnSource.java | 27 +-- .../java/io/deephaven/engine/table/Table.java | 18 ++ .../engine/table/TableDefinition.java | 34 +++ .../io/deephaven/engine/table/TypeHelper.java | 36 +++ .../deephaven/kafka/KafkaPublishOptions.java | 91 +++++++- .../java/io/deephaven/kafka/KafkaTools.java | 4 + .../kafka/publish/PublishToKafka.java | 177 +++++++++++--- .../kafka/KafkaPublishOptionsTest.java | 217 ++++++++++++++++++ py/server/deephaven/stream/kafka/producer.py | 45 +++- py/server/tests/test_kafka_producer.py | 123 +++++++++- 11 files changed, 730 insertions(+), 66 deletions(-) create mode 100644 engine/api/src/main/java/io/deephaven/engine/table/TypeHelper.java create mode 100644 extensions/kafka/src/test/java/io/deephaven/kafka/KafkaPublishOptionsTest.java diff --git a/engine/api/src/main/java/io/deephaven/engine/table/ColumnDefinition.java b/engine/api/src/main/java/io/deephaven/engine/table/ColumnDefinition.java index fe19fcee9c9..e729a627bf8 100644 --- a/engine/api/src/main/java/io/deephaven/engine/table/ColumnDefinition.java +++ b/engine/api/src/main/java/io/deephaven/engine/table/ColumnDefinition.java @@ -504,6 +504,30 @@ public void describeDifferences(@NotNull List differences, @NotNull fina } } + /** + * Checks if objects of type {@link #getDataType() dataType} can be cast to {@code destDataType} (equivalent to + * {@code destDataType.isAssignableFrom(dataType)}). If not, this throws a {@link ClassCastException}. + * + * @param destDataType the destination data type + */ + public final void checkCastTo(Class destDataType) { + TypeHelper.checkCastTo("[" + name + "]", dataType, destDataType); + } + + /** + * Checks if objects of type {@link #getDataType() dataType} can be cast to {@code destDataType} (equivalent to + * {@code destDataType.isAssignableFrom(dataType)}) and checks that objects of type {@link #getComponentType() + * componentType} can be cast to {@code destComponentType} (both component types must be present and cast-able, or + * both must be {@code null}; when both present, is equivalent to + * {@code destComponentType.isAssignableFrom(componentType)}). If not, this throws a {@link ClassCastException}. + * + * @param destDataType the destination data type + * @param destComponentType the destination component type, may be {@code null} + */ + public final void checkCastTo(Class destDataType, @Nullable Class destComponentType) { + TypeHelper.checkCastTo("[" + name + "]", dataType, componentType, destDataType, destComponentType); + } + public boolean equals(final Object other) { if (this == other) { return true; diff --git a/engine/api/src/main/java/io/deephaven/engine/table/ColumnSource.java b/engine/api/src/main/java/io/deephaven/engine/table/ColumnSource.java index 8a5e04795d1..9299fcfca7b 100644 --- a/engine/api/src/main/java/io/deephaven/engine/table/ColumnSource.java +++ b/engine/api/src/main/java/io/deephaven/engine/table/ColumnSource.java @@ -150,7 +150,7 @@ default Object exportElement(T tuple, int elementIndex) { * {@code String} data: * *
-     *     ColumnSource<String> colSource = table.getColumnSource("MyString").getParameterized(String.class)
+     *     ColumnSource<String> colSource = table.getColumnSource("MyString").cast(String.class)
      * 
*

* Due to the nature of type erasure, the JVM will still insert an additional cast to {@code TYPE} when elements are @@ -163,11 +163,7 @@ default Object exportElement(T tuple, int elementIndex) { @FinalDefault default ColumnSource cast(Class clazz) { Require.neqNull(clazz, "clazz"); - final Class columnSourceType = getType(); - if (!clazz.isAssignableFrom(columnSourceType)) { - throw new ClassCastException(String.format("Cannot convert column source for type %s to type %s", - columnSourceType.getName(), clazz.getName())); - } + TypeHelper.checkCastTo("ColumnSource", getType(), clazz); // noinspection unchecked return (ColumnSource) this; } @@ -184,7 +180,7 @@ default ColumnSource cast(Class clazz) { * {@code String} data: * *

-     *     ColumnSource<String> colSource = table.getColumnSource("MyString", null).getParameterized(String.class)
+     *     ColumnSource<String> colSource = table.getColumnSource("MyString").cast(String.class, null)
      * 
*

* Due to the nature of type erasure, the JVM will still insert an additional cast to {@code TYPE} when elements are @@ -197,19 +193,10 @@ default ColumnSource cast(Class clazz) { */ @FinalDefault default ColumnSource cast(Class clazz, @Nullable Class componentType) { - final ColumnSource casted = cast(clazz); - final Class columnSourceComponentType = getComponentType(); - if ((componentType == null && columnSourceComponentType == null) || (componentType != null - && columnSourceComponentType != null && componentType.isAssignableFrom(columnSourceComponentType))) { - return casted; - } - final Class columnSourceType = getType(); - throw new ClassCastException(String.format( - "Cannot convert column source componentType for type %s to %s (for %s / %s)", - columnSourceComponentType == null ? null : columnSourceComponentType.getName(), - componentType == null ? null : componentType.getName(), - columnSourceType.getName(), - clazz.getName())); + Require.neqNull(clazz, "clazz"); + TypeHelper.checkCastTo("ColumnSource", getType(), getComponentType(), clazz, componentType); + // noinspection unchecked + return (ColumnSource) this; } /** diff --git a/engine/api/src/main/java/io/deephaven/engine/table/Table.java b/engine/api/src/main/java/io/deephaven/engine/table/Table.java index f872f366076..f06946b2d9c 100644 --- a/engine/api/src/main/java/io/deephaven/engine/table/Table.java +++ b/engine/api/src/main/java/io/deephaven/engine/table/Table.java @@ -228,20 +228,32 @@ public interface Table extends * caller expects. This differs from {@link #getColumnSource(String, Class)} which uses the provided {@link Class} * object to verify that the data type is a subclass of the expected class. * + *

+ * The success of this call is equivalent to {@code getDefinition().checkColumn(sourceName)}, which is the preferred + * way to check for compatibility in scenarios where the caller does not want the implementation to potentially + * invoke {@link #coalesce()}. + * * @param sourceName The name of the column * @param The target type, as a type parameter. Inferred from context. * @return The column source for {@code sourceName}, parameterized by {@code T} + * @see TableDefinition#checkHasColumn(String) */ ColumnSource getColumnSource(String sourceName); /** * Retrieves a {@code ColumnSource} and {@link ColumnSource#cast(Class) casts} it to the target class {@code clazz}. * + *

+ * The success of this call is equivalent to {@code getDefinition().checkColumn(sourceName, clazz)}, which is the + * preferred way to check for compatibility in scenarios where the caller does not want the implementation to + * potentially invoke {@link #coalesce()}. + * * @param sourceName The name of the column * @param clazz The target type * @param The target type, as a type parameter. Intended to be inferred from {@code clazz}. * @return The column source for {@code sourceName}, parameterized by {@code T} * @see ColumnSource#cast(Class) + * @see TableDefinition#checkHasColumn(String, Class) */ ColumnSource getColumnSource(String sourceName, Class clazz); @@ -249,12 +261,18 @@ public interface Table extends * Retrieves a {@code ColumnSource} and {@link ColumnSource#cast(Class, Class)} casts} it to the target class * {@code clazz} and {@code componentType}. * + *

+ * The success of this call is equivalent to {@code getDefinition().checkColumn(sourceName, clazz, componentType)}, + * which is the preferred way to check for compatibility in scenarios where the caller does not want the + * implementation to potentially invoke {@link #coalesce()}. + * * @param sourceName The name of the column * @param clazz The target type * @param componentType The target component type, may be null * @param The target type, as a type parameter. Intended to be inferred from {@code clazz}. * @return The column source for {@code sourceName}, parameterized by {@code T} * @see ColumnSource#cast(Class, Class) + * @see TableDefinition#checkHasColumn(String, Class, Class) */ ColumnSource getColumnSource(String sourceName, Class clazz, @Nullable Class componentType); diff --git a/engine/api/src/main/java/io/deephaven/engine/table/TableDefinition.java b/engine/api/src/main/java/io/deephaven/engine/table/TableDefinition.java index 66aa6b003f7..45fe5a33a37 100644 --- a/engine/api/src/main/java/io/deephaven/engine/table/TableDefinition.java +++ b/engine/api/src/main/java/io/deephaven/engine/table/TableDefinition.java @@ -313,6 +313,40 @@ public final void checkHasColumn(@NotNull String columnName) { NoSuchColumnException.throwIf(getColumnNameSet(), columnName); } + /** + * Checks if {@code columnName} exists and supports {@link ColumnDefinition#checkCastTo(Class)} with {@code clazz}. + * Otherwise, throws a {@link NoSuchColumnException} or a {@link ClassCastException}. + * + * @param columnName the column name + * @param clazz the data type + * @see ColumnDefinition#checkCastTo(Class) + */ + public final void checkHasColumn(@NotNull String columnName, @NotNull Class clazz) { + final ColumnDefinition cd = getColumn(columnName); + if (cd == null) { + throw new NoSuchColumnException(getColumnNameSet(), columnName); + } + cd.checkCastTo(clazz); + } + + /** + * Checks if {@code columnName} exists and supports {@link ColumnDefinition#checkCastTo(Class, Class)} with + * {@code clazz} and {@code componentType}. Otherwise, throws a {@link NoSuchColumnException} or a + * {@link ClassCastException}. + * + * @param columnName the column name + * @param clazz the data type + * @param componentType the component type + * @see ColumnDefinition#checkCastTo(Class, Class) + */ + public final void checkHasColumn(@NotNull String columnName, @NotNull Class clazz, Class componentType) { + final ColumnDefinition cd = getColumn(columnName); + if (cd == null) { + throw new NoSuchColumnException(getColumnNameSet(), columnName); + } + cd.checkCastTo(clazz, componentType); + } + /** * Check this definition to ensure that all {@code columns} are present. 
* diff --git a/engine/api/src/main/java/io/deephaven/engine/table/TypeHelper.java b/engine/api/src/main/java/io/deephaven/engine/table/TypeHelper.java new file mode 100644 index 00000000000..e1029ebd21f --- /dev/null +++ b/engine/api/src/main/java/io/deephaven/engine/table/TypeHelper.java @@ -0,0 +1,36 @@ +/** + * Copyright (c) 2016-2023 Deephaven Data Labs and Patent Pending + */ +package io.deephaven.engine.table; + +class TypeHelper { + + // Could be good to move this to io.deephaven.qst.type.Type layer + + public static void checkCastTo(String context, Class srcType, Class destType) { + if (!destType.isAssignableFrom(srcType)) { + throw new ClassCastException(String.format("Cannot convert %s of type %s to type %s", + context, srcType.getName(), destType.getName())); + } + } + + public static void checkCastTo( + String prefix, + Class srcType, + Class srcComponentType, + Class destType, + Class destComponentType) { + checkCastTo(prefix, srcType, destType); + if ((srcComponentType == null && destComponentType == null) || (srcComponentType != null + && destComponentType != null && destComponentType.isAssignableFrom(srcComponentType))) { + return; + } + throw new ClassCastException(String.format( + "Cannot convert %s componentType of type %s to %s (for %s / %s)", + prefix, + srcComponentType == null ? null : srcComponentType.getName(), + destComponentType == null ? null : destComponentType.getName(), + srcType.getName(), + destType.getName())); + } +} diff --git a/extensions/kafka/src/main/java/io/deephaven/kafka/KafkaPublishOptions.java b/extensions/kafka/src/main/java/io/deephaven/kafka/KafkaPublishOptions.java index 585ca3ee256..7e6a43d4013 100644 --- a/extensions/kafka/src/main/java/io/deephaven/kafka/KafkaPublishOptions.java +++ b/extensions/kafka/src/main/java/io/deephaven/kafka/KafkaPublishOptions.java @@ -4,6 +4,7 @@ package io.deephaven.kafka; import io.deephaven.annotations.BuildableStyle; +import io.deephaven.api.ColumnName; import io.deephaven.engine.table.Table; import io.deephaven.kafka.KafkaTools.Produce; import io.deephaven.kafka.KafkaTools.Produce.KeyOrValueSpec; @@ -11,6 +12,10 @@ import org.immutables.value.Value.Default; import org.immutables.value.Value.Immutable; +import javax.annotation.Nullable; +import java.time.Instant; +import java.util.Optional; +import java.util.OptionalInt; import java.util.Properties; /** @@ -34,16 +39,26 @@ public static Builder builder() { public abstract Table table(); /** - * The kafka topic to publish to. + * The default Kafka topic to publish to. When {@code null}, {@link #topicColumn()} must be set. * - * @return the kafka topic + * @return the default Kafka topic + * @see #topicColumn() */ + @Nullable public abstract String topic(); /** - * The kafka configuration properties. + * The default Kafka partition to publish to. * - * @return the kafka configuration + * @return the default Kafka partition + * @see #partitionColumn() + */ + public abstract OptionalInt partition(); + + /** + * The Kafka configuration properties. + * + * @return the Kafka configuration */ public abstract Properties config(); @@ -93,6 +108,38 @@ public boolean publishInitial() { return true; } + /** + * The topic column. When set, uses the the given {@link CharSequence}-compatible column from {@link #table()} as + * the first source for setting the Kafka record topic. When not present, or if the column value is null, + * {@link #topic()} will be used. 
+ * + * @return the topic column name + */ + public abstract Optional topicColumn(); + + /** + * The partition column. When set, uses the the given {@code int} column from {@link #table()} as the first source + * for setting the Kafka record partition. When not present, or if the column value is null, {@link #partition()} + * will be used if present. If a valid partition number is specified, that partition will be used when sending the + * record. Otherwise, Kafka will choose a partition using a hash of the key if the key is present, or will assign a + * partition in a round-robin fashion if the key is not present. + * + * @return the partition column name + */ + public abstract Optional partitionColumn(); + + /** + * The timestamp column. When set, uses the the given {@link Instant} column from {@link #table()} as the first + * source for setting the Kafka record timestamp. When not present, or if the column value is null, the producer + * will stamp the record with its current time. The timestamp eventually used by Kafka depends on the timestamp type + * configured for the topic. If the topic is configured to use CreateTime, the timestamp in the producer record will + * be used by the broker. If the topic is configured to use LogAppendTime, the timestamp in the producer record will + * be overwritten by the broker with the broker local time when it appends the message to its log. + * + * @return the timestamp column name + */ + public abstract Optional timestampColumn(); + @Check final void checkNotBothIgnore() { if (Produce.isIgnore(keySpec()) && Produce.isIgnore(valueSpec())) { @@ -114,12 +161,42 @@ final void checkLastBy() { } } + @Check + final void checkTopic() { + if (topic() == null && topicColumn().isEmpty()) { + throw new IllegalArgumentException("Must set topic or topicColumn (or both)"); + } + } + + @Check + final void checkTopicColumn() { + if (topicColumn().isPresent()) { + table().getDefinition().checkHasColumn(topicColumn().get().name(), CharSequence.class); + } + } + + @Check + final void checkPartitionColumn() { + if (partitionColumn().isPresent()) { + table().getDefinition().checkHasColumn(partitionColumn().get().name(), int.class); + } + } + + @Check + final void checkTimestampColumn() { + if (timestampColumn().isPresent()) { + table().getDefinition().checkHasColumn(timestampColumn().get().name(), Instant.class); + } + } + public interface Builder { Builder table(Table table); Builder topic(String topic); + Builder partition(int partition); + Builder config(Properties config); Builder keySpec(KeyOrValueSpec keySpec); @@ -130,6 +207,12 @@ public interface Builder { Builder publishInitial(boolean publishInitial); + Builder topicColumn(ColumnName columnName); + + Builder partitionColumn(ColumnName columnName); + + Builder timestampColumn(ColumnName columnName); + KafkaPublishOptions build(); } } diff --git a/extensions/kafka/src/main/java/io/deephaven/kafka/KafkaTools.java b/extensions/kafka/src/main/java/io/deephaven/kafka/KafkaTools.java index 9dab3b5b194..a8fae8e84af 100644 --- a/extensions/kafka/src/main/java/io/deephaven/kafka/KafkaTools.java +++ b/extensions/kafka/src/main/java/io/deephaven/kafka/KafkaTools.java @@ -1442,12 +1442,16 @@ public static Runnable produceFromTable(KafkaPublishOptions options) { options.config(), effectiveTable, options.topic(), + options.partition().isEmpty() ? 
null : options.partition().getAsInt(), keyColumns, keySpecSerializer, keySerializer, valueColumns, valueSpecSerializer, valueSerializer, + options.topicColumn().orElse(null), + options.partitionColumn().orElse(null), + options.timestampColumn().orElse(null), options.publishInitial()); } return publisherScope::release; diff --git a/extensions/kafka/src/main/java/io/deephaven/kafka/publish/PublishToKafka.java b/extensions/kafka/src/main/java/io/deephaven/kafka/publish/PublishToKafka.java index 03d464d5729..c1759855889 100644 --- a/extensions/kafka/src/main/java/io/deephaven/kafka/publish/PublishToKafka.java +++ b/extensions/kafka/src/main/java/io/deephaven/kafka/publish/PublishToKafka.java @@ -3,19 +3,28 @@ */ package io.deephaven.kafka.publish; +import io.deephaven.api.ColumnName; import io.deephaven.base.verify.Assert; -import io.deephaven.chunk.attributes.Values; +import io.deephaven.chunk.IntChunk; +import io.deephaven.chunk.LongChunk; +import io.deephaven.chunk.ObjectChunk; import io.deephaven.configuration.Configuration; -import io.deephaven.engine.table.ModifiedColumnSet; -import io.deephaven.engine.table.Table; -import io.deephaven.engine.table.TableUpdate; -import io.deephaven.engine.updategraph.UpdateGraph; import io.deephaven.engine.liveness.LivenessArtifact; import io.deephaven.engine.liveness.LivenessScope; -import io.deephaven.engine.table.impl.*; -import io.deephaven.chunk.ObjectChunk; import io.deephaven.engine.rowset.RowSequence; import io.deephaven.engine.rowset.RowSet; +import io.deephaven.engine.table.ChunkSource; +import io.deephaven.engine.table.ColumnSource; +import io.deephaven.engine.table.ModifiedColumnSet; +import io.deephaven.engine.table.Table; +import io.deephaven.engine.table.TableUpdate; +import io.deephaven.engine.table.impl.BlinkTableTools; +import io.deephaven.engine.table.impl.InstrumentedTableUpdateListenerAdapter; +import io.deephaven.engine.table.impl.QueryTable; +import io.deephaven.engine.table.impl.sources.ReinterpretUtils; +import io.deephaven.engine.updategraph.UpdateGraph; +import io.deephaven.kafka.KafkaPublishOptions; +import io.deephaven.util.QueryConstants; import io.deephaven.util.SafeCloseable; import io.deephaven.util.annotations.InternalUseOnly; import io.deephaven.util.annotations.ReferentialIntegrity; @@ -26,8 +35,10 @@ import org.apache.kafka.common.serialization.Serializer; import org.jetbrains.annotations.NotNull; +import java.time.Instant; import java.util.Objects; import java.util.Properties; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; @@ -43,13 +54,36 @@ public class PublishToKafka extends LivenessArtifact { private final Table table; private final KafkaProducer producer; - private final String topic; + private final String defaultTopic; + private final Integer defaultPartition; private final KeyOrValueSerializer keyChunkSerializer; private final KeyOrValueSerializer valueChunkSerializer; + private final ColumnSource topicColumnSource; + private final ColumnSource partitionColumnSource; + private final ColumnSource timestampColumnSource; @ReferentialIntegrity private final PublishListener publishListener; + /** + * @deprecated please use {@link io.deephaven.kafka.KafkaTools#produceFromTable(KafkaPublishOptions)} + */ + @Deprecated(forRemoval = true) + public PublishToKafka( + final Properties props, + final Table table, + final String topic, + final String[] keyColumns, + final Serializer kafkaKeySerializer, + final KeyOrValueSerializer 
keyChunkSerializer, + final String[] valueColumns, + final Serializer kafkaValueSerializer, + final KeyOrValueSerializer valueChunkSerializer, + final boolean publishInitial) { + this(props, table, topic, null, keyColumns, kafkaKeySerializer, keyChunkSerializer, valueColumns, + kafkaValueSerializer, valueChunkSerializer, null, null, null, publishInitial); + } + /** *

* Construct a publisher for {@code table} according the to Kafka {@code props} for the supplied {@code topic}. @@ -77,7 +111,8 @@ public class PublishToKafka extends LivenessArtifact { * * @param props The Kafka {@link Properties} * @param table The source {@link Table} - * @param topic The destination topic + * @param defaultTopic The default destination topic + * @param defaultPartition The default destination partition * @param keyColumns Optional array of string column names from table for the columns corresponding to Kafka's Key * field. * @param kafkaKeySerializer The kafka {@link Serializer} to use for keys @@ -89,26 +124,46 @@ public class PublishToKafka extends LivenessArtifact { * @param valueChunkSerializer Optional {@link KeyOrValueSerializer} to consume table data and produce Kafka record * values in chunk-oriented fashion * @param publishInitial If the initial data in {@code table} should be published + * @param topicColumn The topic column. When set, uses the the given {@link CharSequence} column from {@code table} + * as the first source for setting the kafka record topic. + * @param partitionColumn The partition column. When set, uses the the given {@code int} column from {@code table} + * as the first source for setting the kafka record partition. + * @param timestampColumn The timestamp column. When set, uses the the given {@link Instant} column from + * {@code table} as the first source for setting the kafka record timestamp. */ public PublishToKafka( final Properties props, - final Table table, - final String topic, + Table table, + final String defaultTopic, + final Integer defaultPartition, final String[] keyColumns, final Serializer kafkaKeySerializer, final KeyOrValueSerializer keyChunkSerializer, final String[] valueColumns, final Serializer kafkaValueSerializer, final KeyOrValueSerializer valueChunkSerializer, + final ColumnName topicColumn, + final ColumnName partitionColumn, + final ColumnName timestampColumn, final boolean publishInitial) { - this.table = table; + this.table = (table = table.coalesce()); this.producer = new KafkaProducer<>( props, Objects.requireNonNull(kafkaKeySerializer), Objects.requireNonNull(kafkaValueSerializer)); - this.topic = topic; + this.defaultTopic = defaultTopic; + this.defaultPartition = defaultPartition; this.keyChunkSerializer = keyChunkSerializer; this.valueChunkSerializer = valueChunkSerializer; + this.topicColumnSource = topicColumn == null + ? null + : table.getColumnSource(topicColumn.name(), CharSequence.class); + this.partitionColumnSource = partitionColumn == null + ? null + : table.getColumnSource(partitionColumn.name(), int.class); + this.timestampColumnSource = timestampColumn == null + ? null + : ReinterpretUtils.instantToLongSource(table.getColumnSource(timestampColumn.name(), Instant.class)); if (publishInitial) { // Publish the initial table state try (final PublicationGuard guard = new PublicationGuard()) { @@ -133,6 +188,38 @@ private static ModifiedColumnSet getModifiedColumnSet(@NotNull final Table table : ((QueryTable) table).newModifiedColumnSet(columns); } + private String topic(ObjectChunk topicChunk, int index) { + if (topicChunk == null) { + return defaultTopic; + } + final CharSequence charSequence = topicChunk.get(index); + return charSequence == null ? 
defaultTopic : charSequence.toString(); + } + + private Integer partition(IntChunk partitionChunk, int index) { + if (partitionChunk == null) { + return defaultPartition; + } + final int partition = partitionChunk.get(index); + return partition == QueryConstants.NULL_INT ? defaultPartition : Integer.valueOf(partition); + } + + public static Long timestampMillis(LongChunk nanosChunk, int index) { + if (nanosChunk == null) { + return null; + } + final long nanos = nanosChunk.get(index); + return nanos == QueryConstants.NULL_LONG ? null : TimeUnit.NANOSECONDS.toMillis(nanos); + } + + private static T object(ObjectChunk chunk, int index) { + return chunk == null ? null : chunk.get(index); + } + + private static ChunkSource.GetContext makeGetContext(ColumnSource source, int chunkSize) { + return source == null ? null : source.makeGetContext(chunkSize); + } + private void publishMessages(@NotNull final RowSet rowsToPublish, final boolean usePrevious, final boolean publishValues, @NotNull final PublicationGuard guard) { if (rowsToPublish.isEmpty()) { @@ -142,31 +229,55 @@ private void publishMessages(@NotNull final RowSet rowsToPublish, final boolean final int chunkSize = (int) Math.min(CHUNK_SIZE, rowsToPublish.size()); try (final RowSequence.Iterator rowsIterator = rowsToPublish.getRowSequenceIterator(); - final KeyOrValueSerializer.Context keyContext = - keyChunkSerializer != null ? keyChunkSerializer.makeContext(chunkSize) : null; - final KeyOrValueSerializer.Context valueContext = - publishValues && valueChunkSerializer != null ? valueChunkSerializer.makeContext(chunkSize) - : null) { + final KeyOrValueSerializer.Context keyContext = keyChunkSerializer != null + ? keyChunkSerializer.makeContext(chunkSize) + : null; + final KeyOrValueSerializer.Context valueContext = publishValues && valueChunkSerializer != null + ? valueChunkSerializer.makeContext(chunkSize) + : null; + final ChunkSource.GetContext topicContext = makeGetContext(topicColumnSource, chunkSize); + final ChunkSource.GetContext partitionContext = makeGetContext(partitionColumnSource, chunkSize); + final ChunkSource.GetContext timestampContext = makeGetContext(timestampColumnSource, chunkSize)) { while (rowsIterator.hasMore()) { final RowSequence chunkRowKeys = rowsIterator.getNextRowSequenceWithLength(chunkSize); - final ObjectChunk keyChunk; - if (keyContext != null) { - keyChunk = keyChunkSerializer.handleChunk(keyContext, chunkRowKeys, usePrevious); - } else { - keyChunk = null; - } + final ObjectChunk keyChunk = keyContext == null + ? null + : keyChunkSerializer.handleChunk(keyContext, chunkRowKeys, usePrevious); - final ObjectChunk valueChunk; - if (valueContext != null) { - valueChunk = valueChunkSerializer.handleChunk(valueContext, chunkRowKeys, usePrevious); - } else { - valueChunk = null; - } + final ObjectChunk valueChunk = valueContext == null + ? null + : valueChunkSerializer.handleChunk(valueContext, chunkRowKeys, usePrevious); + + final ObjectChunk topicChunk = topicContext == null + ? null + : (usePrevious + ? topicColumnSource.getPrevChunk(topicContext, chunkRowKeys) + : topicColumnSource.getChunk(topicContext, chunkRowKeys)) + .asObjectChunk(); + + final IntChunk partitionChunk = partitionContext == null + ? null + : (usePrevious + ? partitionColumnSource.getPrevChunk(partitionContext, chunkRowKeys) + : partitionColumnSource.getChunk(partitionContext, chunkRowKeys)) + .asIntChunk(); + + final LongChunk timestampChunk = timestampContext == null + ? null + : (usePrevious + ? 
timestampColumnSource.getPrevChunk(timestampContext, chunkRowKeys) + : timestampColumnSource.getChunk(timestampContext, chunkRowKeys)) + .asLongChunk(); - for (int ii = 0; ii < chunkRowKeys.intSize(); ++ii) { - final ProducerRecord record = new ProducerRecord<>(topic, - keyChunk != null ? keyChunk.get(ii) : null, valueChunk != null ? valueChunk.get(ii) : null); + final int numRecords = chunkRowKeys.intSize(); + for (int ii = 0; ii < numRecords; ++ii) { + final ProducerRecord record = new ProducerRecord<>( + topic(topicChunk, ii), + partition(partitionChunk, ii), + timestampMillis(timestampChunk, ii), + object(keyChunk, ii), + object(valueChunk, ii)); producer.send(record, guard); } } diff --git a/extensions/kafka/src/test/java/io/deephaven/kafka/KafkaPublishOptionsTest.java b/extensions/kafka/src/test/java/io/deephaven/kafka/KafkaPublishOptionsTest.java new file mode 100644 index 00000000000..b6d351e7d33 --- /dev/null +++ b/extensions/kafka/src/test/java/io/deephaven/kafka/KafkaPublishOptionsTest.java @@ -0,0 +1,217 @@ +/** + * Copyright (c) 2016-2023 Deephaven Data Labs and Patent Pending + */ +package io.deephaven.kafka; + +import io.deephaven.api.ColumnName; +import io.deephaven.engine.table.ColumnDefinition; +import io.deephaven.engine.table.TableDefinition; +import io.deephaven.engine.table.impl.NoSuchColumnException; +import io.deephaven.engine.util.TableTools; +import io.deephaven.kafka.KafkaTools.Produce; +import org.junit.Test; + +import java.util.Properties; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.failBecauseExceptionWasNotThrown; + +public class KafkaPublishOptionsTest { + + private static final TableDefinition TD = TableDefinition.of( + ColumnDefinition.ofString("MyTopic"), + ColumnDefinition.ofInt("MyPartition"), + ColumnDefinition.ofTime("MyTimestamp"), + ColumnDefinition.ofString("MyValue")); + + @Test + public void ok() { + KafkaPublishOptions.builder() + .table(TableTools.newTable(TD)) + .topic("HotTopic") + .config(new Properties()) + .valueSpec(Produce.simpleSpec("MyValue")) + .build(); + } + + @Test + public void okPartition() { + KafkaPublishOptions.builder() + .table(TableTools.newTable(TD)) + .topic("HotTopic") + .partition(123) + .config(new Properties()) + .valueSpec(Produce.simpleSpec("MyValue")) + .build(); + } + + + @Test + public void checkNotBothIgnore() { + try { + KafkaPublishOptions.builder() + .table(TableTools.newTable(TD)) + .topic("HotTopic") + .config(new Properties()) + .build(); + failBecauseExceptionWasNotThrown(IllegalArgumentException.class); + } catch (IllegalArgumentException e) { + assertThat(e).hasMessage("keySpec and valueSpec can't both be ignore specs"); + } + } + + @Test + public void checkPublishInitial() { + try { + KafkaPublishOptions.builder() + .table(TableTools.newTable(TD)) + .topic("HotTopic") + .config(new Properties()) + .valueSpec(Produce.simpleSpec("MyValue")) + .publishInitial(false) + .build(); + failBecauseExceptionWasNotThrown(IllegalArgumentException.class); + } catch (IllegalArgumentException e) { + assertThat(e).hasMessage("publishInitial==false && table.isRefreshing() == false"); + } + } + + @Test + public void checkLastBy() { + try { + KafkaPublishOptions.builder() + .table(TableTools.newTable(TD)) + .topic("HotTopic") + .config(new Properties()) + .valueSpec(Produce.simpleSpec("MyValue")) + .lastBy(true) + .build(); + failBecauseExceptionWasNotThrown(IllegalArgumentException.class); + } catch (IllegalArgumentException e) { + 
assertThat(e).hasMessage("Must set a non-ignore keySpec when lastBy() == true"); + } + KafkaPublishOptions.builder() + .table(TableTools.newTable(TD)) + .topic("HotTopic") + .config(new Properties()) + .keySpec(Produce.simpleSpec("MyValue")) + .lastBy(true) + .build(); + } + + @Test + public void checkTopic() { + try { + KafkaPublishOptions.builder() + .table(TableTools.newTable(TD)) + .config(new Properties()) + .valueSpec(Produce.simpleSpec("MyValue")) + .build(); + failBecauseExceptionWasNotThrown(IllegalArgumentException.class); + } catch (IllegalArgumentException e) { + assertThat(e).hasMessage("Must set topic or topicColumn (or both)"); + } + } + + @Test + public void checkTopicColumn() { + try { + KafkaPublishOptions.builder() + .table(TableTools.newTable(TD)) + .config(new Properties()) + .valueSpec(Produce.simpleSpec("MyValue")) + .topicColumn(ColumnName.of("DoesNotExist")) + .build(); + failBecauseExceptionWasNotThrown(NoSuchColumnException.class); + } catch (NoSuchColumnException e) { + assertThat(e).hasMessageContaining("Unknown column names [DoesNotExist]"); + } + try { + KafkaPublishOptions.builder() + .table(TableTools.newTable(TD)) + .config(new Properties()) + .valueSpec(Produce.simpleSpec("MyValue")) + .topicColumn(ColumnName.of("MyPartition")) + .build(); + failBecauseExceptionWasNotThrown(ClassCastException.class); + } catch (ClassCastException e) { + assertThat(e).hasMessage("Cannot convert [MyPartition] of type int to type java.lang.CharSequence"); + } + KafkaPublishOptions.builder() + .table(TableTools.newTable(TD)) + .config(new Properties()) + .valueSpec(Produce.simpleSpec("MyValue")) + .topicColumn(ColumnName.of("MyTopic")) + .build(); + } + + @Test + public void checkPartitionColumn() { + try { + KafkaPublishOptions.builder() + .table(TableTools.newTable(TD)) + .topic("HotTopic") + .config(new Properties()) + .valueSpec(Produce.simpleSpec("MyValue")) + .partitionColumn(ColumnName.of("DoesNotExist")) + .build(); + failBecauseExceptionWasNotThrown(NoSuchColumnException.class); + } catch (NoSuchColumnException e) { + assertThat(e).hasMessageContaining("Unknown column names [DoesNotExist]"); + } + try { + KafkaPublishOptions.builder() + .table(TableTools.newTable(TD)) + .topic("HotTopic") + .config(new Properties()) + .valueSpec(Produce.simpleSpec("MyValue")) + .partitionColumn(ColumnName.of("MyTopic")) + .build(); + failBecauseExceptionWasNotThrown(ClassCastException.class); + } catch (ClassCastException e) { + assertThat(e).hasMessage("Cannot convert [MyTopic] of type java.lang.String to type int"); + } + KafkaPublishOptions.builder() + .table(TableTools.newTable(TD)) + .topic("HotTopic") + .config(new Properties()) + .valueSpec(Produce.simpleSpec("MyValue")) + .partitionColumn(ColumnName.of("MyPartition")) + .build(); + } + + @Test + public void checkTimestampColumn() { + try { + KafkaPublishOptions.builder() + .table(TableTools.newTable(TD)) + .topic("HotTopic") + .config(new Properties()) + .valueSpec(Produce.simpleSpec("MyValue")) + .timestampColumn(ColumnName.of("DoesNotExist")) + .build(); + failBecauseExceptionWasNotThrown(NoSuchColumnException.class); + } catch (NoSuchColumnException e) { + assertThat(e).hasMessageContaining("Unknown column names [DoesNotExist]"); + } + try { + KafkaPublishOptions.builder() + .table(TableTools.newTable(TD)) + .topic("HotTopic") + .config(new Properties()) + .valueSpec(Produce.simpleSpec("MyValue")) + .timestampColumn(ColumnName.of("MyTopic")) + .build(); + failBecauseExceptionWasNotThrown(ClassCastException.class); + } 
catch (ClassCastException e) { + assertThat(e).hasMessage("Cannot convert [MyTopic] of type java.lang.String to type java.time.Instant"); + } + KafkaPublishOptions.builder() + .table(TableTools.newTable(TD)) + .topic("HotTopic") + .config(new Properties()) + .valueSpec(Produce.simpleSpec("MyValue")) + .timestampColumn(ColumnName.of("MyTimestamp")) + .build(); + } +} diff --git a/py/server/deephaven/stream/kafka/producer.py b/py/server/deephaven/stream/kafka/producer.py index a9e65aae9a1..0b814ee8e8d 100644 --- a/py/server/deephaven/stream/kafka/producer.py +++ b/py/server/deephaven/stream/kafka/producer.py @@ -3,7 +3,7 @@ # """ The kafka.producer module supports publishing Deephaven tables to Kafka streams. """ -from typing import Dict, Callable, List +from typing import Dict, Callable, List, Optional import jpy @@ -17,6 +17,7 @@ _JAvroSchema = jpy.get_type("org.apache.avro.Schema") _JKafkaTools_Produce = jpy.get_type("io.deephaven.kafka.KafkaTools$Produce") _JKafkaPublishOptions = jpy.get_type("io.deephaven.kafka.KafkaPublishOptions") +_JColumnName = jpy.get_type("io.deephaven.api.ColumnName") class KeyValueSpec(JObjectWrapper): @@ -36,20 +37,24 @@ def j_object(self) -> jpy.JType: def produce( table: Table, kafka_config: Dict, - topic: str, + topic: Optional[str], key_spec: KeyValueSpec, value_spec: KeyValueSpec, last_by_key_columns: bool = False, publish_initial: bool = True, + partition: Optional[int] = None, + topic_col: Optional[str] = None, + partition_col: Optional[str] = None, + timestamp_col: Optional[str] = None, ) -> Callable[[], None]: """Produce to Kafka from a Deephaven table. Args: table (Table): the source table to publish to Kafka - kafka_config (Dict): configuration for the associated kafka producer. + kafka_config (Dict): configuration for the associated Kafka producer. This is used to call the constructor of org.apache.kafka.clients.producer.KafkaProducer; pass any KafkaProducer specific desired configuration here - topic (str): the topic name + topic (Optional[str]): the default topic name. When None, topic_col must be set. See topic_col for behavior. key_spec (KeyValueSpec): specifies how to map table column(s) to the Key field in produced Kafka messages. This should be the result of calling one of the functions simple_spec(), avro_spec() or json_spec() in this module, or the constant KeyValueSpec.IGNORE @@ -61,6 +66,22 @@ def produce( aggregation on table grouped by the input columns of key_spec and publish to Kafka from the result. publish_initial (bool): whether the initial data in table should be published. When False, table.is_refreshing must be True. By default, is True. + partition (Optional[int]): the default partition, None by default. See partition_col for partition behavior. + topic_col (Optional[str]): the topic column, None by default. When set, uses the the given string column from + table as the first source for setting the Kafka record topic. When None, or if the column value is null, topic + will be used. + partition_col (Optional[str]): the partition column, None by default. When set, uses the the given int column + from table as the first source for setting the Kafka record partition. When None, or if the column value is null, + partition will be used if present. If a valid partition number is specified, that partition will be used + when sending the record. Otherwise, Kafka will choose a partition using a hash of the key if the key is present, + or will assign a partition in a round-robin fashion if the key is not present. 
+ timestamp_col (Optional[str]): the timestamp column, None by default. When set, uses the the given timestamp + column from table as the first source for setting the Kafka record timestamp. When None, or if the column value + is null, the producer will stamp the record with its current time. The timestamp eventually used by Kafka + depends on the timestamp type configured for the topic. If the topic is configured to use CreateTime, the + timestamp in the producer record will be used by the broker. If the topic is configured to use LogAppendTime, + the timestamp in the producer record will be overwritten by the broker with the broker local time when it + appends the message to its log. Returns: a callback that, when invoked, stops publishing and cleans up subscriptions and resources. @@ -77,20 +98,28 @@ def produce( ) if not publish_initial and not table.is_refreshing: raise ValueError("publish_initial == False and table.is_refreshing == False") - options = ( + options_builder = ( _JKafkaPublishOptions.builder() .table(table.j_table) - .topic(topic) .config(j_properties(kafka_config)) .keySpec(key_spec.j_object) .valueSpec(value_spec.j_object) .lastBy(last_by_key_columns and key_spec is not KeyValueSpec.IGNORE) .publishInitial(publish_initial) - .build() ) + if topic: + options_builder.topic(topic) + if partition: + options_builder.partition(partition) + if topic_col: + options_builder.topicColumn(_JColumnName.of(topic_col)) + if partition_col: + options_builder.partitionColumn(_JColumnName.of(partition_col)) + if timestamp_col: + options_builder.timestampColumn(_JColumnName.of(timestamp_col)) with auto_locking_ctx(table): - runnable = _JKafkaTools.produceFromTable(options) + runnable = _JKafkaTools.produceFromTable(options_builder.build()) def cleanup(): try: diff --git a/py/server/tests/test_kafka_producer.py b/py/server/tests/test_kafka_producer.py index 9b806d702db..d71b8ca8960 100644 --- a/py/server/tests/test_kafka_producer.py +++ b/py/server/tests/test_kafka_producer.py @@ -4,9 +4,10 @@ import os import unittest +from datetime import datetime from deephaven import kafka_producer as pk, new_table, time_table -from deephaven.column import string_col, int_col, double_col +from deephaven.column import string_col, int_col, double_col, datetime_col from deephaven.stream import kafka from deephaven.stream.kafka.producer import KeyValueSpec from tests.testbase import BaseTestCase @@ -50,6 +51,126 @@ def test_simple_spec(self): self.assertIsNotNone(cleanup) cleanup() + def test_simple_spec_topic_col_no_default_topic(self): + """ + Check a simple Kafka producer works with a topic column but no default topic + """ + t = new_table(cols=[ + string_col('Topic', ['orders_a', 'orders_b', 'orders_a', 'orders_b']), + double_col('Price', [10.0, 10.5, 11.0, 11.5]) + ]) + cleanup = pk.produce( + t, + {'bootstrap.servers': 'redpanda:29092'}, + None, + key_spec=KeyValueSpec.IGNORE, + value_spec=pk.simple_spec('Price'), + topic_col='Topic' + ) + + self.assertIsNotNone(cleanup) + cleanup() + + def test_simple_spec_topic_col_default_topic(self): + """ + Check a simple Kafka producer works with a topic column and a default topic + """ + t = new_table(cols=[ + string_col('Topic', ['orders_a', None, 'orders_a', 'orders_b']), + double_col('Price', [10.0, 10.5, 11.0, 11.5]) + ]) + cleanup = pk.produce( + t, + {'bootstrap.servers': 'redpanda:29092'}, + 'orders', + key_spec=KeyValueSpec.IGNORE, + value_spec=pk.simple_spec('Price'), + topic_col='Topic' + ) + + self.assertIsNotNone(cleanup) + cleanup() + + def 
test_simple_spec_default_partition(self): + """ + Check a simple Kafka producer works with a default partition + """ + t = new_table(cols=[ + double_col('Price', [10.0, 10.5, 11.0, 11.5])] + ) + cleanup = pk.produce( + t, + {'bootstrap.servers': 'redpanda:29092'}, + "orders", + key_spec=KeyValueSpec.IGNORE, + value_spec=pk.simple_spec('Price'), + partition=0 + ) + + self.assertIsNotNone(cleanup) + cleanup() + + def test_simple_spec_partition_col_no_default_partition(self): + """ + Check a simple Kafka producer works with a partition column + """ + t = new_table(cols=[ + int_col('Partition', [0, 0, 0, 0]), + double_col('Price', [10.0, 10.5, 11.0, 11.5]) + ]) + cleanup = pk.produce( + t, + {'bootstrap.servers': 'redpanda:29092'}, + "orders", + key_spec=KeyValueSpec.IGNORE, + value_spec=pk.simple_spec('Price'), + partition_col='Partition' + ) + + self.assertIsNotNone(cleanup) + cleanup() + + def test_simple_spec_partition_col_default_partition(self): + """ + Check a simple Kafka producer works with a partition column and default partition + """ + t = new_table(cols=[ + int_col('Partition', [0, 0, None, 0]), + double_col('Price', [10.0, 10.5, 11.0, 11.5]) + ]) + cleanup = pk.produce( + t, + {'bootstrap.servers': 'redpanda:29092'}, + "orders", + key_spec=KeyValueSpec.IGNORE, + value_spec=pk.simple_spec('Price'), + partition=0, + partition_col='Partition' + ) + + self.assertIsNotNone(cleanup) + cleanup() + + def test_simple_spec_timestamp_col(self): + """ + Check a simple Kafka producer works with a timestamp column + """ + t = new_table(cols=[ + datetime_col('Timestamp', [datetime.now(), datetime.now(), None, datetime.now()]), + double_col('Price', [10.0, 10.5, 11.0, 11.5]) + ]) + cleanup = pk.produce( + t, + {'bootstrap.servers': 'redpanda:29092'}, + "orders", + key_spec=KeyValueSpec.IGNORE, + value_spec=pk.simple_spec('Price'), + timestamp_col='Timestamp' + ) + + self.assertIsNotNone(cleanup) + cleanup() + def test_json_spec_only_columns(self): t = table_helper() cleanup = pk.produce( From 052921fb69b69bb260fb8f2de6253a108817d671 Mon Sep 17 00:00:00 2001 From: Alex Peters <80283343+alexpeters1208@users.noreply.github.com> Date: Fri, 17 Nov 2023 12:47:41 -0600 Subject: [PATCH 35/41] Website integration for R HTML documentation (#4851) * Start docsite * More docsite, vignettes do not work in R studio * More docsite, need to fix vignettes * Done with docsite? 
* Use R CMD to install in r-build.sh * Change readme, not totally happy with this * Use R CMD build * Update cpp-clients-multi image SHA * Gradle tasks and script for building docsite * Update gradle.properties * Update readme * Starting R doc CI * More doc CI stuff * More doc CI stuff 2 * Update r-site script * Update r-site script again * Update files * Remove mistaken comment * Chmod r-site * Revert docs-ci to only main * Use Don's suggestion Co-authored-by: Don --------- Co-authored-by: Don --- .github/workflows/docs-ci.yml | 48 +++++++++++++++++++++++++++++++++++ R/r-site.sh | 0 2 files changed, 48 insertions(+) mode change 100644 => 100755 R/r-site.sh diff --git a/.github/workflows/docs-ci.yml b/.github/workflows/docs-ci.yml index c581a11037b..c9e9fe5943c 100644 --- a/.github/workflows/docs-ci.yml +++ b/.github/workflows/docs-ci.yml @@ -191,3 +191,51 @@ jobs: remote_port: ${{ secrets.DOCS_PORT }} remote_user: ${{ secrets.DOCS_USER }} remote_key: ${{ secrets.DEEPHAVEN_CORE_SSH_KEY }} + + rdoc: + runs-on: ubuntu-22.04 + concurrency: + group: rdoc-${{ github.workflow }}-${{ github.ref }} + # We don't want to cancel in-progress jobs against main because that might leave the upload in a bad state. + cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup JDK 11 + id: setup-java-11 + uses: actions/setup-java@v3 + with: + distribution: 'temurin' + java-version: '11' + + - name: Set JAVA_HOME + run: echo "JAVA_HOME=${{ steps.setup-java-11.outputs.path }}" >> $GITHUB_ENV + + - name: Setup gradle properties + run: | + .github/scripts/gradle-properties.sh >> gradle.properties + cat gradle.properties + + - name: Generate R Docs + uses: burrunan/gradle-cache-action@v1 + with: + job-id: rDocs + arguments: R:rClientSite + gradle-version: wrapper + + - name: Deploy R Docs + if: ${{ github.ref == 'refs/heads/main' }} + uses: burnett01/rsync-deployments@5.2 + with: + switches: -avzr --delete + path: R/rdeephaven/docs/ + remote_path: deephaven-core/client-api/r/ + remote_host: ${{ secrets.DOCS_HOST }} + remote_port: ${{ secrets.DOCS_PORT }} + remote_user: ${{ secrets.DOCS_USER }} + remote_key: ${{ secrets.DEEPHAVEN_CORE_SSH_KEY }} + + - name: Upload JVM Error Logs + uses: actions/upload-artifact@v3 + if: failure() \ No newline at end of file diff --git a/R/r-site.sh b/R/r-site.sh old mode 100644 new mode 100755 From b186780071f936f65379a8b1b36b275f0d9e91fc Mon Sep 17 00:00:00 2001 From: Colin Alworth Date: Fri, 17 Nov 2023 15:45:29 -0600 Subject: [PATCH 36/41] Invoke typedoc to produce browsable JS API documentation (#4757) Replaces the annotation processor previously used with a doclet implementation. This still emits a .d.ts file using Java mirror/element types for accurate type information, but now is able to traverse Javadoc trees as well, and produces corresponding typedoc tags or markdown. The typedoc tool now generates HTML for deployment to the Deephaven docs website. Additionally, this branch produces a tarball suitable for deployment as an npm module, containing only those generated types. However, this output is still untested. 
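For local verification, the intended flow is roughly the following (a sketch only; the task
names come from this change, and the output locations assume the defaults wired up below):

    # Emit the TypeScript declaration file (build/ts-types/types.d.ts) via the TsDoclet-backed Javadoc task
    ./gradlew :web-client-api:typescriptDefinitions
    # Render browsable HTML with typedoc and pack the @deephaven/jsapi-types tarball (requires local Docker)
    ./gradlew :web-client-api:types:typedoc
    # Generated site and tarball land under web/client-api/types/build/

The docs-ci workflow added here runs the same typedoc task and, on main, rsyncs the generated
documentation directory to the docs host.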
--- .github/workflows/docs-ci.yml | 51 ++++++++++ settings.gradle | 4 + web/client-api/client-api.gradle | 19 +++- web/client-api/types/build.gradle | 52 ++++++++++ web/client-api/types/gradle.properties | 1 + web/client-api/types/package-lock.json | 96 +++++++++++++++++++ web/client-api/types/package.json | 27 ++++++ .../types/src/main/docker/theme.css | 18 ++++ web/client-api/types/tsconfig.json | 7 ++ 9 files changed, 273 insertions(+), 2 deletions(-) create mode 100644 web/client-api/types/build.gradle create mode 100644 web/client-api/types/gradle.properties create mode 100644 web/client-api/types/package-lock.json create mode 100644 web/client-api/types/package.json create mode 100644 web/client-api/types/src/main/docker/theme.css create mode 100644 web/client-api/types/tsconfig.json diff --git a/.github/workflows/docs-ci.yml b/.github/workflows/docs-ci.yml index c9e9fe5943c..9e2ca684d5b 100644 --- a/.github/workflows/docs-ci.yml +++ b/.github/workflows/docs-ci.yml @@ -65,6 +65,57 @@ jobs: remote_user: ${{ secrets.DOCS_USER }} remote_key: ${{ secrets.DEEPHAVEN_CORE_SSH_KEY }} + typedoc: + runs-on: ubuntu-22.04 + concurrency: + group: typedoc-${{ github.workflow }}-${{ github.ref }} + # We don't want to cancel in-progress jobs against main because that might leave the upload in a bad state. + cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup JDK 11 + id: setup-java-11 + uses: actions/setup-java@v3 + with: + distribution: 'temurin' + java-version: '11' + + - name: Setup JDK 17 + id: setup-java-17 + uses: actions/setup-java@v3 + with: + distribution: 'temurin' + java-version: '17' + + - name: Set JAVA_HOME + run: echo "JAVA_HOME=${{ steps.setup-java-11.outputs.path }}" >> $GITHUB_ENV + + - name: Run typedoc on JS API + uses: burrunan/gradle-cache-action@v1 + with: + job-id: typedoc + arguments: --scan :web-client-api:types:typedoc + gradle-version: wrapper + - name: Upload JavaScript/TypeScript docs + if: ${{ github.ref == 'refs/heads/main' }} + uses: actions/upload-artifact@v3 + with: + name: typedoc + path: 'web/client-api/types/build/documentation/' + - name: Deploy JavaScript/TypeScript docs + if: ${{ github.ref == 'refs/heads/main' }} + uses: burnett01/rsync-deployments@5.2 + with: + switches: -avzr --delete + path: web/client-api/docs/build/documentation/ + remote_path: deephaven-core/client-api/javascript/ + remote_host: ${{ secrets.DOCS_HOST }} + remote_port: ${{ secrets.DOCS_PORT }} + remote_user: ${{ secrets.DOCS_USER }} + remote_key: ${{ secrets.DEEPHAVEN_CORE_SSH_KEY }} + pydoc: runs-on: ubuntu-22.04 concurrency: diff --git a/settings.gradle b/settings.gradle index 6cfad7e1b31..f1474fcba81 100644 --- a/settings.gradle +++ b/settings.gradle @@ -416,6 +416,10 @@ Closure configureWebModule = { webMods.collect({ project(":$it")}).each configureWebModule +include ':web-client-api:types' +project(':web-client-api:types').projectDir = file('web/client-api/types') + + buildCache { local { removeUnusedEntriesAfterDays = 4 diff --git a/web/client-api/client-api.gradle b/web/client-api/client-api.gradle index d90b087281c..7409b44b81a 100644 --- a/web/client-api/client-api.gradle +++ b/web/client-api/client-api.gradle @@ -6,14 +6,16 @@ apply from: "$rootDir/gradle/web-client.gradle" configurations { js + dts + typescriptDoclet } dependencies { implementation project(':web-shared-beans') implementation project(':web-client-backplane') - implementation 
'com.vertispan.tsdefs:jsinterop-ts-defs-annotations:1.0.0-RC2' - annotationProcessor 'com.vertispan.tsdefs:jsinterop-ts-defs-processor:1.0.0-RC2' + implementation 'com.vertispan.tsdefs:jsinterop-ts-defs-annotations:1.0.0-RC3' + typescriptDoclet 'com.vertispan.tsdefs:jsinterop-ts-defs-doclet:1.0.0-RC3' implementation 'com.vertispan.nio:gwt-nio:1.0-alpha-1' @@ -36,10 +38,23 @@ def gwtOutput = tasks.register('gwtOutput', Sync) { into jsOutput } +def dtsOutput = layout.buildDirectory.dir('ts-types'); +def tsDefs = tasks.register('typescriptDefinitions', Javadoc) { + dependsOn 'compileJava' + source = sourceSets.main.allJava + options.classpath = sourceSets.main.compileClasspath.files as List + destinationDir = dtsOutput.get().asFile + options.docletpath = (configurations.typescriptDoclet.files as List) + (sourceSets.main.compileClasspath.files as List) + options.doclet = 'com.vertispan.tsdefs.doclet.TsDoclet' +} + artifacts { js(jsOutput) { builtBy gwtOutput } + dts(dtsOutput) { + builtBy tsDefs + } } project.tasks.getByName('quick').dependsOn project.tasks.withType(de.esoco.gwt.gradle.task.GwtCompileTask) diff --git a/web/client-api/types/build.gradle b/web/client-api/types/build.gradle new file mode 100644 index 00000000000..d577b86b7bf --- /dev/null +++ b/web/client-api/types/build.gradle @@ -0,0 +1,52 @@ +plugins { + id 'com.bmuschko.docker-remote-api' + id 'io.deephaven.project.register' +} + +configurations { + dts +} +dependencies { + dts project(path: ':web-client-api', configuration: 'dts') +} + +Docker.registerDockerTask(project, 'typedoc') { + copyIn { + from(configurations.dts) { + into 'dist' + } + from 'tsconfig.json' + from 'package.json' + from 'package-lock.json' + + from('src/main/docker') { + include 'theme.css' + } + } + dockerfile { + // share the common base image to keep it simple + from 'deephaven/node:local-build' + + copyFile('.', '/project') + + runCommand('''set -eux; \\ + cd /project/; \\ + mv dist/types.d.ts dist/index.d.ts; \\ + npm ci; \\ + npm pack; \\ + mkdir /out; \\ + mv deephaven-jsapi-types*.tgz /out/; \\ + node_modules/.bin/typedoc dist/index.d.ts \\ + --out /out/documentation \\ + --skipErrorChecking \\ + --hideGenerator \\ + --disableSources \\ + --customCss theme.css; \\ + ''') + } + parentContainers = [ Docker.registryTask(project, 'node') ] // deephaven/node + containerOutPath = '/out' + copyOut { + into "$buildDir/" + } +} diff --git a/web/client-api/types/gradle.properties b/web/client-api/types/gradle.properties new file mode 100644 index 00000000000..8f42cc0940f --- /dev/null +++ b/web/client-api/types/gradle.properties @@ -0,0 +1 @@ +io.deephaven.project.ProjectType=BASIC diff --git a/web/client-api/types/package-lock.json b/web/client-api/types/package-lock.json new file mode 100644 index 00000000000..1a1b22d96b1 --- /dev/null +++ b/web/client-api/types/package-lock.json @@ -0,0 +1,96 @@ +{ + "requires": true, + "lockfileVersion": 1, + "dependencies": { + "ansi-sequence-parser": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/ansi-sequence-parser/-/ansi-sequence-parser-1.1.1.tgz", + "integrity": "sha512-vJXt3yiaUL4UU546s3rPXlsry/RnM730G1+HkpKE012AN0sx1eOrxSu95oKDIonskeLTijMgqWZ3uDEe3NFvyg==", + "dev": true + }, + "balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "dev": true + }, + "brace-expansion": { + "version": "2.0.1", + "resolved": 
"https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", + "integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==", + "dev": true, + "requires": { + "balanced-match": "^1.0.0" + } + }, + "jsonc-parser": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/jsonc-parser/-/jsonc-parser-3.2.0.tgz", + "integrity": "sha512-gfFQZrcTc8CnKXp6Y4/CBT3fTc0OVuDofpre4aEeEpSBPV5X5v4+Vmx+8snU7RLPrNHPKSgLxGo9YuQzz20o+w==", + "dev": true + }, + "lunr": { + "version": "2.3.9", + "resolved": "https://registry.npmjs.org/lunr/-/lunr-2.3.9.tgz", + "integrity": "sha512-zTU3DaZaF3Rt9rhN3uBMGQD3dD2/vFQqnvZCDv4dl5iOzq2IZQqTxu90r4E5J+nP70J3ilqVCrbho2eWaeW8Ow==", + "dev": true + }, + "marked": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/marked/-/marked-4.3.0.tgz", + "integrity": "sha512-PRsaiG84bK+AMvxziE/lCFss8juXjNaWzVbN5tXAm4XjeaS9NAHhop+PjQxz2A9h8Q4M/xGmzP8vqNwy6JeK0A==", + "dev": true + }, + "minimatch": { + "version": "9.0.3", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.3.tgz", + "integrity": "sha512-RHiac9mvaRw0x3AYRgDC1CxAP7HTcNrrECeA8YYJeWnpo+2Q5CegtZjaotWTWxDG3UeGA1coE05iH1mPjT/2mg==", + "dev": true, + "requires": { + "brace-expansion": "^2.0.1" + } + }, + "shiki": { + "version": "0.14.3", + "resolved": "https://registry.npmjs.org/shiki/-/shiki-0.14.3.tgz", + "integrity": "sha512-U3S/a+b0KS+UkTyMjoNojvTgrBHjgp7L6ovhFVZsXmBGnVdQ4K4U9oK0z63w538S91ATngv1vXigHCSWOwnr+g==", + "dev": true, + "requires": { + "ansi-sequence-parser": "^1.1.0", + "jsonc-parser": "^3.2.0", + "vscode-oniguruma": "^1.7.0", + "vscode-textmate": "^8.0.0" + } + }, + "typedoc": { + "version": "0.24.8", + "resolved": "https://registry.npmjs.org/typedoc/-/typedoc-0.24.8.tgz", + "integrity": "sha512-ahJ6Cpcvxwaxfu4KtjA8qZNqS43wYt6JL27wYiIgl1vd38WW/KWX11YuAeZhuz9v+ttrutSsgK+XO1CjL1kA3w==", + "dev": true, + "requires": { + "lunr": "^2.3.9", + "marked": "^4.3.0", + "minimatch": "^9.0.0", + "shiki": "^0.14.1" + } + }, + "typescript": { + "version": "5.1.6", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.1.6.tgz", + "integrity": "sha512-zaWCozRZ6DLEWAWFrVDz1H6FVXzUSfTy5FUMWsQlU8Ym5JP9eO4xkTIROFCQvhQf61z6O/G6ugw3SgAnvvm+HA==", + "dev": true + }, + "vscode-oniguruma": { + "version": "1.7.0", + "resolved": "https://registry.npmjs.org/vscode-oniguruma/-/vscode-oniguruma-1.7.0.tgz", + "integrity": "sha512-L9WMGRfrjOhgHSdOYgCt/yRMsXzLDJSL7BPrOZt73gU0iWO4mpqzqQzOz5srxqTvMBaR0XZTSrVWo4j55Rc6cA==", + "dev": true + }, + "vscode-textmate": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/vscode-textmate/-/vscode-textmate-8.0.0.tgz", + "integrity": "sha512-AFbieoL7a5LMqcnOF04ji+rpXadgOXnZsxQr//r83kLPr7biP7am3g9zbaZIaBGwBRWeSvoMD4mgPdX3e4NWBg==", + "dev": true + } + } +} diff --git a/web/client-api/types/package.json b/web/client-api/types/package.json new file mode 100644 index 00000000000..ce87d6d3acb --- /dev/null +++ b/web/client-api/types/package.json @@ -0,0 +1,27 @@ +{ + "name": "@deephaven/jsapi-types", + "version": "1.0.0-dev1", + "description": "Deephaven JSAPI Types", + "author": "Deephaven Data Labs LLC", + "license": "Apache-2.0", + "type": "module", + "repository": { + "type": "git", + "url": "https://github.com/deephaven/deephaven-core.git", + "directory": "web/client-api/types" + }, + "engines": { + "node": ">=16" + }, + "devDependencies": { + "typedoc": "^0.24.8", + "typescript": "^5.1.6" + }, + "files": [ + "dist" + ], + "sideEffects": false, + "publishConfig": { + "access": 
"public" + } +} \ No newline at end of file diff --git a/web/client-api/types/src/main/docker/theme.css b/web/client-api/types/src/main/docker/theme.css new file mode 100644 index 00000000000..ad4c725a9ab --- /dev/null +++ b/web/client-api/types/src/main/docker/theme.css @@ -0,0 +1,18 @@ +:root { + --dark-color-background: #040427; + --dark-color-background-secondary: #0d1b37; + --dark-color-background-warning: #fffbef; + --dark-color-warning-text: #463b19; + --dark-color-accent: #24405a; + --dark-color-active-menu-item: #2f546c; + --dark-color-text: #f0f9fb; + --dark-color-text-aside: #d3d3d3; + --dark-color-link: #65c6da; + --dark-color-ts-project: #b7a9f6; + --dark-color-ts-enum: #f4d93e; + --dark-color-ts-variable: #b2e3ed; + --dark-color-ts-function: #d5cdfa; + --dark-color-ts-class: #93d7e5; + --dark-color-ts-interface: #afd685; + --dark-color-ts-type-alias: #f27596; +} \ No newline at end of file diff --git a/web/client-api/types/tsconfig.json b/web/client-api/types/tsconfig.json new file mode 100644 index 00000000000..2536163998f --- /dev/null +++ b/web/client-api/types/tsconfig.json @@ -0,0 +1,7 @@ +{ + "files": ["dist/index.d.ts"], + "compilerOptions": { + "target": "es5", + "lib": ["es5","es2015", "DOM"] + } +} From a4e6c5ba6accdbba6c2c6e9cc2ef8d05f3a65bf8 Mon Sep 17 00:00:00 2001 From: Colin Alworth Date: Fri, 17 Nov 2023 16:15:52 -0600 Subject: [PATCH 37/41] Correct the path to read generated typedoc when deploying (#4854) --- .github/workflows/docs-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs-ci.yml b/.github/workflows/docs-ci.yml index 9e2ca684d5b..99161f04ca7 100644 --- a/.github/workflows/docs-ci.yml +++ b/.github/workflows/docs-ci.yml @@ -109,7 +109,7 @@ jobs: uses: burnett01/rsync-deployments@5.2 with: switches: -avzr --delete - path: web/client-api/docs/build/documentation/ + path: web/client-api/types/build/documentation/ remote_path: deephaven-core/client-api/javascript/ remote_host: ${{ secrets.DOCS_HOST }} remote_port: ${{ secrets.DOCS_PORT }} From 97b840781d0d574f1d8e5e2a68bdf306b01d08e1 Mon Sep 17 00:00:00 2001 From: Nate Bauernfeind Date: Fri, 17 Nov 2023 23:45:09 -0700 Subject: [PATCH 38/41] QueryPerformanceRecorder: Group Batched Operations as a Single Query (#4760) --- .../engine/table/impl/QueryTable.java | 20 +- .../table/impl/SelectOrUpdateListener.java | 8 +- .../table/impl/perf/BasePerformanceEntry.java | 124 ++-- .../table/impl/perf/PerformanceEntry.java | 25 +- .../impl/perf/QueryPerformanceNugget.java | 482 +++++++------ .../impl/perf/QueryPerformanceRecorder.java | 654 ++++++------------ .../perf/QueryPerformanceRecorderImpl.java | 344 +++++++++ .../perf/QueryPerformanceRecorderState.java | 269 +++++++ .../impl/perf/QueryProcessingResults.java | 43 -- .../engine/table/impl/perf/QueryState.java | 2 +- .../UpdatePerformanceStreamPublisher.java | 80 ++- .../impl/perf/UpdatePerformanceTracker.java | 10 +- .../table/impl/select/ConditionFilter.java | 6 +- .../table/impl/select/DhFormulaColumn.java | 5 +- .../select/codegen/JavaKernelBuilder.java | 5 +- .../engine/table/impl/updateby/UpdateBy.java | 14 +- .../table/impl/util/AsyncErrorImpl.java | 4 +- .../impl/util/AsyncErrorStreamPublisher.java | 12 +- .../engine/table/impl/util/EngineMetrics.java | 37 +- ...erationInitializationPoolJobScheduler.java | 4 +- .../util/QueryOperationPerformanceImpl.java | 14 +- ...ryOperationPerformanceStreamPublisher.java | 118 +++- .../table/impl/util/QueryPerformanceImpl.java | 18 +- 
.../util/QueryPerformanceStreamPublisher.java | 66 +- .../impl/util/UpdateGraphJobScheduler.java | 4 +- .../QueryOperationPerformanceLogLogger.java | 13 +- .../QueryPerformanceLogLogger.java | 29 +- .../deephaven/engine/util/TableShowTools.java | 7 +- .../select/TestConstantFormulaEvaluation.java | 24 +- .../main/java/io/deephaven/csv/CsvTools.java | 9 +- .../table/impl/util/PerformanceQueries.java | 22 + .../impl/util/PerformanceQueriesGeneral.java | 150 ++-- .../main/resources/defaultPackageFilters.qpr | 10 +- py/server/deephaven/perfmon.py | 37 +- py/server/tests/test_perfmon.py | 6 +- .../server/arrow/ArrowFlightUtil.java | 227 +++--- .../server/arrow/FlightServiceGrpcImpl.java | 109 ++- .../barrage/BarrageMessageProducer.java | 2 +- .../console/ConsoleServiceGrpcImpl.java | 118 ++-- .../HierarchicalTableServiceGrpcImpl.java | 431 ++++++------ .../server/object/ObjectServiceGrpcImpl.java | 102 +-- .../PartitionedTableServiceGrpcImpl.java | 200 +++--- .../server/runner/DeephavenApiServer.java | 6 +- .../session/SessionServiceGrpcImpl.java | 63 +- .../server/session/SessionState.java | 115 +-- .../server/session/TicketRouter.java | 37 +- .../InputTableServiceGrpcImpl.java | 189 ++--- .../table/ops/TableServiceGrpcImpl.java | 366 ++++++---- 48 files changed, 2789 insertions(+), 1851 deletions(-) create mode 100644 engine/table/src/main/java/io/deephaven/engine/table/impl/perf/QueryPerformanceRecorderImpl.java create mode 100644 engine/table/src/main/java/io/deephaven/engine/table/impl/perf/QueryPerformanceRecorderState.java delete mode 100644 engine/table/src/main/java/io/deephaven/engine/table/impl/perf/QueryProcessingResults.java diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/QueryTable.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/QueryTable.java index 60189aba160..5301eb54b48 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/QueryTable.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/QueryTable.java @@ -1268,11 +1268,8 @@ void handleUncaughtException(Exception throwable) { final BasePerformanceEntry basePerformanceEntry = initialFilterExecution.getBasePerformanceEntry(); if (basePerformanceEntry != null) { - final QueryPerformanceNugget outerNugget = - QueryPerformanceRecorder.getInstance().getOuterNugget(); - if (outerNugget != null) { - outerNugget.addBaseEntry(basePerformanceEntry); - } + QueryPerformanceRecorder.getInstance().getEnclosingNugget() + .accumulate(basePerformanceEntry); } } currentMapping.initializePreviousValue(); @@ -1516,11 +1513,7 @@ this, mode, columns, rowSet, getModifiedColumnSetForUpdates(), publishTheseSourc } finally { final BasePerformanceEntry baseEntry = jobScheduler.getAccumulatedPerformance(); if (baseEntry != null) { - final QueryPerformanceNugget outerNugget = - QueryPerformanceRecorder.getInstance().getOuterNugget(); - if (outerNugget != null) { - outerNugget.addBaseEntry(baseEntry); - } + QueryPerformanceRecorder.getInstance().getEnclosingNugget().accumulate(baseEntry); } } } @@ -3572,12 +3565,9 @@ public static void checkInitiateBinaryOperation(@NotNull final Table first, @Not } private R applyInternal(@NotNull final Function function) { - final QueryPerformanceNugget nugget = - QueryPerformanceRecorder.getInstance().getNugget("apply(" + function + ")"); - try { + try (final SafeCloseable ignored = + QueryPerformanceRecorder.getInstance().getNugget("apply(" + function + ")")) { return function.apply(this); - } finally { - nugget.done(); } } diff --git 
a/engine/table/src/main/java/io/deephaven/engine/table/impl/SelectOrUpdateListener.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/SelectOrUpdateListener.java index 724fe1ff0cc..1cab4f19722 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/SelectOrUpdateListener.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/SelectOrUpdateListener.java @@ -134,11 +134,9 @@ private void completionRoutine(TableUpdate upstream, JobScheduler jobScheduler, getUpdateGraph().addNotification(new TerminalNotification() { @Override public void run() { - synchronized (accumulated) { - final PerformanceEntry entry = getEntry(); - if (entry != null) { - entry.accumulate(accumulated); - } + final PerformanceEntry entry = getEntry(); + if (entry != null) { + entry.accumulate(accumulated); } } }); diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/BasePerformanceEntry.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/BasePerformanceEntry.java index d63ce199cac..4da70a34572 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/BasePerformanceEntry.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/BasePerformanceEntry.java @@ -7,6 +7,7 @@ import io.deephaven.base.log.LogOutputAppendable; import io.deephaven.base.verify.Assert; import io.deephaven.util.profiling.ThreadProfiler; +import org.jetbrains.annotations.NotNull; import static io.deephaven.engine.table.impl.lang.QueryLanguageFunctionUtils.minus; import static io.deephaven.engine.table.impl.lang.QueryLanguageFunctionUtils.plus; @@ -15,13 +16,13 @@ * A smaller entry that simply records usage data, meant for aggregating into the larger entry. */ public class BasePerformanceEntry implements LogOutputAppendable { - private long intervalUsageNanos; + private long usageNanos; - private long intervalCpuNanos; - private long intervalUserCpuNanos; + private long cpuNanos; + private long userCpuNanos; - private long intervalAllocatedBytes; - private long intervalPoolAllocatedBytes; + private long allocatedBytes; + private long poolAllocatedBytes; private long startTimeNanos; @@ -31,26 +32,26 @@ public class BasePerformanceEntry implements LogOutputAppendable { private long startAllocatedBytes; private long startPoolAllocatedBytes; - public void onBaseEntryStart() { + public synchronized void onBaseEntryStart() { startAllocatedBytes = ThreadProfiler.DEFAULT.getCurrentThreadAllocatedBytes(); - startPoolAllocatedBytes = QueryPerformanceRecorder.getPoolAllocatedBytesForCurrentThread(); + startPoolAllocatedBytes = QueryPerformanceRecorderState.getPoolAllocatedBytesForCurrentThread(); startUserCpuNanos = ThreadProfiler.DEFAULT.getCurrentThreadUserTime(); startCpuNanos = ThreadProfiler.DEFAULT.getCurrentThreadCpuTime(); startTimeNanos = System.nanoTime(); } - public void onBaseEntryEnd() { - intervalUserCpuNanos = plus(intervalUserCpuNanos, + public synchronized void onBaseEntryEnd() { + userCpuNanos = plus(userCpuNanos, minus(ThreadProfiler.DEFAULT.getCurrentThreadUserTime(), startUserCpuNanos)); - intervalCpuNanos = - plus(intervalCpuNanos, minus(ThreadProfiler.DEFAULT.getCurrentThreadCpuTime(), startCpuNanos)); + cpuNanos = + plus(cpuNanos, minus(ThreadProfiler.DEFAULT.getCurrentThreadCpuTime(), startCpuNanos)); - intervalUsageNanos += System.nanoTime() - startTimeNanos; + usageNanos += System.nanoTime() - startTimeNanos; - intervalPoolAllocatedBytes = plus(intervalPoolAllocatedBytes, - 
minus(QueryPerformanceRecorder.getPoolAllocatedBytesForCurrentThread(), startPoolAllocatedBytes)); - intervalAllocatedBytes = plus(intervalAllocatedBytes, + poolAllocatedBytes = plus(poolAllocatedBytes, + minus(QueryPerformanceRecorderState.getPoolAllocatedBytesForCurrentThread(), startPoolAllocatedBytes)); + allocatedBytes = plus(allocatedBytes, minus(ThreadProfiler.DEFAULT.getCurrentThreadAllocatedBytes(), startAllocatedBytes)); startAllocatedBytes = 0; @@ -61,46 +62,76 @@ public void onBaseEntryEnd() { startTimeNanos = 0; } - void baseEntryReset() { + synchronized void baseEntryReset() { Assert.eqZero(startTimeNanos, "startTimeNanos"); - intervalUsageNanos = 0; + usageNanos = 0; - intervalCpuNanos = 0; - intervalUserCpuNanos = 0; + cpuNanos = 0; + userCpuNanos = 0; - intervalAllocatedBytes = 0; - intervalPoolAllocatedBytes = 0; + allocatedBytes = 0; + poolAllocatedBytes = 0; } - public long getIntervalUsageNanos() { - return intervalUsageNanos; + /** + * Get the aggregate usage in nanoseconds. This getter should be called by exclusive owners of the entry, and never + * concurrently with mutators. + * + * @return total wall clock time in nanos + */ + public long getUsageNanos() { + return usageNanos; } - public long getIntervalCpuNanos() { - return intervalCpuNanos; + /** + * Get the aggregate cpu time in nanoseconds. This getter should be called by exclusive owners of the entry, and + * never concurrently with mutators. + * + * @return total cpu time in nanos + */ + public long getCpuNanos() { + return cpuNanos; } - public long getIntervalUserCpuNanos() { - return intervalUserCpuNanos; + /** + * Get the aggregate cpu user time in nanoseconds. This getter should be called by exclusive owners of the entry, + * and never concurrently with mutators. + * + * @return total cpu user time in nanos + */ + public long getUserCpuNanos() { + return userCpuNanos; } - public long getIntervalAllocatedBytes() { - return intervalAllocatedBytes; + /** + * Get the aggregate allocated memory in bytes. This getter should be called by exclusive owners of the entry, and + * never concurrently with mutators. + * + * @return The bytes of allocated memory attributed to the instrumented operation. + */ + public long getAllocatedBytes() { + return allocatedBytes; } - public long getIntervalPoolAllocatedBytes() { - return intervalPoolAllocatedBytes; + /** + * Get allocated pooled/reusable memory attributed to the instrumented operation in bytes. This getter should be + * called by exclusive owners of the entry, and never concurrently with mutators. 
+ * + * @return total pool allocated memory in bytes + */ + public long getPoolAllocatedBytes() { + return poolAllocatedBytes; } @Override - public LogOutput append(LogOutput logOutput) { + public LogOutput append(@NotNull final LogOutput logOutput) { final LogOutput currentValues = logOutput.append("BasePerformanceEntry{") - .append(", intervalUsageNanos=").append(intervalUsageNanos) - .append(", intervalCpuNanos=").append(intervalCpuNanos) - .append(", intervalUserCpuNanos=").append(intervalUserCpuNanos) - .append(", intervalAllocatedBytes=").append(intervalAllocatedBytes) - .append(", intervalPoolAllocatedBytes=").append(intervalPoolAllocatedBytes); + .append(", intervalUsageNanos=").append(usageNanos) + .append(", intervalCpuNanos=").append(cpuNanos) + .append(", intervalUserCpuNanos=").append(userCpuNanos) + .append(", intervalAllocatedBytes=").append(allocatedBytes) + .append(", intervalPoolAllocatedBytes=").append(poolAllocatedBytes); return appendStart(currentValues) .append('}'); } @@ -114,12 +145,17 @@ LogOutput appendStart(LogOutput logOutput) { .append(", startPoolAllocatedBytes=").append(startPoolAllocatedBytes); } - public void accumulate(BasePerformanceEntry entry) { - this.intervalUsageNanos += entry.intervalUsageNanos; - this.intervalCpuNanos = plus(this.intervalCpuNanos, entry.intervalCpuNanos); - this.intervalUserCpuNanos = plus(this.intervalUserCpuNanos, entry.intervalUserCpuNanos); - - this.intervalAllocatedBytes = plus(this.intervalAllocatedBytes, entry.intervalAllocatedBytes); - this.intervalPoolAllocatedBytes = plus(this.intervalPoolAllocatedBytes, entry.intervalPoolAllocatedBytes); + /** + * Accumulate the values from another entry into this one. The provided entry will not be mutated. + * + * @param entry the entry to accumulate + */ + public synchronized void accumulate(@NotNull final BasePerformanceEntry entry) { + this.usageNanos += entry.usageNanos; + this.cpuNanos = plus(this.cpuNanos, entry.cpuNanos); + this.userCpuNanos = plus(this.userCpuNanos, entry.userCpuNanos); + + this.allocatedBytes = plus(this.allocatedBytes, entry.allocatedBytes); + this.poolAllocatedBytes = plus(this.poolAllocatedBytes, entry.poolAllocatedBytes); } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/PerformanceEntry.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/PerformanceEntry.java index fb03b488ffb..889581ab928 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/PerformanceEntry.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/PerformanceEntry.java @@ -13,13 +13,14 @@ import io.deephaven.io.log.impl.LogOutputStringImpl; import io.deephaven.time.DateTimeUtils; import io.deephaven.util.QueryConstants; +import org.jetbrains.annotations.NotNull; /** * Entry class for tracking the performance characteristics of a single recurring update event. 
*/ public class PerformanceEntry extends BasePerformanceEntry implements TableListener.Entry { - private final int id; - private final int evaluationNumber; + private final long id; + private final long evaluationNumber; private final int operationNumber; private final String description; private final String callerLine; @@ -42,7 +43,7 @@ public class PerformanceEntry extends BasePerformanceEntry implements TableListe private final RuntimeMemory.Sample startSample; private final RuntimeMemory.Sample endSample; - PerformanceEntry(final int id, final int evaluationNumber, final int operationNumber, + PerformanceEntry(final long id, final long evaluationNumber, final int operationNumber, final String description, final String callerLine, final String updateGraphName) { this.id = id; this.evaluationNumber = evaluationNumber; @@ -114,7 +115,7 @@ public String toString() { } @Override - public LogOutput append(final LogOutput logOutput) { + public LogOutput append(@NotNull final LogOutput logOutput) { final LogOutput beginning = logOutput.append("PerformanceEntry{") .append(", id=").append(id) .append(", evaluationNumber=").append(evaluationNumber) @@ -122,16 +123,16 @@ public LogOutput append(final LogOutput logOutput) { .append(", description='").append(description).append('\'') .append(", callerLine='").append(callerLine).append('\'') .append(", authContext=").append(authContext) - .append(", intervalUsageNanos=").append(getIntervalUsageNanos()) - .append(", intervalCpuNanos=").append(getIntervalCpuNanos()) - .append(", intervalUserCpuNanos=").append(getIntervalUserCpuNanos()) + .append(", intervalUsageNanos=").append(getUsageNanos()) + .append(", intervalCpuNanos=").append(getCpuNanos()) + .append(", intervalUserCpuNanos=").append(getUserCpuNanos()) .append(", intervalInvocationCount=").append(intervalInvocationCount) .append(", intervalAdded=").append(intervalAdded) .append(", intervalRemoved=").append(intervalRemoved) .append(", intervalModified=").append(intervalModified) .append(", intervalShifted=").append(intervalShifted) - .append(", intervalAllocatedBytes=").append(getIntervalAllocatedBytes()) - .append(", intervalPoolAllocatedBytes=").append(getIntervalPoolAllocatedBytes()) + .append(", intervalAllocatedBytes=").append(getAllocatedBytes()) + .append(", intervalPoolAllocatedBytes=").append(getPoolAllocatedBytes()) .append(", maxTotalMemory=").append(maxTotalMemory) .append(", minFreeMemory=").append(minFreeMemory) .append(", collections=").append(collections) @@ -140,11 +141,11 @@ public LogOutput append(final LogOutput logOutput) { .append('}'); } - public int getId() { + public long getId() { return id; } - public int getEvaluationNumber() { + public long getEvaluationNumber() { return evaluationNumber; } @@ -217,7 +218,7 @@ public long getIntervalInvocationCount() { */ boolean shouldLogEntryInterval() { return intervalInvocationCount > 0 && - UpdatePerformanceTracker.LOG_THRESHOLD.shouldLog(getIntervalUsageNanos()); + UpdatePerformanceTracker.LOG_THRESHOLD.shouldLog(getUsageNanos()); } public void accumulate(PerformanceEntry entry) { diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/QueryPerformanceNugget.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/QueryPerformanceNugget.java index 76761e8b561..950e279af2a 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/QueryPerformanceNugget.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/QueryPerformanceNugget.java @@ -4,15 +4,20 @@ package 
io.deephaven.engine.table.impl.perf; import io.deephaven.auth.AuthContext; +import io.deephaven.base.clock.SystemClock; +import io.deephaven.base.log.LogOutput; +import io.deephaven.base.verify.Assert; import io.deephaven.engine.context.ExecutionContext; +import io.deephaven.io.log.impl.LogOutputStringImpl; import io.deephaven.time.DateTimeUtils; import io.deephaven.engine.table.impl.util.RuntimeMemory; import io.deephaven.util.QueryConstants; -import io.deephaven.util.profiling.ThreadProfiler; +import io.deephaven.util.SafeCloseable; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; -import java.io.Serializable; +import java.util.function.Consumer; -import static io.deephaven.engine.table.impl.lang.QueryLanguageFunctionUtils.minus; import static io.deephaven.util.QueryConstants.*; /** @@ -20,78 +25,185 @@ * intimate relationship with another class, {@link QueryPerformanceRecorder}. Changes to either should take this lack * of encapsulation into account. */ -public class QueryPerformanceNugget implements Serializable, AutoCloseable { - private static final QueryPerformanceLogThreshold LOG_THRESHOLD = new QueryPerformanceLogThreshold("", 1_000_000); - private static final QueryPerformanceLogThreshold UNINSTRUMENTED_LOG_THRESHOLD = - new QueryPerformanceLogThreshold("Uninstrumented", 1_000_000_000); +public class QueryPerformanceNugget extends BasePerformanceEntry implements SafeCloseable { private static final int MAX_DESCRIPTION_LENGTH = 16 << 10; - private static final long serialVersionUID = 2L; - /** * A re-usable "dummy" nugget which will never collect any information or be recorded. */ - static final QueryPerformanceNugget DUMMY_NUGGET = new QueryPerformanceNugget(); + static final QueryPerformanceNugget DUMMY_NUGGET = new QueryPerformanceNugget() { + @Override + public void accumulate(@NotNull BasePerformanceEntry entry) { + // non-synchronized no-op override + } + + @Override + public boolean shouldLog() { + return false; + } + }; + + public interface Factory { + /** + * Factory method for query-level nuggets. + * + * @param evaluationNumber A unique identifier for the query evaluation that triggered this nugget creation + * @param description The operation description + * @param sessionId The gRPC client session-id if applicable + * @param onCloseCallback A callback that is invoked when the nugget is closed. It returns whether the nugget + * should be logged. + * @return A new nugget + */ + default QueryPerformanceNugget createForQuery( + final long evaluationNumber, + @NotNull final String description, + @Nullable final String sessionId, + @NotNull final Consumer onCloseCallback) { + return new QueryPerformanceNugget(evaluationNumber, NULL_LONG, NULL_INT, NULL_INT, NULL_INT, description, + sessionId, false, NULL_LONG, onCloseCallback); + } + + /** + * Factory method for sub-query-level nuggets. + * + * @param parentQuery The parent query nugget + * @param evaluationNumber A unique identifier for the sub-query evaluation that triggered this nugget creation + * @param description The operation description + * @param onCloseCallback A callback that is invoked when the nugget is closed. It returns whether the nugget + * should be logged. 
+ * @return A new nugget + */ + default QueryPerformanceNugget createForSubQuery( + @NotNull final QueryPerformanceNugget parentQuery, + final long evaluationNumber, + @NotNull final String description, + @NotNull final Consumer onCloseCallback) { + Assert.eqTrue(parentQuery.isQueryLevel(), "parentQuery.isQueryLevel()"); + return new QueryPerformanceNugget(evaluationNumber, parentQuery.getEvaluationNumber(), NULL_INT, NULL_INT, + NULL_INT, description, parentQuery.getSessionId(), false, NULL_LONG, onCloseCallback); + } + + /** + * Factory method for operation-level nuggets. + * + * @param parentQueryOrOperation The parent query / operation nugget + * @param operationNumber A query-unique identifier for the operation + * @param description The operation description + * @param onCloseCallback A callback that is invoked when the nugget is closed. It returns whether the nugget + * should be logged. + * @return A new nugget + */ + default QueryPerformanceNugget createForOperation( + @NotNull final QueryPerformanceNugget parentQueryOrOperation, + final int operationNumber, + final String description, + final long inputSize, + @NotNull final Consumer onCloseCallback) { + int depth = parentQueryOrOperation.getDepth(); + if (depth == NULL_INT) { + depth = 0; + } else { + ++depth; + } + + return new QueryPerformanceNugget( + parentQueryOrOperation.getEvaluationNumber(), + parentQueryOrOperation.getParentEvaluationNumber(), + operationNumber, + parentQueryOrOperation.getOperationNumber(), + depth, + description, + parentQueryOrOperation.getSessionId(), + true, // operations are always user + inputSize, + onCloseCallback); + } + + /** + * Factory method for catch-all nuggets. + * + * @param parentQuery The parent query nugget + * @param operationNumber A query-unique identifier for the operation + * @param onCloseCallback A callback that is invoked when the nugget is closed. It returns whether the nugget + * should be logged. 
+ * @return A new nugget + */ + default QueryPerformanceNugget createForCatchAll( + @NotNull final QueryPerformanceNugget parentQuery, + final int operationNumber, + @NotNull final Consumer onCloseCallback) { + Assert.eqTrue(parentQuery.isQueryLevel(), "parentQuery.isQueryLevel()"); + return new QueryPerformanceNugget( + parentQuery.getEvaluationNumber(), + parentQuery.getParentEvaluationNumber(), + operationNumber, + NULL_INT, // catch all has no parent operation + 0, // catch all is a root operation + QueryPerformanceRecorder.UNINSTRUMENTED_CODE_DESCRIPTION, + parentQuery.getSessionId(), + false, // catch all is not user + NULL_LONG, + onCloseCallback); // catch all has no input size + } + } - private final int evaluationNumber; + public static final Factory DEFAULT_FACTORY = new Factory() {}; + + private final long evaluationNumber; + private final long parentEvaluationNumber; + private final int operationNumber; + private final int parentOperationNumber; private final int depth; private final String description; + private final String sessionId; private final boolean isUser; private final long inputSize; - + private final Consumer onCloseCallback; private final AuthContext authContext; private final String callerLine; - private final long startClockTime; + private long startClockEpochNanos; + private long endClockEpochNanos; - private final long startTimeNanos; - private final long startCpuNanos; - private final long startUserCpuNanos; - private final long startAllocatedBytes; - private final long startPoolAllocatedBytes; private volatile QueryState state; - private Long totalTimeNanos; - private long diffCpuNanos; - private long diffUserCpuNanos; - private long diffAllocatedBytes; - private long diffPoolAllocatedBytes; - - private final RuntimeMemory.Sample startMemorySample; - private final RuntimeMemory.Sample endMemorySample; - - private boolean shouldLogMeAndStackParents; + private RuntimeMemory.Sample startMemorySample; + private RuntimeMemory.Sample endMemorySample; - /** - * For threaded operations we want to accumulate the CPU time, allocations, and read operations to the enclosing - * nugget of the main operation. For the initialization we ignore the wall clock time taken in the thread pool. - */ - private BasePerformanceEntry basePerformanceEntry; - - /** - * Constructor for query-level nuggets. - * - * @param evaluationNumber A unique identifier for the query evaluation that triggered this nugget creation - * @param description The operation description - */ - QueryPerformanceNugget(final int evaluationNumber, final String description) { - this(evaluationNumber, NULL_INT, description, false, NULL_LONG); - } + /** whether this nugget triggers the logging of itself and every other nugget in its stack of nesting operations */ + private boolean shouldLog; /** * Full constructor for nuggets. 
* * @param evaluationNumber A unique identifier for the query evaluation that triggered this nugget creation + * @param parentEvaluationNumber The unique identifier of the parent evaluation or {@link QueryConstants#NULL_LONG} + * if none + * @param operationNumber A unique identifier for the operation within a query evaluation + * @param parentOperationNumber The unique identifier of the parent operation or {@link QueryConstants#NULL_INT} if + * none * @param depth Depth in the evaluation chain for the respective operation * @param description The operation description * @param isUser Whether this is a "user" nugget or one created by the system * @param inputSize The size of the input data + * @param onCloseCallback A callback that is invoked when the nugget is closed. It returns whether the nugget should + * be logged. */ - QueryPerformanceNugget(final int evaluationNumber, final int depth, - final String description, final boolean isUser, final long inputSize) { - startMemorySample = new RuntimeMemory.Sample(); - endMemorySample = new RuntimeMemory.Sample(); + protected QueryPerformanceNugget( + final long evaluationNumber, + final long parentEvaluationNumber, + final int operationNumber, + final int parentOperationNumber, + final int depth, + @NotNull final String description, + @Nullable final String sessionId, + final boolean isUser, + final long inputSize, + @NotNull final Consumer onCloseCallback) { this.evaluationNumber = evaluationNumber; + this.parentEvaluationNumber = parentEvaluationNumber; + this.operationNumber = operationNumber; + this.parentOperationNumber = parentOperationNumber; this.depth = depth; if (description.length() > MAX_DESCRIPTION_LENGTH) { this.description = description.substring(0, MAX_DESCRIPTION_LENGTH) + " ... [truncated " @@ -99,85 +211,92 @@ public class QueryPerformanceNugget implements Serializable, AutoCloseable { } else { this.description = description; } + this.sessionId = sessionId; this.isUser = isUser; this.inputSize = inputSize; + this.onCloseCallback = onCloseCallback; authContext = ExecutionContext.getContext().getAuthContext(); callerLine = QueryPerformanceRecorder.getCallerLine(); - final RuntimeMemory runtimeMemory = RuntimeMemory.getInstance(); - runtimeMemory.read(startMemorySample); - - startAllocatedBytes = ThreadProfiler.DEFAULT.getCurrentThreadAllocatedBytes(); - startPoolAllocatedBytes = QueryPerformanceRecorder.getPoolAllocatedBytesForCurrentThread(); - - startClockTime = System.currentTimeMillis(); - startTimeNanos = System.nanoTime(); + startClockEpochNanos = NULL_LONG; + endClockEpochNanos = NULL_LONG; - startCpuNanos = ThreadProfiler.DEFAULT.getCurrentThreadCpuTime(); - startUserCpuNanos = ThreadProfiler.DEFAULT.getCurrentThreadUserTime(); - - state = QueryState.RUNNING; - shouldLogMeAndStackParents = false; + state = QueryState.NOT_STARTED; } /** * Construct a "dummy" nugget, which will never gather any information or be recorded. 
*/ private QueryPerformanceNugget() { - startMemorySample = null; - endMemorySample = null; - evaluationNumber = NULL_INT; + evaluationNumber = NULL_LONG; + parentEvaluationNumber = NULL_LONG; + operationNumber = NULL_INT; + parentOperationNumber = NULL_INT; depth = 0; description = null; + sessionId = null; isUser = false; inputSize = NULL_LONG; - + onCloseCallback = null; authContext = null; callerLine = null; - startAllocatedBytes = NULL_LONG; - startPoolAllocatedBytes = NULL_LONG; - - startClockTime = NULL_LONG; - startTimeNanos = NULL_LONG; + startClockEpochNanos = NULL_LONG; + endClockEpochNanos = NULL_LONG; - startCpuNanos = NULL_LONG; - startUserCpuNanos = NULL_LONG; + state = QueryState.NOT_STARTED; + } - basePerformanceEntry = null; + /** + * Start clock epoch nanos is set if this is the first time this nugget has been started. + */ + @Override + public synchronized void onBaseEntryStart() { + // note that we explicitly do not call super.onBaseEntryStart() on query level nuggets as all top level nuggets + // accumulate into it to account for parallelized execution + if (operationNumber != NULL_INT) { + super.onBaseEntryStart(); + } + if (state == QueryState.RUNNING) { + throw new IllegalStateException("Nugget was already started"); + } + if (startClockEpochNanos == NULL_LONG) { + startClockEpochNanos = SystemClock.systemUTC().currentTimeNanos(); + } + startMemorySample = new RuntimeMemory.Sample(); + endMemorySample = new RuntimeMemory.Sample(); + final RuntimeMemory runtimeMemory = RuntimeMemory.getInstance(); + runtimeMemory.read(startMemorySample); - state = null; // This turns close into a no-op. - shouldLogMeAndStackParents = false; + state = QueryState.RUNNING; } - public void done() { - done(QueryPerformanceRecorder.getInstance()); + @Override + public synchronized void onBaseEntryEnd() { + if (state != QueryState.RUNNING) { + throw new IllegalStateException("Nugget isn't running"); + } + state = QueryState.SUSPENDED; + // note that we explicitly do not call super.onBaseEntryEnd() on query level nuggets as all top level nuggets + // accumulate into it to account for parallelized execution + if (operationNumber != NULL_INT) { + super.onBaseEntryEnd(); + } } /** * Mark this nugget {@link QueryState#FINISHED} and notify the recorder. - * - * @param recorder The recorder to notify - * @return if the nugget passes logging thresholds. - */ - public boolean done(final QueryPerformanceRecorder recorder) { - return close(QueryState.FINISHED, recorder); - } - - /** - * AutoCloseable implementation - wraps the no-argument version of done() used by query code outside of the - * QueryPerformance(Recorder/Nugget), reporting successful completion to the thread-local QueryPerformanceRecorder - * instance. + *

+ * {@link SafeCloseable} implementation for try-with-resources. */ @Override public void close() { - done(); + close(QueryState.FINISHED); } - @SuppressWarnings("WeakerAccess") - public boolean abort(final QueryPerformanceRecorder recorder) { - return close(QueryState.INTERRUPTED, recorder); + public void abort() { + close(QueryState.INTERRUPTED); } /** @@ -185,71 +304,87 @@ public boolean abort(final QueryPerformanceRecorder recorder) { * * @param closingState The current query state. If it is anything other than {@link QueryState#RUNNING} nothing will * happen and it will return false; - * - * @param recorderToNotify The {@link QueryPerformanceRecorder} to notify this nugget is closing. - * @return If the nugget passes criteria for logging. */ - private boolean close(final QueryState closingState, final QueryPerformanceRecorder recorderToNotify) { - final long currentThreadUserTime = ThreadProfiler.DEFAULT.getCurrentThreadUserTime(); - final long currentThreadCpuTime = ThreadProfiler.DEFAULT.getCurrentThreadCpuTime(); + private void close(final QueryState closingState) { if (state != QueryState.RUNNING) { - return false; + return; } synchronized (this) { if (state != QueryState.RUNNING) { - return false; + return; } - diffUserCpuNanos = minus(currentThreadUserTime, startUserCpuNanos); - diffCpuNanos = minus(currentThreadCpuTime, startCpuNanos); - - totalTimeNanos = System.nanoTime() - startTimeNanos; + onBaseEntryEnd(); + endClockEpochNanos = SystemClock.systemUTC().currentTimeNanos(); final RuntimeMemory runtimeMemory = RuntimeMemory.getInstance(); runtimeMemory.read(endMemorySample); - diffPoolAllocatedBytes = - minus(QueryPerformanceRecorder.getPoolAllocatedBytesForCurrentThread(), startPoolAllocatedBytes); - diffAllocatedBytes = minus(ThreadProfiler.DEFAULT.getCurrentThreadAllocatedBytes(), startAllocatedBytes); - - if (basePerformanceEntry != null) { - diffUserCpuNanos += basePerformanceEntry.getIntervalUserCpuNanos(); - diffCpuNanos += basePerformanceEntry.getIntervalCpuNanos(); - - diffAllocatedBytes += basePerformanceEntry.getIntervalAllocatedBytes(); - diffPoolAllocatedBytes += basePerformanceEntry.getIntervalPoolAllocatedBytes(); - } - state = closingState; - return recorderToNotify.releaseNugget(this); + onCloseCallback.accept(this); } } @Override public String toString() { - return evaluationNumber - + ":" + description - + ":" + callerLine; + return new LogOutputStringImpl().append(this).toString(); } - public int getEvaluationNumber() { + @Override + public LogOutput append(@NotNull final LogOutput logOutput) { + // override BasePerformanceEntry's impl + return logOutput.append(evaluationNumber) + .append(":").append(isQueryLevel() ? 
"query_level" : Integer.toString(operationNumber)) + .append(":").append(description) + .append(":").append(callerLine); + } + + public long getEvaluationNumber() { return evaluationNumber; } + public long getParentEvaluationNumber() { + return parentEvaluationNumber; + } + + public int getOperationNumber() { + return operationNumber; + } + + public int getParentOperationNumber() { + return parentOperationNumber; + } + public int getDepth() { return depth; } - public String getName() { + public String getDescription() { return description; } + @Nullable + public String getSessionId() { + return sessionId; + } + public boolean isUser() { return isUser; } - public boolean isTopLevel() { + public boolean isQueryLevel() { + return operationNumber == NULL_INT; + } + + @SuppressWarnings("unused") + public boolean isTopLevelQuery() { + return isQueryLevel() && parentEvaluationNumber == NULL_LONG; + } + + @SuppressWarnings("unused") + public boolean isTopLevelOperation() { + // note that query level nuggets have depth == NULL_INT return depth == 0; } @@ -269,37 +404,17 @@ public String getCallerLine() { } /** - * @return nanoseconds elapsed, once state != QueryState.RUNNING() has been called. + * @return wall clock start time in nanoseconds from the epoch */ - public Long getTotalTimeNanos() { - return totalTimeNanos; + public long getStartClockEpochNanos() { + return startClockEpochNanos; } /** - * @return wall clock time in milliseconds from the epoch + * @return wall clock end time in nanoseconds from the epoch */ - public long getStartClockTime() { - return startClockTime; - } - - /** - * Get nanoseconds of CPU time attributed to the instrumented operation. - * - * @return The nanoseconds of CPU time attributed to the instrumented operation, or {@link QueryConstants#NULL_LONG} - * if not enabled/supported. - */ - public long getCpuNanos() { - return diffCpuNanos; - } - - /** - * Get nanoseconds of user mode CPU time attributed to the instrumented operation. - * - * @return The nanoseconds of user mode CPU time attributed to the instrumented operation, or - * {@link QueryConstants#NULL_LONG} if not enabled/supported. - */ - public long getUserCpuNanos() { - return diffUserCpuNanos; + public long getEndClockEpochNanos() { + return endClockEpochNanos; } /** @@ -324,7 +439,7 @@ public long getDiffFreeMemory() { } /** - * @return total (allocated high water mark) memory difference between time of completion and creation + * @return total (allocated high watermark) memory difference between time of completion and creation */ public long getDiffTotalMemory() { return endMemorySample.totalMemory - startMemorySample.totalMemory; @@ -345,26 +460,6 @@ public long getDiffCollectionTimeNanos() { .millisToNanos(endMemorySample.totalCollectionTimeMs - startMemorySample.totalCollectionTimeMs); } - /** - * Get bytes of allocated memory attributed to the instrumented operation. - * - * @return The bytes of allocated memory attributed to the instrumented operation, or - * {@link QueryConstants#NULL_LONG} if not enabled/supported. - */ - public long getAllocatedBytes() { - return diffAllocatedBytes; - } - - /** - * Get bytes of allocated pooled/reusable memory attributed to the instrumented operation. - * - * @return The bytes of allocated pooled/reusable memory attributed to the instrumented operation, or - * {@link QueryConstants#NULL_LONG} if not enabled/supported. 
- */ - public long getPoolAllocatedBytes() { - return diffPoolAllocatedBytes; - } - /** * @return true if this nugget was interrupted by an abort() call. */ @@ -375,54 +470,15 @@ public boolean wasInterrupted() { /** * Ensure this nugget gets logged, alongside its stack of nesting operations. */ - public void setShouldLogMeAndStackParents() { - shouldLogMeAndStackParents = true; + void setShouldLog() { + shouldLog = true; } /** * @return true if this nugget triggers the logging of itself and every other nugget in its stack of nesting * operations. */ - public boolean shouldLogMenAndStackParents() { - return shouldLogMeAndStackParents; - } - - /** - * When we track data from other threads that should be attributed to this operation, we tack extra - * BasePerformanceEntry values onto this nugget when it is closed. - * - * The CPU time, reads, and allocations are counted against this nugget. Wall clock time is ignored. - */ - public void addBaseEntry(BasePerformanceEntry baseEntry) { - if (this.basePerformanceEntry == null) { - this.basePerformanceEntry = baseEntry; - } else { - this.basePerformanceEntry.accumulate(baseEntry); - } - } - - /** - * Suppress de minimus performance nuggets using the properties defined above. - * - * @param isUninstrumented this nugget for uninstrumented code? If so the thresholds for inclusion in the logs are - * configured distinctly. - * - * @return if this nugget is significant enough to be logged. - */ - boolean shouldLogNugget(final boolean isUninstrumented) { - if (shouldLogMeAndStackParents) { - return true; - } - // Nuggets will have a null value for total time if they weren't closed for a RUNNING query; this is an abnormal - // condition and the nugget should be logged - if (getTotalTimeNanos() == null) { - return true; - } - - if (isUninstrumented) { - return UNINSTRUMENTED_LOG_THRESHOLD.shouldLog(getTotalTimeNanos()); - } else { - return LOG_THRESHOLD.shouldLog(getTotalTimeNanos()); - } + boolean shouldLog() { + return shouldLog; } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/QueryPerformanceRecorder.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/QueryPerformanceRecorder.java index f92c2b15a98..6a91ae9e201 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/QueryPerformanceRecorder.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/QueryPerformanceRecorder.java @@ -3,428 +3,269 @@ */ package io.deephaven.engine.table.impl.perf; -import io.deephaven.base.verify.Assert; -import io.deephaven.configuration.Configuration; -import io.deephaven.datastructures.util.CollectionUtil; -import io.deephaven.chunk.util.pools.ChunkPoolInstrumentation; -import io.deephaven.engine.exceptions.CancellationException; -import io.deephaven.engine.table.Table; -import io.deephaven.engine.util.TableTools; -import io.deephaven.engine.updategraph.UpdateGraphLock; import io.deephaven.util.QueryConstants; +import io.deephaven.util.SafeCloseable; +import io.deephaven.util.annotations.FinalDefault; import io.deephaven.util.function.ThrowingRunnable; import io.deephaven.util.function.ThrowingSupplier; -import io.deephaven.util.profiling.ThreadProfiler; -import org.apache.commons.lang3.mutable.MutableLong; import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; -import java.io.*; -import java.net.URL; import java.util.*; -import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Supplier; -import static 
io.deephaven.engine.table.impl.lang.QueryLanguageFunctionUtils.minus; -import static io.deephaven.engine.table.impl.lang.QueryLanguageFunctionUtils.plus; - /** * Query performance instrumentation tools. Manages a hierarchy of {@link QueryPerformanceNugget} instances. - *

- * Thread-safety note: This used to be thread-safe only by virtue of using a thread-local instance. Now it's - * aggressively synchronized so we can abort it from outside the "owner" thread. */ -public class QueryPerformanceRecorder implements Serializable { - - public static final String UNINSTRUMENTED_CODE_DESCRIPTION = "Uninstrumented code"; - - private static final long serialVersionUID = 2L; - private static final String[] packageFilters; - - private QueryPerformanceNugget queryNugget; - private final ArrayList operationNuggets = new ArrayList<>(); - - private QueryState state; - private transient QueryPerformanceNugget catchAllNugget; - private final transient Deque userNuggetStack = new ArrayDeque<>(); - - private static final AtomicInteger queriesProcessed = new AtomicInteger(0); - - private static final ThreadLocal theLocal = - ThreadLocal.withInitial(QueryPerformanceRecorder::new); - private static final ThreadLocal poolAllocatedBytes = ThreadLocal.withInitial( - () -> new MutableLong(ThreadProfiler.DEFAULT.memoryProfilingAvailable() ? 0L - : io.deephaven.util.QueryConstants.NULL_LONG)); - private static final ThreadLocal cachedCallsite = new ThreadLocal<>(); - - static { - final Configuration config = Configuration.getInstance(); - final Set filters = new HashSet<>(); - - final String propVal = config.getProperty("QueryPerformanceRecorder.packageFilter.internal"); - final URL path = QueryPerformanceRecorder.class.getResource("/" + propVal); - if (path == null) { - throw new RuntimeException("Can not locate package filter file " + propVal + " in classpath"); - } - - try (final BufferedReader reader = new BufferedReader(new InputStreamReader(path.openStream()))) { - String line; - while ((line = reader.readLine()) != null) { - if (!line.isEmpty()) { - filters.add(line); - } - } - } catch (IOException e) { - throw new UncheckedIOException("Error reading file " + propVal, e); - } +public interface QueryPerformanceRecorder { - packageFilters = filters.toArray(CollectionUtil.ZERO_LENGTH_STRING_ARRAY); - } + String UNINSTRUMENTED_CODE_DESCRIPTION = "Uninstrumented code"; - public static QueryPerformanceRecorder getInstance() { - return theLocal.get(); - } + ///////////////////////////////////// + // Core Engine Instrumentation API // + ///////////////////////////////////// - public static void resetInstance() { - // clear interrupted - because this is a good place to do it - no cancellation exception here though - // noinspection ResultOfMethodCallIgnored - Thread.interrupted(); - theLocal.remove(); + static QueryPerformanceRecorder getInstance() { + return QueryPerformanceRecorderState.getInstance(); } /** - * Start a query. - * - * @param description A description for the query. + * Create a nugget at the top of the user stack. May return a {@link QueryPerformanceNugget#DUMMY_NUGGET} if no + * recorder is installed. * - * @return a unique evaluation number to identify this query execution. - */ - public synchronized int startQuery(final String description) { - clear(); - final int evaluationNumber = queriesProcessed.getAndIncrement(); - queryNugget = new QueryPerformanceNugget(evaluationNumber, description); - state = QueryState.RUNNING; - startCatchAll(evaluationNumber); - return evaluationNumber; - } - - /** - * Abort a query. + * @param name the nugget name + * @return A new QueryPerformanceNugget to encapsulate user query operations. {@link QueryPerformanceNugget#close()} + * must be called on the nugget. 
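+     * <p>
+     * A minimal usage sketch (the operation name shown is illustrative, not part of this API):
+     *
+     * <pre>
+     * try (final QueryPerformanceNugget ignored = QueryPerformanceRecorder.getInstance().getNugget("MyOperation")) {
+     *     // perform the instrumented work; closing the nugget reports it back to the recorder
+     * }
+     * </pre>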
*/ - public synchronized void abortQuery() { - if (state != QueryState.RUNNING) { - return; - } - state = QueryState.INTERRUPTED; - if (catchAllNugget != null) { - stopCatchAll(true); - } else { - while (!userNuggetStack.isEmpty()) { - userNuggetStack.peekLast().abort(this); - } - } - queryNugget.abort(this); + @FinalDefault + default QueryPerformanceNugget getNugget(@NotNull String name) { + return getNugget(name, QueryConstants.NULL_LONG); } /** - * Return the query's current state - * - * @return the query's state or null if it isn't initialized yet + * Create a nugget at the top of the user stack. May return a {@link QueryPerformanceNugget#DUMMY_NUGGET} if no + * recorder is installed. + * + * @param name the nugget name + * @param inputSize the nugget's input size + * @return A new QueryPerformanceNugget to encapsulate user query operations. {@link QueryPerformanceNugget#close()} + * must be called on the nugget. */ - public synchronized QueryState getState() { - return state; - } + QueryPerformanceNugget getNugget(@NotNull String name, long inputSize); /** - * End a query. + * This is the nugget enclosing the current operation. It may belong to the dummy recorder, or a real one. + * + * @return Either a "catch-all" nugget, or the top of the user nugget stack. */ - public synchronized boolean endQuery() { - if (state != QueryState.RUNNING) { - return false; - } - - state = QueryState.FINISHED; - Assert.neqNull(catchAllNugget, "catchAllNugget"); - Assert.neqNull(queryNugget, "queryNugget"); - stopCatchAll(false); - return queryNugget.done(this); - } + QueryPerformanceNugget getEnclosingNugget(); - private void startCatchAll(final int evaluationNumber) { - catchAllNugget = new QueryPerformanceNugget( - evaluationNumber, 0, UNINSTRUMENTED_CODE_DESCRIPTION, false, QueryConstants.NULL_LONG); - } - private void stopCatchAll(final boolean abort) { - final boolean shouldLog; - if (abort) { - shouldLog = catchAllNugget.abort(this); - } else { - shouldLog = catchAllNugget.done(this); - } - if (shouldLog) { - operationNuggets.add(catchAllNugget); - } - catchAllNugget = null; + interface QueryDataConsumer { + void accept(long evaluationNumber, int operationNumber, boolean uninstrumented); } /** - * @param name the nugget name - * @return A new QueryPerformanceNugget to encapsulate user query operations. done() must be called on the nugget. + * Provide current query data via the consumer. + * + * @param consumer a callback to receive query data */ - public QueryPerformanceNugget getNugget(String name) { - return getNugget(name, QueryConstants.NULL_LONG); - } + void supplyQueryData(@NotNull QueryDataConsumer consumer); /** - * @param name the nugget name - * @param inputSize the nugget's input size - * @return A new QueryPerformanceNugget to encapsulate user query operations. done() must be called on the nugget. + * @return The current callsite. This is the last set callsite or the line number of the user's detected callsite. 
*/ - public synchronized QueryPerformanceNugget getNugget(final String name, final long inputSize) { - if (state != QueryState.RUNNING) { - return QueryPerformanceNugget.DUMMY_NUGGET; - } - if (Thread.interrupted()) { - throw new CancellationException("interrupted in QueryPerformanceNugget"); - } - if (catchAllNugget != null) { - stopCatchAll(false); - } - final QueryPerformanceNugget nugget = new QueryPerformanceNugget( - queryNugget.getEvaluationNumber(), userNuggetStack.size(), - name, true, inputSize); - operationNuggets.add(nugget); - userNuggetStack.addLast(nugget); - return nugget; + static String getCallerLine() { + return QueryPerformanceRecorderState.getCallerLine(); } /** - * Note: Do not call this directly - it's for nugget use only. Call nugget.done(), instead. TODO: Reverse the - * disclaimer above - I think it's much better for the recorder to support done/abort(nugget), rather than - * continuing to have the nugget support done/abort(recorder). - * - * @param nugget the nugget to be released - * @return If the nugget passes criteria for logging. + * Attempt to set the thread local callsite so that invocations of {@link #getCallerLine()} will not spend time + * trying to recompute. + *
<p>
+ * This method returns a boolean if the value was successfully set. In the event this returns true, it's the + * responsibility of the caller to invoke {@link #clearCallsite()} when the operation is complete. + *
<p>
+ * It is good practice to do this with try{} finally{} block + * + *
<pre>
+     * final boolean shouldClear = QueryPerformanceRecorder.setCallsite("CALLSITE");
+     * try {
+     *     // Do work
+     * } finally {
+     *     if (shouldClear) {
+     *         QueryPerformanceRecorder.clearCallsite();
+     *     }
+     * }
+     * </pre>
+ * + * @param callsite The call site to use. + * + * @return true if successfully set, false otherwise */ - synchronized boolean releaseNugget(QueryPerformanceNugget nugget) { - boolean shouldLog = nugget.shouldLogNugget(nugget == catchAllNugget); - if (!nugget.isUser()) { - return shouldLog; - } - - final QueryPerformanceNugget removed = userNuggetStack.removeLast(); - if (nugget != removed) { - throw new IllegalStateException( - "Released query performance nugget " + nugget + " (" + System.identityHashCode(nugget) + - ") didn't match the top of the user nugget stack " + removed + " (" - + System.identityHashCode(removed) + - ") - did you follow the correct try/finally pattern?"); - } - - if (removed.shouldLogMenAndStackParents()) { - shouldLog = true; - if (userNuggetStack.size() > 0) { - userNuggetStack.getLast().setShouldLogMeAndStackParents(); - } - } - if (!shouldLog) { - // If we have filtered this nugget, by our filter design we will also have filtered any nuggets it encloses. - // This means it *must* be the last entry in operationNuggets, so we can safely remove it in O(1). - final QueryPerformanceNugget lastNugget = operationNuggets.remove(operationNuggets.size() - 1); - if (nugget != lastNugget) { - throw new IllegalStateException( - "Filtered query performance nugget " + nugget + " (" + System.identityHashCode(nugget) + - ") didn't match the last operation nugget " + lastNugget + " (" - + System.identityHashCode(lastNugget) + - ")"); - } - } - - if (userNuggetStack.isEmpty() && queryNugget != null && state == QueryState.RUNNING) { - startCatchAll(queryNugget.getEvaluationNumber()); - } - - return shouldLog; - } - - public interface EntrySetter { - void set(int evaluationNumber, int operationNumber, boolean uninstrumented); - } - - public synchronized QueryPerformanceNugget getOuterNugget() { - return userNuggetStack.peekLast(); + static boolean setCallsite(@NotNull final String callsite) { + return QueryPerformanceRecorderState.setCallsite(callsite); } - // returns true if uninstrumented code data was captured. - public void setQueryData(final EntrySetter setter) { - final int evaluationNumber; - final int operationNumber; - boolean uninstrumented = false; - synchronized (this) { - if (state != QueryState.RUNNING) { - setter.set(QueryConstants.NULL_INT, QueryConstants.NULL_INT, false); - return; - } - evaluationNumber = queryNugget.getEvaluationNumber(); - operationNumber = operationNuggets.size(); - if (operationNumber > 0) { - // ensure UPL and QOPL are consistent/joinable. - if (userNuggetStack.size() > 0) { - userNuggetStack.getLast().setShouldLogMeAndStackParents(); - } else { - uninstrumented = true; - if (catchAllNugget != null) { - catchAllNugget.setShouldLogMeAndStackParents(); - } - } - } - } - setter.set(evaluationNumber, operationNumber, uninstrumented); + /** + * Attempt to compute and set the thread local callsite so that invocations of {@link #getCallerLine()} will not + * spend time trying to recompute. + *
<p>
+ * Users should follow the best practice as described by {@link #setCallsite(String)} + * + * @return true if the callsite was computed and set. + */ + static boolean setCallsite() { + return QueryPerformanceRecorderState.setCallsite(); } - private void clear() { - queryNugget = null; - catchAllNugget = null; - operationNuggets.clear(); - userNuggetStack.clear(); + /** + * Clear any previously set callsite. See {@link #setCallsite(String)} + */ + static void clearCallsite() { + QueryPerformanceRecorderState.clearCallsite(); } - public synchronized QueryPerformanceNugget getQueryLevelPerformanceData() { - return queryNugget; - } + //////////////////////////////////////////// + // Server-Level Performance Recording API // + //////////////////////////////////////////// - public synchronized List getOperationLevelPerformanceData() { - return operationNuggets; + /** + * Construct a QueryPerformanceRecorder for a top-level query. + * + * @param description the query description + * @param nuggetFactory the nugget factory + * @return a new QueryPerformanceRecorder + */ + static QueryPerformanceRecorder newQuery( + @NotNull final String description, + @Nullable final String sessionId, + @NotNull final QueryPerformanceNugget.Factory nuggetFactory) { + return new QueryPerformanceRecorderImpl(description, sessionId, null, nuggetFactory); } - @SuppressWarnings("unused") - public synchronized Table getTimingResultsAsTable() { - final int count = operationNuggets.size(); - final String[] names = new String[count]; - final Long[] timeNanos = new Long[count]; - final String[] callerLine = new String[count]; - final Boolean[] isTopLevel = new Boolean[count]; - final Boolean[] isCompileTime = new Boolean[count]; - - for (int i = 0; i < operationNuggets.size(); i++) { - timeNanos[i] = operationNuggets.get(i).getTotalTimeNanos(); - names[i] = operationNuggets.get(i).getName(); - callerLine[i] = operationNuggets.get(i).getCallerLine(); - isTopLevel[i] = operationNuggets.get(i).isTopLevel(); - isCompileTime[i] = operationNuggets.get(i).getName().startsWith("Compile:"); - } - return TableTools.newTable( - TableTools.col("names", names), - TableTools.col("line", callerLine), - TableTools.col("timeNanos", timeNanos), - TableTools.col("isTopLevel", isTopLevel), - TableTools.col("isCompileTime", isCompileTime)); + /** + * Construct a QueryPerformanceRecorder for a sub-level query. + * + * @param description the query description + * @param nuggetFactory the nugget factory + * @return a new QueryPerformanceRecorder + */ + static QueryPerformanceRecorder newSubQuery( + @NotNull final String description, + @Nullable final QueryPerformanceRecorder parent, + @NotNull final QueryPerformanceNugget.Factory nuggetFactory) { + return new QueryPerformanceRecorderImpl(description, null, parent, nuggetFactory); } /** - * Install {@link QueryPerformanceRecorder#recordPoolAllocation(java.util.function.Supplier)} as the allocation - * recorder for {@link io.deephaven.chunk.util.pools.ChunkPool chunk pools}. + * Return the query's current state + * + * @return the query's state */ - public static void installPoolAllocationRecorder() { - ChunkPoolInstrumentation.setAllocationRecorder(QueryPerformanceRecorder::recordPoolAllocation); - } + QueryState getState(); /** - * Install this {@link QueryPerformanceRecorder} as the lock action recorder for {@link UpdateGraphLock}. + * Starts a query. + *
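+     * <p>
+     * A rough server-side sketch, assuming a recorder created via {@code newQuery}; the description, session id, and
+     * nugget factory shown here are hypothetical:
+     *
+     * <pre>
+     * final QueryPerformanceRecorder recorder = QueryPerformanceRecorder.newQuery("my query", sessionId, nuggetFactory);
+     * try (final SafeCloseable ignored = recorder.startQuery()) {
+     *     // run the instrumented work on this thread
+     *     final boolean shouldLog = recorder.endQuery();
+     * }
+     * </pre>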
<p>
+ * A query is {@link QueryState#RUNNING RUNNING} if it has been started or {@link #resumeQuery() resumed} without a + * subsequent {@link #endQuery() end}, {@link #suspendQuery() suspend}, or {@link #abortQuery() abort}. + * + * @throws IllegalStateException if the query state isn't {@link QueryState#NOT_STARTED NOT_STARTED} or another + * query is running on this thread */ - public static void installUpdateGraphLockInstrumentation() { - UpdateGraphLock.installInstrumentation(new UpdateGraphLock.Instrumentation() { - - @Override - public void recordAction(@NotNull String description, @NotNull Runnable action) { - QueryPerformanceRecorder.withNugget(description, action::run); - } - - @Override - public void recordActionInterruptibly(@NotNull String description, - @NotNull ThrowingRunnable action) - throws InterruptedException { - QueryPerformanceRecorder.withNuggetThrowing(description, action); - } - }); - } + SafeCloseable startQuery(); /** - * Record a single-threaded operation's allocations as "pool" allocated memory attributable to the current thread. + * End a query. + *
<p>
+ * A query is {@link QueryState#RUNNING RUNNING} if it has been {@link #startQuery() started} or + * {@link #resumeQuery() resumed} without a subsequent end, {@link #suspendQuery() suspend}, or {@link #abortQuery() + * abort}. * - * @param operation The operation to record allocation for - * @return The result of the operation. + * @return whether the query should be logged + * @throws IllegalStateException if the query state isn't {@link QueryState#RUNNING RUNNING}, + * {@link QueryState#INTERRUPTED INTERRUPTED}, or was not running on this thread */ - public static RESULT_TYPE recordPoolAllocation(@NotNull final Supplier operation) { - final long startThreadAllocatedBytes = ThreadProfiler.DEFAULT.getCurrentThreadAllocatedBytes(); - try { - return operation.get(); - } finally { - final long endThreadAllocatedBytes = ThreadProfiler.DEFAULT.getCurrentThreadAllocatedBytes(); - final MutableLong poolAllocatedBytesForCurrentThread = poolAllocatedBytes.get(); - poolAllocatedBytesForCurrentThread.setValue(plus(poolAllocatedBytesForCurrentThread.longValue(), - minus(endThreadAllocatedBytes, startThreadAllocatedBytes))); - } - } + boolean endQuery(); /** - * Get the total bytes of pool-allocated memory attributed to this thread via - * {@link #recordPoolAllocation(Supplier)}. + * Suspends a query. + *
<p>
+ * A query is {@link QueryState#RUNNING RUNNING} if it has been {@link #startQuery() started} or + * {@link #resumeQuery() resumed} without a subsequent {@link #endQuery() end}, suspend, or {@link #abortQuery() + * abort}. * - * @return The total bytes of pool-allocated memory attributed to this thread. + * @throws IllegalStateException if the query state isn't {@link QueryState#RUNNING RUNNING} or was not running on + * this thread */ - public static long getPoolAllocatedBytesForCurrentThread() { - return poolAllocatedBytes.get().longValue(); - } + void suspendQuery(); - public static String getCallerLine() { - String callerLineCandidate = cachedCallsite.get(); + /** + * Resumes a suspend query. + *
<p>
+ * A query is {@link QueryState#RUNNING RUNNING} if it has been {@link #startQuery() started} or resumed without a + * subsequent {@link #endQuery() end}, {@link #suspendQuery() suspend}, or {@link #abortQuery() abort}. + * + * @throws IllegalStateException if the query state isn't {@link QueryState#SUSPENDED SUSPENDED} or another query is + * running on this thread + */ + SafeCloseable resumeQuery(); - if (callerLineCandidate == null) { - final StackTraceElement[] stack = (new Exception()).getStackTrace(); - for (int i = stack.length - 1; i > 0; i--) { - final String className = stack[i].getClassName(); + /** + * Abort a query. + *
<p>
+ * A query is {@link QueryState#RUNNING RUNNING} if it has been {@link #startQuery() started} or + * {@link #resumeQuery() resumed} without a subsequent {@link #endQuery() end}, {@link #suspendQuery() suspend}, or + * abort. + *
<p>
+ * Note that this method is invoked out-of-band and does not throw if the query has been completed. + */ + @SuppressWarnings("unused") + void abortQuery(); - if (className.startsWith("io.deephaven.engine.util.GroovyDeephavenSession")) { - callerLineCandidate = "Groovy Script"; - } else if (Arrays.stream(packageFilters).noneMatch(className::startsWith)) { - callerLineCandidate = stack[i].getFileName() + ":" + stack[i].getLineNumber(); - } - } - } + /** + * @return the query level performance data + */ + QueryPerformanceNugget getQueryLevelPerformanceData(); - return callerLineCandidate == null ? "Internal" : callerLineCandidate; - } + /** + * This getter should be called by exclusive owners of the recorder, and never concurrently with mutators. + * + * @return A list of loggable operation performance data. + */ + List getOperationLevelPerformanceData(); - /*------------------------------------------------------------------------------------------------------------------ - * TODO: the following execute-around methods might be better in a separate class or interface - */ + /** + * Accumulate performance information from another recorder into this one. The provided recorder will not be + * mutated. + * + * @param subQuery the recorder to accumulate into this + */ + void accumulate(@NotNull QueryPerformanceRecorder subQuery); - private static void finishAndClear(QueryPerformanceNugget nugget, boolean needClear) { - if (nugget != null) { - nugget.done(); - } + /** + * @return whether a sub-query was ever accumulated into this recorder + */ + @SuppressWarnings("unused") + boolean hasSubQueries(); - if (needClear) { - clearCallsite(); - } - } + /////////////////////////////////////////////////// + // Convenience Methods for Recording Performance // + /////////////////////////////////////////////////// /** * Surround the given code with a Performance Nugget - * + * * @param name the nugget name * @param r the stuff to run */ - public static void withNugget(final String name, final Runnable r) { + static void withNugget(final String name, final Runnable r) { final boolean needClear = setCallsite(); - QueryPerformanceNugget nugget = null; - - try { - nugget = getInstance().getNugget(name); + try (final QueryPerformanceNugget ignored = getInstance().getNugget(name)) { r.run(); } finally { - finishAndClear(nugget, needClear); + maybeClearCallsite(needClear); } } @@ -435,15 +276,12 @@ public static void withNugget(final String name, final Runnable r) { * @param r the stuff to run * @return the result of the stuff to run */ - public static T withNugget(final String name, final Supplier r) { + static T withNugget(final String name, final Supplier r) { final boolean needClear = setCallsite(); - QueryPerformanceNugget nugget = null; - - try { - nugget = getInstance().getNugget(name); + try (final QueryPerformanceNugget ignored = getInstance().getNugget(name)) { return r.get(); } finally { - finishAndClear(nugget, needClear); + maybeClearCallsite(needClear); } } @@ -453,16 +291,14 @@ public static T withNugget(final String name, final Supplier r) { * @param r the stuff to run * @throws T exception of type T */ - public static void withNuggetThrowing( + static void withNuggetThrowing( final String name, final ThrowingRunnable r) throws T { final boolean needClear = setCallsite(); - QueryPerformanceNugget nugget = null; - try { - nugget = getInstance().getNugget(name); + try (final QueryPerformanceNugget ignored = getInstance().getNugget(name)) { r.run(); } finally { - finishAndClear(nugget, needClear); + 
maybeClearCallsite(needClear); } } @@ -474,33 +310,29 @@ public static void withNuggetThrowing( * @return the result of the stuff to run * @throws ExceptionType exception of type ExceptionType */ - public static R withNuggetThrowing( + static R withNuggetThrowing( final String name, final ThrowingSupplier r) throws ExceptionType { final boolean needClear = setCallsite(); - QueryPerformanceNugget nugget = null; - try { - nugget = getInstance().getNugget(name); + try (final QueryPerformanceNugget ignored = getInstance().getNugget(name)) { return r.get(); } finally { - finishAndClear(nugget, needClear); + maybeClearCallsite(needClear); } } /** * Surround the given code with a Performance Nugget - * + * * @param name the nugget name * @param r the stuff to run */ - public static void withNugget(final String name, final long inputSize, final Runnable r) { + static void withNugget(final String name, final long inputSize, final Runnable r) { final boolean needClear = setCallsite(); - QueryPerformanceNugget nugget = null; - try { - nugget = getInstance().getNugget(name, inputSize); + try (final QueryPerformanceNugget ignored = getInstance().getNugget(name, inputSize)) { r.run(); } finally { - finishAndClear(nugget, needClear); + maybeClearCallsite(needClear); } } @@ -511,14 +343,12 @@ public static void withNugget(final String name, final long inputSize, final Run * @param r the stuff to run * @return the result of the stuff to run */ - public static T withNugget(final String name, final long inputSize, final Supplier r) { + static T withNugget(final String name, final long inputSize, final Supplier r) { final boolean needClear = setCallsite(); - QueryPerformanceNugget nugget = null; - try { - nugget = getInstance().getNugget(name, inputSize); + try (final QueryPerformanceNugget ignored = getInstance().getNugget(name, inputSize)) { return r.get(); } finally { - finishAndClear(nugget, needClear); + maybeClearCallsite(needClear); } } @@ -529,17 +359,15 @@ public static T withNugget(final String name, final long inputSize, final Su * @throws T exception of type T */ @SuppressWarnings("unused") - public static void withNuggetThrowing( + static void withNuggetThrowing( final String name, final long inputSize, final ThrowingRunnable r) throws T { final boolean needClear = setCallsite(); - QueryPerformanceNugget nugget = null; - try { - nugget = getInstance().getNugget(name, inputSize); + try (final QueryPerformanceNugget ignored = getInstance().getNugget(name, inputSize)) { r.run(); } finally { - finishAndClear(nugget, needClear); + maybeClearCallsite(needClear); } } @@ -552,86 +380,26 @@ public static void withNuggetThrowing( * @throws ExceptionType exception of type ExceptionType */ @SuppressWarnings("unused") - public static R withNuggetThrowing( + static R withNuggetThrowing( final String name, final long inputSize, final ThrowingSupplier r) throws ExceptionType { final boolean needClear = setCallsite(); - QueryPerformanceNugget nugget = null; - try { - nugget = getInstance().getNugget(name, inputSize); + try (final QueryPerformanceNugget ignored = getInstance().getNugget(name, inputSize)) { return r.get(); } finally { - finishAndClear(nugget, needClear); + maybeClearCallsite(needClear); } } /** - *

- * Attempt to set the thread local callsite so that invocations of {@link #getCallerLine()} will not spend time - * trying to recompute. - *

- * - *

- * This method returns a boolean if the value was successfully set. In the event this returns true, it's the - * responsibility of the caller to invoke {@link #clearCallsite()} when the operation is complete. - *

- * - *

- * It is good practice to do this with try{} finally{} block - *

+ * Clear the callsite if needed. * - *
-     * final boolean shouldClear = QueryPerformanceRecorder.setCallsite("CALLSITE");
-     * try {
-     *     // Do work
-     * } finally {
-     *     if (shouldClear) {
-     *         QueryPerformanceRecorder.clearCallsite();
-     *     }
-     * }
-     * 
- * - * @param callsite The call site to use. - * - * @return true if successfully set, false otherwise/ + * @param needClear true if the callsite needs to be cleared */ - public static boolean setCallsite(String callsite) { - if (cachedCallsite.get() == null) { - cachedCallsite.set(callsite); - return true; - } - - return false; - } - - /** - *

- * Attempt to compute and set the thread local callsite so that invocations of {@link #getCallerLine()} will not - * spend time trying to recompute. - *

- * - *

- * Users should follow the best practice as described by {@link #setCallsite(String)} - *

- * - * @return true if the callsite was computed and set. - */ - public static boolean setCallsite() { - // This is very similar to the other getCallsite, but we don't want to invoke getCallerLine() unless we - // really need to. - if (cachedCallsite.get() == null) { - cachedCallsite.set(getCallerLine()); - return true; + private static void maybeClearCallsite(final boolean needClear) { + if (needClear) { + clearCallsite(); } - - return false; - } - - /** - * Clear any previously set callsite. See {@link #setCallsite(String)} - */ - public static void clearCallsite() { - cachedCallsite.remove(); } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/QueryPerformanceRecorderImpl.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/QueryPerformanceRecorderImpl.java new file mode 100644 index 00000000000..1900a70bcdf --- /dev/null +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/QueryPerformanceRecorderImpl.java @@ -0,0 +1,344 @@ +/** + * Copyright (c) 2016-2023 Deephaven Data Labs and Patent Pending + */ +package io.deephaven.engine.table.impl.perf; + +import io.deephaven.base.verify.Assert; +import io.deephaven.engine.exceptions.CancellationException; +import io.deephaven.util.SafeCloseable; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; + +import java.util.*; + +import static io.deephaven.util.QueryConstants.NULL_LONG; + +/** + * Query performance instrumentation implementation. Manages a hierarchy of {@link QueryPerformanceNugget} instances. + *
<p>
+ * Many methods are synchronized to 1) support external abort of query and 2) for scenarios where the query is suspended + * and resumed on another thread. + */ +public class QueryPerformanceRecorderImpl implements QueryPerformanceRecorder { + private static final QueryPerformanceLogThreshold LOG_THRESHOLD = new QueryPerformanceLogThreshold("", 1_000_000); + private static final QueryPerformanceLogThreshold UNINSTRUMENTED_LOG_THRESHOLD = + new QueryPerformanceLogThreshold("Uninstrumented", 1_000_000_000); + + @Nullable + private final QueryPerformanceRecorder parent; + private final QueryPerformanceNugget queryNugget; + private final QueryPerformanceNugget.Factory nuggetFactory; + private final ArrayList operationNuggets = new ArrayList<>(); + private final Deque userNuggetStack = new ArrayDeque<>(); + + private QueryState state = QueryState.NOT_STARTED; + private volatile boolean hasSubQueries; + private QueryPerformanceNugget catchAllNugget; + + /** + * Constructs a QueryPerformanceRecorderImpl. + * + * @param description a description for the query + * @param nuggetFactory the factory to use for creating new nuggets + * @param parent the parent query if it exists + */ + QueryPerformanceRecorderImpl( + @NotNull final String description, + @Nullable final String sessionId, + @Nullable final QueryPerformanceRecorder parent, + @NotNull final QueryPerformanceNugget.Factory nuggetFactory) { + if (parent == null) { + queryNugget = nuggetFactory.createForQuery( + QueryPerformanceRecorderState.QUERIES_PROCESSED.getAndIncrement(), description, sessionId, + this::releaseNugget); + } else { + queryNugget = nuggetFactory.createForSubQuery( + parent.getQueryLevelPerformanceData(), + QueryPerformanceRecorderState.QUERIES_PROCESSED.getAndIncrement(), description, + this::releaseNugget); + } + this.parent = parent; + this.nuggetFactory = nuggetFactory; + } + + @Override + public synchronized void abortQuery() { + // TODO (https://github.com/deephaven/deephaven-core/issues/53): support out-of-order abort + if (state != QueryState.RUNNING) { + return; + } + state = QueryState.INTERRUPTED; + if (catchAllNugget != null) { + stopCatchAll(true); + } else { + while (!userNuggetStack.isEmpty()) { + userNuggetStack.peekLast().abort(); + } + } + queryNugget.abort(); + } + + /** + * Return the query's current state + * + * @return the query's state or null if it isn't initialized yet + */ + @Override + public synchronized QueryState getState() { + return state; + } + + @Override + public synchronized SafeCloseable startQuery() { + if (state != QueryState.NOT_STARTED) { + throw new IllegalStateException("Can't resume a query that has already started"); + } + return resumeInternal(); + } + + @Override + public synchronized boolean endQuery() { + if (state != QueryState.RUNNING) { + if (state != QueryState.INTERRUPTED) { + // We only allow the query to be RUNNING or INTERRUPTED when we end it; else we are in an illegal state. + throw new IllegalStateException("Can't end a query that isn't running or interrupted"); + } + return false; + } + state = QueryState.FINISHED; + suspendInternal(); + + queryNugget.close(); + if (parent != null) { + parent.accumulate(this); + } + return shouldLogNugget(queryNugget) || !operationNuggets.isEmpty() || hasSubQueries; + } + + /** + * Suspends a query. + *
<p>
+ * This resets the thread local and assumes that this performance nugget may be resumed on another thread. + */ + public synchronized void suspendQuery() { + if (state != QueryState.RUNNING) { + throw new IllegalStateException("Can't suspend a query that isn't running"); + } + state = QueryState.SUSPENDED; + suspendInternal(); + queryNugget.onBaseEntryEnd(); + } + + private void suspendInternal() { + final QueryPerformanceRecorder threadLocalInstance = QueryPerformanceRecorderState.getInstance(); + if (threadLocalInstance != this) { + throw new IllegalStateException("Can't suspend a query that doesn't belong to this thread"); + } + + Assert.neqNull(catchAllNugget, "catchAllNugget"); + stopCatchAll(false); + + // uninstall this instance from the thread local + QueryPerformanceRecorderState.resetInstance(); + } + + /** + * Resumes a suspend query. + *
<p>
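+     * A hand-off sketch, assuming the recorder was suspended on the original thread; the executor shown here is
+     * hypothetical:
+     *
+     * <pre>
+     * recorder.suspendQuery(); // on the original thread
+     * executor.submit(() -> {
+     *     try (final SafeCloseable ignored = recorder.resumeQuery()) {
+     *         // continue the instrumented work on the new thread
+     *         recorder.endQuery();
+     *     }
+     * });
+     * </pre>
+     * <p>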
+ * It is an error to resume a query while another query is running on this thread. + * + * @return this + */ + public synchronized SafeCloseable resumeQuery() { + if (state != QueryState.SUSPENDED) { + throw new IllegalStateException("Can't resume a query that isn't suspended"); + } + + return resumeInternal(); + } + + private SafeCloseable resumeInternal() { + final QueryPerformanceRecorder threadLocalInstance = QueryPerformanceRecorderState.getInstance(); + if (threadLocalInstance != QueryPerformanceRecorderState.DUMMY_RECORDER) { + throw new IllegalStateException("Can't resume a query while another query is in operation"); + } + QueryPerformanceRecorderState.THE_LOCAL.set(this); + + queryNugget.onBaseEntryStart(); + state = QueryState.RUNNING; + Assert.eqNull(catchAllNugget, "catchAllNugget"); + startCatchAll(); + + return QueryPerformanceRecorderState::resetInstance; + } + + private void startCatchAll() { + catchAllNugget = nuggetFactory.createForCatchAll(queryNugget, operationNuggets.size(), this::releaseNugget); + catchAllNugget.onBaseEntryStart(); + } + + private void stopCatchAll(final boolean abort) { + if (abort) { + catchAllNugget.abort(); + } else { + catchAllNugget.close(); + } + if (catchAllNugget.shouldLog()) { + Assert.eq(operationNuggets.size(), "operationsNuggets.size()", + catchAllNugget.getOperationNumber(), "catchAllNugget.getOperationNumber()"); + operationNuggets.add(catchAllNugget); + } + catchAllNugget = null; + } + + /** + * @param name the nugget name + * @param inputSize the nugget's input size + * @return A new QueryPerformanceNugget to encapsulate user query operations. done() must be called on the nugget. + */ + public synchronized QueryPerformanceNugget getNugget(@NotNull final String name, final long inputSize) { + Assert.eq(state, "state", QueryState.RUNNING, "QueryState.RUNNING"); + if (Thread.interrupted()) { + throw new CancellationException("interrupted in QueryPerformanceNugget"); + } + if (catchAllNugget != null) { + stopCatchAll(false); + } + + final QueryPerformanceNugget parent; + if (userNuggetStack.isEmpty()) { + parent = queryNugget; + } else { + parent = userNuggetStack.peekLast(); + parent.onBaseEntryEnd(); + } + + final QueryPerformanceNugget nugget = nuggetFactory.createForOperation( + parent, operationNuggets.size(), name, inputSize, this::releaseNugget); + nugget.onBaseEntryStart(); + operationNuggets.add(nugget); + userNuggetStack.addLast(nugget); + return nugget; + } + + /** + * This is our onCloseCallback from the nugget. 
+ * + * @param nugget the nugget to be released + */ + private synchronized void releaseNugget(@NotNull final QueryPerformanceNugget nugget) { + final boolean shouldLog = shouldLogNugget(nugget); + if (!nugget.isUser()) { + return; + } + + final QueryPerformanceNugget removed = userNuggetStack.removeLast(); + if (nugget != removed) { + throw new IllegalStateException( + "Released query performance nugget " + nugget + " (" + System.identityHashCode(nugget) + + ") didn't match the top of the user nugget stack " + removed + " (" + + System.identityHashCode(removed) + + ") - did you follow the correct try/finally pattern?"); + } + + // accumulate into the parent and resume it + if (!userNuggetStack.isEmpty()) { + final QueryPerformanceNugget parent = userNuggetStack.getLast(); + parent.accumulate(nugget); + + if (shouldLog) { + parent.setShouldLog(); + } + + // resume the parent + parent.onBaseEntryStart(); + } else { + queryNugget.accumulate(nugget); + } + + if (!shouldLog) { + // If we have filtered this nugget, by our filter design we will also have filtered any nuggets it encloses. + // This means it *must* be the last entry in operationNuggets, so we can safely remove it in O(1). + final QueryPerformanceNugget lastNugget = operationNuggets.remove(operationNuggets.size() - 1); + if (nugget != lastNugget) { + throw new IllegalStateException( + "Filtered query performance nugget " + nugget + " (" + System.identityHashCode(nugget) + + ") didn't match the last operation nugget " + lastNugget + " (" + + System.identityHashCode(lastNugget) + + ")"); + } + } + + if (userNuggetStack.isEmpty() && queryNugget != null && state == QueryState.RUNNING) { + startCatchAll(); + } + } + + private boolean shouldLogNugget(@NotNull QueryPerformanceNugget nugget) { + if (nugget.shouldLog()) { + return true; + } else if (nugget.getEndClockEpochNanos() == NULL_LONG) { + // Nuggets will have a null value for end time if they weren't closed for a RUNNING query; this is an + // abnormal condition and the nugget should be logged + return true; + } else if (nugget == catchAllNugget) { + return UNINSTRUMENTED_LOG_THRESHOLD.shouldLog(nugget.getUsageNanos()); + } else { + return LOG_THRESHOLD.shouldLog(nugget.getUsageNanos()); + } + } + + @Override + public synchronized QueryPerformanceNugget getEnclosingNugget() { + if (userNuggetStack.isEmpty()) { + Assert.neqNull(catchAllNugget, "catchAllNugget"); + return catchAllNugget; + } + return userNuggetStack.peekLast(); + } + + @Override + public void supplyQueryData(final @NotNull QueryDataConsumer consumer) { + final long evaluationNumber; + final int operationNumber; + boolean uninstrumented = false; + synchronized (this) { + // we should never be called if we're not running + Assert.eq(state, "state", QueryState.RUNNING, "QueryState.RUNNING"); + evaluationNumber = queryNugget.getEvaluationNumber(); + operationNumber = operationNuggets.size(); + if (operationNumber > 0) { + // ensure UPL and QOPL are consistent/joinable. 
+ if (!userNuggetStack.isEmpty()) { + userNuggetStack.getLast().setShouldLog(); + } else { + uninstrumented = true; + Assert.neqNull(catchAllNugget, "catchAllNugget"); + catchAllNugget.setShouldLog(); + } + } + } + consumer.accept(evaluationNumber, operationNumber, uninstrumented); + } + + @Override + public QueryPerformanceNugget getQueryLevelPerformanceData() { + return queryNugget; + } + + @Override + public List getOperationLevelPerformanceData() { + return operationNuggets; + } + + @Override + public void accumulate(@NotNull final QueryPerformanceRecorder subQuery) { + hasSubQueries = true; + queryNugget.accumulate(subQuery.getQueryLevelPerformanceData()); + } + + @Override + public boolean hasSubQueries() { + return hasSubQueries; + } +} diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/QueryPerformanceRecorderState.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/QueryPerformanceRecorderState.java new file mode 100644 index 00000000000..958cbf4ac0f --- /dev/null +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/QueryPerformanceRecorderState.java @@ -0,0 +1,269 @@ +/** + * Copyright (c) 2016-2022 Deephaven Data Labs and Patent Pending + */ +package io.deephaven.engine.table.impl.perf; + +import io.deephaven.chunk.util.pools.ChunkPoolInstrumentation; +import io.deephaven.configuration.Configuration; +import io.deephaven.datastructures.util.CollectionUtil; +import io.deephaven.engine.updategraph.UpdateGraphLock; +import io.deephaven.util.QueryConstants; +import io.deephaven.util.SafeCloseable; +import io.deephaven.util.function.ThrowingRunnable; +import io.deephaven.util.profiling.ThreadProfiler; +import org.apache.commons.lang3.mutable.MutableLong; +import org.jetbrains.annotations.NotNull; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.UncheckedIOException; +import java.net.URL; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Supplier; + +import static io.deephaven.engine.table.impl.lang.QueryLanguageFunctionUtils.minus; +import static io.deephaven.engine.table.impl.lang.QueryLanguageFunctionUtils.plus; + +public abstract class QueryPerformanceRecorderState { + + static final QueryPerformanceRecorder DUMMY_RECORDER = new DummyQueryPerformanceRecorder(); + static final AtomicLong QUERIES_PROCESSED = new AtomicLong(0); + static final ThreadLocal THE_LOCAL = ThreadLocal.withInitial(() -> DUMMY_RECORDER); + + private static final String[] PACKAGE_FILTERS; + private static final ThreadLocal CACHED_CALLSITE = new ThreadLocal<>(); + private static final ThreadLocal POOL_ALLOCATED_BYTES = ThreadLocal.withInitial( + () -> new MutableLong(ThreadProfiler.DEFAULT.memoryProfilingAvailable() + ? 
0L + : io.deephaven.util.QueryConstants.NULL_LONG)); + + static { + // initialize the packages to skip when determining the callsite + + final Configuration config = Configuration.getInstance(); + final Set filters = new HashSet<>(); + + final String propVal = config.getProperty("QueryPerformanceRecorder.packageFilter.internal"); + final URL path = QueryPerformanceRecorder.class.getResource("/" + propVal); + if (path == null) { + throw new RuntimeException("Can not locate package filter file " + propVal + " in classpath"); + } + + try (final BufferedReader reader = new BufferedReader(new InputStreamReader(path.openStream()))) { + String line; + while ((line = reader.readLine()) != null) { + if (!line.isEmpty()) { + filters.add(line); + } + } + } catch (IOException e) { + throw new UncheckedIOException("Error reading file " + propVal, e); + } + + PACKAGE_FILTERS = filters.toArray(CollectionUtil.ZERO_LENGTH_STRING_ARRAY); + } + + private QueryPerformanceRecorderState() { + throw new UnsupportedOperationException("static use only"); + } + + public static QueryPerformanceRecorder getInstance() { + return THE_LOCAL.get(); + } + + static void resetInstance() { + // clear interrupted - because this is a good place to do it - no cancellation exception here though + // noinspection ResultOfMethodCallIgnored + Thread.interrupted(); + THE_LOCAL.remove(); + } + + /** + * Install {@link QueryPerformanceRecorderState#recordPoolAllocation(java.util.function.Supplier)} as the allocation + * recorder for {@link io.deephaven.chunk.util.pools.ChunkPool chunk pools}. + */ + public static void installPoolAllocationRecorder() { + ChunkPoolInstrumentation.setAllocationRecorder(QueryPerformanceRecorderState::recordPoolAllocation); + } + + /** + * Use nuggets from the current {@link QueryPerformanceRecorder} as the lock action recorder for + * {@link UpdateGraphLock}. + */ + public static void installUpdateGraphLockInstrumentation() { + UpdateGraphLock.installInstrumentation(new UpdateGraphLock.Instrumentation() { + + @Override + public void recordAction(@NotNull final String description, @NotNull final Runnable action) { + QueryPerformanceRecorder.withNugget(description, action); + } + + @Override + public void recordActionInterruptibly( + @NotNull final String description, + @NotNull final ThrowingRunnable action) throws InterruptedException { + QueryPerformanceRecorder.withNuggetThrowing(description, action); + } + }); + } + + /** + * Record a single-threaded operation's allocations as "pool" allocated memory attributable to the current thread. + * + * @param operation The operation to record allocation for + * @return The result of the operation. + */ + private static RESULT_TYPE recordPoolAllocation(@NotNull final Supplier operation) { + final long startThreadAllocatedBytes = ThreadProfiler.DEFAULT.getCurrentThreadAllocatedBytes(); + try { + return operation.get(); + } finally { + final long endThreadAllocatedBytes = ThreadProfiler.DEFAULT.getCurrentThreadAllocatedBytes(); + final MutableLong poolAllocatedBytesForCurrentThread = POOL_ALLOCATED_BYTES.get(); + poolAllocatedBytesForCurrentThread.setValue(plus(poolAllocatedBytesForCurrentThread.longValue(), + minus(endThreadAllocatedBytes, startThreadAllocatedBytes))); + } + } + + /** + * Get the total bytes of pool-allocated memory attributed to this thread via + * {@link #recordPoolAllocation(Supplier)}. + * + * @return The total bytes of pool-allocated memory attributed to this thread. 
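+     * <p>
+     * For illustration only, assuming the engine installed the recorder at startup; within this package the calls are:
+     *
+     * <pre>
+     * QueryPerformanceRecorderState.installPoolAllocationRecorder();
+     * // ... later, on the thread whose pool allocations were recorded ...
+     * final long poolAllocatedBytes = QueryPerformanceRecorderState.getPoolAllocatedBytesForCurrentThread();
+     * </pre>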
+ */ + static long getPoolAllocatedBytesForCurrentThread() { + return POOL_ALLOCATED_BYTES.get().longValue(); + } + + /** + * See {@link QueryPerformanceRecorder#getCallerLine()}. + */ + static String getCallerLine() { + String callerLineCandidate = CACHED_CALLSITE.get(); + + if (callerLineCandidate == null) { + final StackTraceElement[] stack = (new Exception()).getStackTrace(); + for (int i = stack.length - 1; i > 0; i--) { + final String className = stack[i].getClassName(); + + if (className.startsWith("io.deephaven.engine.util.GroovyDeephavenSession")) { + callerLineCandidate = "Groovy Script"; + } else if (Arrays.stream(PACKAGE_FILTERS).noneMatch(className::startsWith)) { + callerLineCandidate = stack[i].getFileName() + ":" + stack[i].getLineNumber(); + } + } + } + + return callerLineCandidate == null ? "Internal" : callerLineCandidate; + } + + /** + * See {@link QueryPerformanceRecorder#setCallsite(String)}. + */ + static boolean setCallsite(String callsite) { + if (CACHED_CALLSITE.get() == null) { + CACHED_CALLSITE.set(callsite); + return true; + } + + return false; + } + + /** + * See {@link QueryPerformanceRecorder#setCallsite()}. + */ + static boolean setCallsite() { + // This is very similar to the other setCallsite overload, but we don't want to invoke getCallerLine() unless we + // really need to. + if (CACHED_CALLSITE.get() == null) { + CACHED_CALLSITE.set(getCallerLine()); + return true; + } + + return false; + } + + /** + * Clear any previously set callsite. See {@link #setCallsite(String)} + */ + static void clearCallsite() { + CACHED_CALLSITE.remove(); + } + + /** + * Dummy recorder for use when no recorder is installed. + */ + private static class DummyQueryPerformanceRecorder implements QueryPerformanceRecorder { + + @Override + public QueryPerformanceNugget getNugget(@NotNull final String name, long inputSize) { + return QueryPerformanceNugget.DUMMY_NUGGET; + } + + @Override + public QueryPerformanceNugget getEnclosingNugget() { + return QueryPerformanceNugget.DUMMY_NUGGET; + } + + @Override + public void supplyQueryData(final @NotNull QueryDataConsumer consumer) { + consumer.accept(QueryConstants.NULL_LONG, QueryConstants.NULL_INT, false); + } + + @Override + public QueryPerformanceNugget getQueryLevelPerformanceData() { + return QueryPerformanceNugget.DUMMY_NUGGET; + } + + @Override + public List getOperationLevelPerformanceData() { + return Collections.emptyList(); + } + + @Override + public void accumulate(@NotNull QueryPerformanceRecorder subQuery) { + // no-op + } + + @Override + public boolean hasSubQueries() { + return false; + } + + @Override + public QueryState getState() { + throw new UnsupportedOperationException("Dummy recorder does not support getState()"); + } + + @Override + public SafeCloseable startQuery() { + throw new UnsupportedOperationException("Dummy recorder does not support startQuery()"); + } + + @Override + public boolean endQuery() { + throw new UnsupportedOperationException("Dummy recorder does not support endQuery()"); + } + + @Override + public void suspendQuery() { + throw new UnsupportedOperationException("Dummy recorder does not support suspendQuery()"); + } + + @Override + public SafeCloseable resumeQuery() { + throw new UnsupportedOperationException("Dummy recorder does not support resumeQuery()"); + } + + @Override + public void abortQuery() { + throw new UnsupportedOperationException("Dummy recorder does not support abortQuery()"); + } + } +} diff --git 
a/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/QueryProcessingResults.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/QueryProcessingResults.java deleted file mode 100644 index afb475ec9ce..00000000000 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/QueryProcessingResults.java +++ /dev/null @@ -1,43 +0,0 @@ -/** - * Copyright (c) 2016-2022 Deephaven Data Labs and Patent Pending - */ -package io.deephaven.engine.table.impl.perf; - -import io.deephaven.util.QueryConstants; - -import java.io.Serializable; - -public class QueryProcessingResults implements Serializable { - - private static final long serialVersionUID = 2L; - - private final QueryPerformanceRecorder recorder; - - private volatile Boolean isReplayer = QueryConstants.NULL_BOOLEAN; - private volatile String exception = null; - - - public QueryProcessingResults(final QueryPerformanceRecorder recorder) { - this.recorder = recorder; - } - - public Boolean isReplayer() { - return isReplayer; - } - - public void setReplayer(Boolean replayer) { - isReplayer = replayer; - } - - public String getException() { - return exception; - } - - public void setException(String exception) { - this.exception = exception; - } - - public QueryPerformanceRecorder getRecorder() { - return recorder; - } -} diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/QueryState.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/QueryState.java index baa9341e116..8585b970436 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/QueryState.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/QueryState.java @@ -5,5 +5,5 @@ public enum QueryState { - RUNNING, FINISHED, INTERRUPTED + NOT_STARTED, RUNNING, FINISHED, SUSPENDED, INTERRUPTED } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/UpdatePerformanceStreamPublisher.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/UpdatePerformanceStreamPublisher.java index 268ac0ba3ec..4139aefabdf 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/UpdatePerformanceStreamPublisher.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/UpdatePerformanceStreamPublisher.java @@ -9,7 +9,6 @@ import io.deephaven.engine.table.TableDefinition; import io.deephaven.engine.table.impl.perf.UpdatePerformanceTracker.IntervalLevelDetails; import io.deephaven.engine.table.impl.sources.ArrayBackedColumnSource; -import io.deephaven.engine.table.impl.util.EngineMetrics; import io.deephaven.stream.StreamChunkUtils; import io.deephaven.stream.StreamConsumer; import io.deephaven.stream.StreamPublisher; @@ -20,9 +19,8 @@ class UpdatePerformanceStreamPublisher implements StreamPublisher { private static final TableDefinition DEFINITION = TableDefinition.of( - ColumnDefinition.ofString("ProcessUniqueId"), - ColumnDefinition.ofInt("EntryId"), - ColumnDefinition.ofInt("EvaluationNumber"), + ColumnDefinition.ofLong("EntryId"), + ColumnDefinition.ofLong("EvaluationNumber"), ColumnDefinition.ofInt("OperationNumber"), ColumnDefinition.ofString("EntryDescription"), ColumnDefinition.ofString("EntryCallerLine"), @@ -68,33 +66,57 @@ public void register(@NotNull StreamConsumer consumer) { } public synchronized void add(IntervalLevelDetails intervalLevelDetails, PerformanceEntry performanceEntry) { - chunks[0].asWritableObjectChunk().add(EngineMetrics.getProcessInfo().getId().value()); - 
chunks[1].asWritableIntChunk().add(performanceEntry.getId()); - chunks[2].asWritableIntChunk().add(performanceEntry.getEvaluationNumber()); - chunks[3].asWritableIntChunk().add(performanceEntry.getOperationNumber()); - chunks[4].asWritableObjectChunk().add(performanceEntry.getDescription()); - chunks[5].asWritableObjectChunk().add(performanceEntry.getCallerLine()); - chunks[6].asWritableLongChunk() + // ColumnDefinition.ofInt("EntryId"), + chunks[0].asWritableLongChunk().add(performanceEntry.getId()); + // ColumnDefinition.ofLong("EvaluationNumber"), + chunks[1].asWritableLongChunk().add(performanceEntry.getEvaluationNumber()); + // ColumnDefinition.ofInt("OperationNumber"), + chunks[2].asWritableIntChunk().add(performanceEntry.getOperationNumber()); + // ColumnDefinition.ofString("EntryDescription"), + chunks[3].asWritableObjectChunk().add(performanceEntry.getDescription()); + // ColumnDefinition.ofString("EntryCallerLine"), + chunks[4].asWritableObjectChunk().add(performanceEntry.getCallerLine()); + // ColumnDefinition.ofTime("IntervalStartTime"), + chunks[5].asWritableLongChunk() .add(DateTimeUtils.millisToNanos(intervalLevelDetails.getIntervalStartTimeMillis())); - chunks[7].asWritableLongChunk() + // ColumnDefinition.ofTime("IntervalEndTime"), + chunks[6].asWritableLongChunk() .add(DateTimeUtils.millisToNanos(intervalLevelDetails.getIntervalEndTimeMillis())); - chunks[8].asWritableLongChunk().add(intervalLevelDetails.getIntervalDurationNanos()); - chunks[9].asWritableLongChunk().add(performanceEntry.getIntervalUsageNanos()); - chunks[10].asWritableLongChunk().add(performanceEntry.getIntervalCpuNanos()); - chunks[11].asWritableLongChunk().add(performanceEntry.getIntervalUserCpuNanos()); - chunks[12].asWritableLongChunk().add(performanceEntry.getIntervalAdded()); - chunks[13].asWritableLongChunk().add(performanceEntry.getIntervalRemoved()); - chunks[14].asWritableLongChunk().add(performanceEntry.getIntervalModified()); - chunks[15].asWritableLongChunk().add(performanceEntry.getIntervalShifted()); - chunks[16].asWritableLongChunk().add(performanceEntry.getIntervalInvocationCount()); - chunks[17].asWritableLongChunk().add(performanceEntry.getMinFreeMemory()); - chunks[18].asWritableLongChunk().add(performanceEntry.getMaxTotalMemory()); - chunks[19].asWritableLongChunk().add(performanceEntry.getCollections()); - chunks[20].asWritableLongChunk().add(performanceEntry.getCollectionTimeNanos()); - chunks[21].asWritableLongChunk().add(performanceEntry.getIntervalAllocatedBytes()); - chunks[22].asWritableLongChunk().add(performanceEntry.getIntervalPoolAllocatedBytes()); - chunks[23].asWritableObjectChunk().add(Objects.toString(performanceEntry.getAuthContext())); - chunks[24].asWritableObjectChunk().add(Objects.toString(performanceEntry.getUpdateGraphName())); + // ColumnDefinition.ofLong("IntervalDurationNanos"), + chunks[7].asWritableLongChunk().add(intervalLevelDetails.getIntervalDurationNanos()); + // ColumnDefinition.ofLong("EntryIntervalUsage"), + chunks[8].asWritableLongChunk().add(performanceEntry.getUsageNanos()); + // ColumnDefinition.ofLong("EntryIntervalCpuNanos"), + chunks[9].asWritableLongChunk().add(performanceEntry.getCpuNanos()); + // ColumnDefinition.ofLong("EntryIntervalUserCpuNanos"), + chunks[10].asWritableLongChunk().add(performanceEntry.getUserCpuNanos()); + // ColumnDefinition.ofLong("EntryIntervalAdded"), + chunks[11].asWritableLongChunk().add(performanceEntry.getIntervalAdded()); + // ColumnDefinition.ofLong("EntryIntervalRemoved"), + 
chunks[12].asWritableLongChunk().add(performanceEntry.getIntervalRemoved()); + // ColumnDefinition.ofLong("EntryIntervalModified"), + chunks[13].asWritableLongChunk().add(performanceEntry.getIntervalModified()); + // ColumnDefinition.ofLong("EntryIntervalShifted"), + chunks[14].asWritableLongChunk().add(performanceEntry.getIntervalShifted()); + // ColumnDefinition.ofLong("EntryIntervalInvocationCount"), + chunks[15].asWritableLongChunk().add(performanceEntry.getIntervalInvocationCount()); + // ColumnDefinition.ofLong("MinFreeMemory"), + chunks[16].asWritableLongChunk().add(performanceEntry.getMinFreeMemory()); + // ColumnDefinition.ofLong("MaxTotalMemory"), + chunks[17].asWritableLongChunk().add(performanceEntry.getMaxTotalMemory()); + // ColumnDefinition.ofLong("Collections"), + chunks[18].asWritableLongChunk().add(performanceEntry.getCollections()); + // ColumnDefinition.ofLong("CollectionTimeNanos"), + chunks[19].asWritableLongChunk().add(performanceEntry.getCollectionTimeNanos()); + // ColumnDefinition.ofLong("EntryIntervalAllocatedBytes"), + chunks[20].asWritableLongChunk().add(performanceEntry.getAllocatedBytes()); + // ColumnDefinition.ofLong("EntryIntervalPoolAllocatedBytes"), + chunks[21].asWritableLongChunk().add(performanceEntry.getPoolAllocatedBytes()); + // ColumnDefinition.ofString("AuthContext"), + chunks[22].asWritableObjectChunk().add(Objects.toString(performanceEntry.getAuthContext())); + // ColumnDefinition.ofString("UpdateGraph")); + chunks[23].asWritableObjectChunk().add(Objects.toString(performanceEntry.getUpdateGraphName())); + if (chunks[0].size() == CHUNK_SIZE) { flushInternal(); } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/UpdatePerformanceTracker.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/UpdatePerformanceTracker.java index df7dff85b61..332104884ef 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/UpdatePerformanceTracker.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/perf/UpdatePerformanceTracker.java @@ -32,7 +32,7 @@ import java.util.Objects; import java.util.Queue; import java.util.concurrent.LinkedBlockingDeque; -import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; /** *

@@ -122,7 +122,7 @@ private synchronized void publish( } } - private static final AtomicInteger entryIdCounter = new AtomicInteger(1); + private static final AtomicLong entryIdCounter = new AtomicLong(1); private final UpdateGraph updateGraph; private final PerformanceEntry aggregatedSmallUpdatesEntry; @@ -137,10 +137,10 @@ private synchronized void publish( public UpdatePerformanceTracker(final UpdateGraph updateGraph) { this.updateGraph = Objects.requireNonNull(updateGraph); this.aggregatedSmallUpdatesEntry = new PerformanceEntry( - QueryConstants.NULL_INT, QueryConstants.NULL_INT, QueryConstants.NULL_INT, + QueryConstants.NULL_LONG, QueryConstants.NULL_LONG, QueryConstants.NULL_INT, "Aggregated Small Updates", null, updateGraph.getName()); this.flushEntry = new PerformanceEntry( - QueryConstants.NULL_INT, QueryConstants.NULL_INT, QueryConstants.NULL_INT, + QueryConstants.NULL_LONG, QueryConstants.NULL_LONG, QueryConstants.NULL_INT, "UpdatePerformanceTracker Flush", null, updateGraph.getName()); } @@ -192,7 +192,7 @@ public final PerformanceEntry getEntry(final String description) { final QueryPerformanceRecorder qpr = QueryPerformanceRecorder.getInstance(); final MutableObject entryMu = new MutableObject<>(); - qpr.setQueryData((evaluationNumber, operationNumber, uninstrumented) -> { + qpr.supplyQueryData((evaluationNumber, operationNumber, uninstrumented) -> { final String effectiveDescription; if (StringUtils.isNullOrEmpty(description) && uninstrumented) { effectiveDescription = QueryPerformanceRecorder.UNINSTRUMENTED_CODE_DESCRIPTION; diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/ConditionFilter.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/ConditionFilter.java index d77db0aeb98..c66db402565 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/ConditionFilter.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/ConditionFilter.java @@ -18,7 +18,6 @@ import io.deephaven.engine.table.impl.util.codegen.CodeGenerator; import io.deephaven.engine.context.QueryScopeParam; import io.deephaven.time.TimeLiteralReplacedExpression; -import io.deephaven.engine.table.impl.perf.QueryPerformanceNugget; import io.deephaven.engine.table.impl.perf.QueryPerformanceRecorder; import io.deephaven.engine.table.ColumnSource; import io.deephaven.chunk.*; @@ -382,8 +381,7 @@ protected void generateFilterCode( final StringBuilder classBody = getClassBody(tableDefinition, timeConversionResult, result); if (classBody == null) return; - final QueryPerformanceNugget nugget = QueryPerformanceRecorder.getInstance().getNugget("Compile:" + formula); - try { + try (final SafeCloseable ignored = QueryPerformanceRecorder.getInstance().getNugget("Compile:" + formula)) { final List> paramClasses = new ArrayList<>(); final Consumer> addParamClass = (cls) -> { if (cls != null) { @@ -409,8 +407,6 @@ protected void generateFilterCode( filterKernelClass = ExecutionContext.getContext().getQueryCompiler() .compile("GeneratedFilterKernel", this.classBody = classBody.toString(), QueryCompiler.FORMULA_PREFIX, QueryScopeParamTypeUtil.expandParameterClasses(paramClasses)); - } finally { - nugget.done(); } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/DhFormulaColumn.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/DhFormulaColumn.java index f9b97f739fe..c1caeede3c2 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/DhFormulaColumn.java +++ 
b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/DhFormulaColumn.java @@ -14,7 +14,6 @@ import io.deephaven.engine.table.impl.MatchPair; import io.deephaven.engine.table.Table; import io.deephaven.engine.table.impl.lang.QueryLanguageParser; -import io.deephaven.engine.table.impl.perf.QueryPerformanceNugget; import io.deephaven.engine.table.impl.perf.QueryPerformanceRecorder; import io.deephaven.engine.table.impl.select.codegen.FormulaAnalyzer; import io.deephaven.engine.table.impl.select.codegen.JavaKernelBuilder; @@ -32,6 +31,7 @@ import io.deephaven.internal.log.LoggerFactory; import io.deephaven.io.logger.Logger; import io.deephaven.time.TimeLiteralReplacedExpression; +import io.deephaven.util.SafeCloseable; import io.deephaven.util.type.TypeUtils; import io.deephaven.vector.ObjectVector; import io.deephaven.vector.Vector; @@ -772,8 +772,7 @@ private FormulaFactory createFormulaFactory() { @SuppressWarnings("SameParameterValue") private Class compileFormula(final String what, final String classBody, final String className) { // System.out.printf("compileFormula: what is %s. Code is...%n%s%n", what, classBody); - try (final QueryPerformanceNugget ignored = - QueryPerformanceRecorder.getInstance().getNugget("Compile:" + what)) { + try (final SafeCloseable ignored = QueryPerformanceRecorder.getInstance().getNugget("Compile:" + what)) { // Compilation needs to take place with elevated privileges, but the created object should not have them. final List> paramClasses = new ArrayList<>(); diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/codegen/JavaKernelBuilder.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/codegen/JavaKernelBuilder.java index e34be36fc18..94702af0e11 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/select/codegen/JavaKernelBuilder.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/select/codegen/JavaKernelBuilder.java @@ -5,9 +5,9 @@ import io.deephaven.engine.context.QueryCompiler; import io.deephaven.engine.context.ExecutionContext; +import io.deephaven.util.SafeCloseable; import io.deephaven.vector.Vector; import io.deephaven.engine.context.QueryScopeParam; -import io.deephaven.engine.table.impl.perf.QueryPerformanceNugget; import io.deephaven.engine.table.impl.perf.QueryPerformanceRecorder; import io.deephaven.engine.table.impl.select.Formula; import io.deephaven.engine.table.impl.select.DhFormulaColumn; @@ -260,8 +260,7 @@ private List visitFormulaParameters( @SuppressWarnings("SameParameterValue") private static Class compileFormula(final String what, final String classBody, final String className) { // System.out.printf("compileFormula: formulaString is %s. Code is...%n%s%n", what, classBody); - try (final QueryPerformanceNugget nugget = - QueryPerformanceRecorder.getInstance().getNugget("Compile:" + what)) { + try (final SafeCloseable ignored = QueryPerformanceRecorder.getInstance().getNugget("Compile:" + what)) { // Compilation needs to take place with elevated privileges, but the created object should not have them. 
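The hunks above replace explicit getNugget(...)/done() pairs with try-with-resources over SafeCloseable. A minimal sketch of the new call-site shape, using a hypothetical helper method and description string (not part of this patch):

import io.deephaven.engine.table.impl.perf.QueryPerformanceRecorder;
import io.deephaven.util.SafeCloseable;

class NuggetUsageSketch {
    // Hypothetical helper: the nugget is opened for the duration of the try block and is
    // closed (ended) automatically, even if the measured work throws.
    static void measuredWork(final String description) {
        try (final SafeCloseable ignored = QueryPerformanceRecorder.getInstance().getNugget(description)) {
            // ... expensive work to be instrumented goes here ...
        }
    }
}

This removes the need for the finally { nugget.done(); } blocks deleted throughout this patch.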
final QueryCompiler compiler = ExecutionContext.getContext().getQueryCompiler(); return compiler.compile(className, classBody, QueryCompiler.FORMULA_PREFIX); diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/UpdateBy.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/UpdateBy.java index 6cf58505ba4..245d312793f 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/UpdateBy.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/updateby/UpdateBy.java @@ -22,7 +22,6 @@ import io.deephaven.engine.table.impl.*; import io.deephaven.engine.table.impl.perf.BasePerformanceEntry; import io.deephaven.engine.table.impl.perf.PerformanceEntry; -import io.deephaven.engine.table.impl.perf.QueryPerformanceNugget; import io.deephaven.engine.table.impl.perf.QueryPerformanceRecorder; import io.deephaven.engine.table.impl.sources.*; import io.deephaven.engine.table.impl.sources.sparse.SparseConstants; @@ -908,19 +907,14 @@ private void cleanUpAndNotify(final Runnable onCleanupComplete) { final BasePerformanceEntry accumulated = jobScheduler.getAccumulatedPerformance(); if (accumulated != null) { if (initialStep) { - final QueryPerformanceNugget outerNugget = QueryPerformanceRecorder.getInstance().getOuterNugget(); - if (outerNugget != null) { - outerNugget.addBaseEntry(accumulated); - } + QueryPerformanceRecorder.getInstance().getEnclosingNugget().accumulate(accumulated); } else { source.getUpdateGraph().addNotification(new TerminalNotification() { @Override public void run() { - synchronized (accumulated) { - final PerformanceEntry entry = sourceListener().getEntry(); - if (entry != null) { - entry.accumulate(accumulated); - } + final PerformanceEntry entry = sourceListener().getEntry(); + if (entry != null) { + entry.accumulate(accumulated); } } }); diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/util/AsyncErrorImpl.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/util/AsyncErrorImpl.java index f13d3799136..45cf2166e00 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/util/AsyncErrorImpl.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/util/AsyncErrorImpl.java @@ -39,7 +39,7 @@ public void add( @Nullable TableListener.Entry entry, @Nullable TableListener.Entry sourceEntry, Throwable originalException) { - final int evaluationNumber; + final long evaluationNumber; final int operationNumber; final String description; if (entry instanceof PerformanceEntry) { @@ -52,7 +52,7 @@ public void add( operationNumber = QueryConstants.NULL_INT; description = null; } - final int sourceEvaluationNumber; + final long sourceEvaluationNumber; final int sourceOperationNumber; final String sourceDescription; if (sourceEntry instanceof PerformanceEntry) { diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/util/AsyncErrorStreamPublisher.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/util/AsyncErrorStreamPublisher.java index f719fe2d623..d891df9e041 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/util/AsyncErrorStreamPublisher.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/util/AsyncErrorStreamPublisher.java @@ -20,10 +20,10 @@ class AsyncErrorStreamPublisher implements StreamPublisher { private static final TableDefinition DEFINITION = TableDefinition.of( ColumnDefinition.ofTime("Time"), - ColumnDefinition.ofInt("EvaluationNumber"), + 
ColumnDefinition.ofLong("EvaluationNumber"), ColumnDefinition.ofInt("OperationNumber"), ColumnDefinition.ofString("Description"), - ColumnDefinition.ofInt("SourceQueryEvaluationNumber"), + ColumnDefinition.ofLong("SourceQueryEvaluationNumber"), ColumnDefinition.ofInt("SourceQueryOperationNumber"), ColumnDefinition.ofString("SourceQueryDescription"), ColumnDefinition.of("Cause", Type.ofCustom(Throwable.class))); @@ -51,18 +51,18 @@ public void register(@NotNull StreamConsumer consumer) { public synchronized void add( long timeNanos, - int evaluationNumber, + long evaluationNumber, int operationNumber, String description, - int sourceQueryEvaluationNumber, + long sourceQueryEvaluationNumber, int sourceQueryOperationNumber, String sourceQueryDescription, Throwable cause) { chunks[0].asWritableLongChunk().add(timeNanos); - chunks[1].asWritableIntChunk().add(evaluationNumber); + chunks[1].asWritableLongChunk().add(evaluationNumber); chunks[2].asWritableIntChunk().add(operationNumber); chunks[3].asWritableObjectChunk().add(description); - chunks[4].asWritableIntChunk().add(sourceQueryEvaluationNumber); + chunks[4].asWritableLongChunk().add(sourceQueryEvaluationNumber); chunks[5].asWritableIntChunk().add(sourceQueryOperationNumber); chunks[6].asWritableObjectChunk().add(sourceQueryDescription); chunks[7].asWritableObjectChunk().add(cause); diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/util/EngineMetrics.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/util/EngineMetrics.java index ee32cea7f69..e55dbcc3c8e 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/util/EngineMetrics.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/util/EngineMetrics.java @@ -4,9 +4,12 @@ package io.deephaven.engine.table.impl.util; import io.deephaven.base.clock.Clock; +import io.deephaven.base.verify.Require; import io.deephaven.configuration.Configuration; import io.deephaven.engine.table.impl.BlinkTableTools; import io.deephaven.engine.table.impl.QueryTable; +import io.deephaven.engine.table.impl.perf.QueryPerformanceNugget; +import io.deephaven.engine.table.impl.perf.QueryPerformanceRecorder; import io.deephaven.engine.tablelogger.EngineTableLoggers; import io.deephaven.engine.tablelogger.QueryOperationPerformanceLogLogger; import io.deephaven.engine.tablelogger.QueryPerformanceLogLogger; @@ -16,10 +19,14 @@ import io.deephaven.process.ProcessInfoConfig; import io.deephaven.stats.Driver; import io.deephaven.stats.StatsIntradayLogger; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; import java.io.IOException; +import java.util.List; public class EngineMetrics { + private static final Logger log = LoggerFactory.getLogger(EngineMetrics.class); private static final boolean STATS_LOGGING_ENABLED = Configuration.getInstance().getBooleanWithDefault( "statsLoggingEnabled", true); private static volatile ProcessInfo PROCESS_INFO; @@ -68,9 +75,8 @@ private EngineMetrics() { } catch (IOException e) { log.fatal().append("Failed to configure process info: ").append(e.toString()).endl(); } - qpImpl = new QueryPerformanceImpl(pInfo.getId(), tableLoggerFactory.queryPerformanceLogLogger()); - qoplImpl = new QueryOperationPerformanceImpl(pInfo.getId(), - tableLoggerFactory.queryOperationPerformanceLogLogger()); + qpImpl = new QueryPerformanceImpl(tableLoggerFactory.queryPerformanceLogLogger()); + qoplImpl = new QueryOperationPerformanceImpl(tableLoggerFactory.queryOperationPerformanceLogLogger()); if 
(STATS_LOGGING_ENABLED) { statsImpl = new StatsImpl(pInfo.getId(), tableLoggerFactory.processMetricsLogLogger()); } else { @@ -106,6 +112,31 @@ private StatsIntradayLogger getStatsLogger() { return statsImpl; } + public void logQueryProcessingResults( + @NotNull final QueryPerformanceRecorder recorder, + @Nullable final Exception exception) { + final QueryPerformanceLogLogger qplLogger = getQplLogger(); + final QueryOperationPerformanceLogLogger qoplLogger = getQoplLogger(); + try { + final QueryPerformanceNugget queryNugget = Require.neqNull( + recorder.getQueryLevelPerformanceData(), + "queryProcessingResults.getRecorder().getQueryLevelPerformanceData()"); + + synchronized (qplLogger) { + qplLogger.log(queryNugget, exception); + } + final List nuggets = + recorder.getOperationLevelPerformanceData(); + synchronized (qoplLogger) { + for (QueryPerformanceNugget nugget : nuggets) { + qoplLogger.log(nugget); + } + } + } catch (final Exception e) { + log.error().append("Failed to log query performance data: ").append(e).endl(); + } + } + public static boolean maybeStartStatsCollection() { if (!EngineMetrics.STATS_LOGGING_ENABLED) { return false; diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/util/OperationInitializationPoolJobScheduler.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/util/OperationInitializationPoolJobScheduler.java index 7037dd34811..2722d61fd35 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/util/OperationInitializationPoolJobScheduler.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/util/OperationInitializationPoolJobScheduler.java @@ -32,9 +32,7 @@ public void submit( throw e; } finally { basePerformanceEntry.onBaseEntryEnd(); - synchronized (accumulatedBaseEntry) { - accumulatedBaseEntry.accumulate(basePerformanceEntry); - } + accumulatedBaseEntry.accumulate(basePerformanceEntry); } }); } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/util/QueryOperationPerformanceImpl.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/util/QueryOperationPerformanceImpl.java index c95c4b0a236..d43d8ab1ce2 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/util/QueryOperationPerformanceImpl.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/util/QueryOperationPerformanceImpl.java @@ -7,23 +7,21 @@ import io.deephaven.engine.table.Table; import io.deephaven.engine.table.impl.perf.QueryPerformanceNugget; import io.deephaven.engine.tablelogger.QueryOperationPerformanceLogLogger; -import io.deephaven.process.ProcessUniqueId; import io.deephaven.stream.StreamToBlinkTableAdapter; import io.deephaven.tablelogger.Row.Flags; +import org.jetbrains.annotations.NotNull; import java.io.IOException; import java.util.Objects; class QueryOperationPerformanceImpl implements QueryOperationPerformanceLogLogger { - private final ProcessUniqueId id; private final QueryOperationPerformanceLogLogger qoplLogger; private final QueryOperationPerformanceStreamPublisher publisher; @SuppressWarnings("FieldCanBeLocal") private final StreamToBlinkTableAdapter adapter; private final Table blink; - public QueryOperationPerformanceImpl(ProcessUniqueId id, QueryOperationPerformanceLogLogger qoplLogger) { - this.id = Objects.requireNonNull(id); + public QueryOperationPerformanceImpl(QueryOperationPerformanceLogLogger qoplLogger) { this.qoplLogger = Objects.requireNonNull(qoplLogger); this.publisher = new QueryOperationPerformanceStreamPublisher(); this.adapter = new 
StreamToBlinkTableAdapter( @@ -39,8 +37,10 @@ public Table blinkTable() { } @Override - public void log(Flags flags, int operationNumber, QueryPerformanceNugget nugget) throws IOException { - publisher.add(id.value(), operationNumber, nugget); - qoplLogger.log(flags, operationNumber, nugget); + public void log( + @NotNull final Flags flags, + @NotNull final QueryPerformanceNugget nugget) throws IOException { + publisher.add(nugget); + qoplLogger.log(flags, nugget); } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/util/QueryOperationPerformanceStreamPublisher.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/util/QueryOperationPerformanceStreamPublisher.java index 223549a6fc9..c0529be8681 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/util/QueryOperationPerformanceStreamPublisher.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/util/QueryOperationPerformanceStreamPublisher.java @@ -12,9 +12,7 @@ import io.deephaven.stream.StreamChunkUtils; import io.deephaven.stream.StreamConsumer; import io.deephaven.stream.StreamPublisher; -import io.deephaven.time.DateTimeUtils; import io.deephaven.util.BooleanUtils; -import io.deephaven.util.QueryConstants; import org.jetbrains.annotations.NotNull; import java.util.Objects; @@ -22,19 +20,22 @@ class QueryOperationPerformanceStreamPublisher implements StreamPublisher { private static final TableDefinition DEFINITION = TableDefinition.of( - ColumnDefinition.ofString("ProcessUniqueId"), - ColumnDefinition.ofInt("EvaluationNumber"), + ColumnDefinition.ofLong("EvaluationNumber"), + ColumnDefinition.ofLong("ParentEvaluationNumber"), ColumnDefinition.ofInt("OperationNumber"), + ColumnDefinition.ofInt("ParentOperationNumber"), ColumnDefinition.ofInt("Depth"), ColumnDefinition.ofString("Description"), + ColumnDefinition.ofString("SessionId"), ColumnDefinition.ofString("CallerLine"), - ColumnDefinition.ofBoolean("IsTopLevel"), ColumnDefinition.ofBoolean("IsCompilation"), ColumnDefinition.ofTime("StartTime"), ColumnDefinition.ofTime("EndTime"), ColumnDefinition.ofLong("DurationNanos"), ColumnDefinition.ofLong("CpuNanos"), ColumnDefinition.ofLong("UserCpuNanos"), + ColumnDefinition.ofLong("FreeMemory"), + ColumnDefinition.ofLong("TotalMemory"), ColumnDefinition.ofLong("FreeMemoryChange"), ColumnDefinition.ofLong("TotalMemoryChange"), ColumnDefinition.ofLong("Collections"), @@ -65,36 +66,83 @@ public void register(@NotNull StreamConsumer consumer) { this.consumer = Objects.requireNonNull(consumer); } - public synchronized void add( - final String id, - final int operationNumber, - final QueryPerformanceNugget nugget) { - - chunks[0].asWritableObjectChunk().add(id); - chunks[1].asWritableIntChunk().add(nugget.getEvaluationNumber()); - chunks[2].asWritableIntChunk().add(operationNumber); - chunks[3].asWritableIntChunk().add(nugget.getDepth()); - chunks[4].asWritableObjectChunk().add(nugget.getName()); - chunks[5].asWritableObjectChunk().add(nugget.getCallerLine()); - chunks[6].asWritableByteChunk().add(BooleanUtils.booleanAsByte(nugget.isTopLevel())); - chunks[7].asWritableByteChunk().add(BooleanUtils.booleanAsByte(nugget.getName().startsWith("Compile:"))); - chunks[8].asWritableLongChunk().add(DateTimeUtils.millisToNanos(nugget.getStartClockTime())); - // this is a lie; timestamps should _NOT_ be created based on adding nano time durations to timestamps. - chunks[9].asWritableLongChunk().add(nugget.getTotalTimeNanos() == null ? 
QueryConstants.NULL_LONG - : DateTimeUtils.millisToNanos(nugget.getStartClockTime()) + nugget.getTotalTimeNanos()); - chunks[10].asWritableLongChunk() - .add(nugget.getTotalTimeNanos() == null ? QueryConstants.NULL_LONG : nugget.getTotalTimeNanos()); - chunks[11].asWritableLongChunk().add(nugget.getCpuNanos()); - chunks[12].asWritableLongChunk().add(nugget.getUserCpuNanos()); - chunks[13].asWritableLongChunk().add(nugget.getEndFreeMemory()); - chunks[14].asWritableLongChunk().add(nugget.getEndTotalMemory()); - chunks[15].asWritableLongChunk().add(nugget.getDiffFreeMemory()); - chunks[16].asWritableLongChunk().add(nugget.getDiffTotalMemory()); - chunks[17].asWritableLongChunk().add(nugget.getDiffCollectionTimeNanos()); - chunks[18].asWritableLongChunk().add(nugget.getAllocatedBytes()); - chunks[19].asWritableLongChunk().add(nugget.getPoolAllocatedBytes()); - chunks[20].asWritableByteChunk().add(BooleanUtils.booleanAsByte(nugget.wasInterrupted())); - chunks[21].asWritableObjectChunk().add(Objects.toString(nugget.getAuthContext())); + public synchronized void add(final QueryPerformanceNugget nugget) { + + // ColumnDefinition.ofLong("EvaluationNumber"), + chunks[0].asWritableLongChunk().add(nugget.getEvaluationNumber()); + + // ColumnDefinition.ofLong("ParentEvaluationNumber"), + chunks[1].asWritableLongChunk().add(nugget.getParentEvaluationNumber()); + + // ColumnDefinition.ofInt("OperationNumber"), + chunks[2].asWritableIntChunk().add(nugget.getOperationNumber()); + + // ColumnDefinition.ofInt("ParentOperationNumber"), + chunks[3].asWritableIntChunk().add(nugget.getParentOperationNumber()); + + // ColumnDefinition.ofInt("Depth"), + chunks[4].asWritableIntChunk().add(nugget.getDepth()); + + // ColumnDefinition.ofString("Description"), + chunks[5].asWritableObjectChunk().add(nugget.getDescription()); + + // ColumnDefinition.ofString("SessionId"), + chunks[6].asWritableObjectChunk().add(nugget.getSessionId()); + + // ColumnDefinition.ofString("CallerLine"), + chunks[7].asWritableObjectChunk().add(nugget.getCallerLine()); + + // ColumnDefinition.ofBoolean("IsCompilation"), + chunks[8].asWritableByteChunk().add(BooleanUtils.booleanAsByte(nugget.getDescription().startsWith("Compile:"))); + + // ColumnDefinition.ofTime("StartTime"), + chunks[9].asWritableLongChunk().add(nugget.getStartClockEpochNanos()); + + // ColumnDefinition.ofTime("EndTime"), + chunks[10].asWritableLongChunk().add(nugget.getEndClockEpochNanos()); + + // ColumnDefinition.ofLong("DurationNanos"), + chunks[11].asWritableLongChunk().add(nugget.getUsageNanos()); + + // ColumnDefinition.ofLong("CpuNanos"), + chunks[12].asWritableLongChunk().add(nugget.getCpuNanos()); + + // ColumnDefinition.ofLong("UserCpuNanos"), + chunks[13].asWritableLongChunk().add(nugget.getUserCpuNanos()); + + // ColumnDefinition.ofLong("FreeMemory"), + chunks[14].asWritableLongChunk().add(nugget.getEndFreeMemory()); + + // ColumnDefinition.ofLong("TotalMemory"), + chunks[15].asWritableLongChunk().add(nugget.getEndTotalMemory()); + + // ColumnDefinition.ofLong("FreeMemoryChange"), + chunks[16].asWritableLongChunk().add(nugget.getDiffFreeMemory()); + + // ColumnDefinition.ofLong("TotalMemoryChange"), + chunks[17].asWritableLongChunk().add(nugget.getDiffTotalMemory()); + + // ColumnDefinition.ofLong("Collections") + chunks[18].asWritableLongChunk().add(nugget.getDiffCollections()); + + // ColumnDefinition.ofLong("CollectionTimeNanos"), + chunks[19].asWritableLongChunk().add(nugget.getDiffCollectionTimeNanos()); + + // ColumnDefinition.ofLong("AllocatedBytes"), + 
chunks[20].asWritableLongChunk().add(nugget.getAllocatedBytes()); + + // ColumnDefinition.ofLong("PoolAllocatedBytes"), + chunks[21].asWritableLongChunk().add(nugget.getPoolAllocatedBytes()); + + // ColumnDefinition.ofLong("InputSizeLong"), + chunks[22].asWritableLongChunk().add(nugget.getInputSize()); + + // ColumnDefinition.ofBoolean("WasInterrupted") + chunks[23].asWritableByteChunk().add(BooleanUtils.booleanAsByte(nugget.wasInterrupted())); + + // ColumnDefinition.ofString("AuthContext") + chunks[24].asWritableObjectChunk().add(Objects.toString(nugget.getAuthContext())); + if (chunks[0].size() == CHUNK_SIZE) { flushInternal(); } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/util/QueryPerformanceImpl.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/util/QueryPerformanceImpl.java index b092073921b..355dabe72ae 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/util/QueryPerformanceImpl.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/util/QueryPerformanceImpl.java @@ -6,25 +6,23 @@ import io.deephaven.engine.context.ExecutionContext; import io.deephaven.engine.table.Table; import io.deephaven.engine.table.impl.perf.QueryPerformanceNugget; -import io.deephaven.engine.table.impl.perf.QueryProcessingResults; import io.deephaven.engine.tablelogger.QueryPerformanceLogLogger; -import io.deephaven.process.ProcessUniqueId; import io.deephaven.stream.StreamToBlinkTableAdapter; import io.deephaven.tablelogger.Row.Flags; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; import java.io.IOException; import java.util.Objects; class QueryPerformanceImpl implements QueryPerformanceLogLogger { - private final ProcessUniqueId id; private final QueryPerformanceLogLogger qplLogger; private final QueryPerformanceStreamPublisher publisher; @SuppressWarnings("FieldCanBeLocal") private final StreamToBlinkTableAdapter adapter; private final Table blink; - public QueryPerformanceImpl(ProcessUniqueId id, QueryPerformanceLogLogger qplLogger) { - this.id = Objects.requireNonNull(id); + public QueryPerformanceImpl(QueryPerformanceLogLogger qplLogger) { this.qplLogger = Objects.requireNonNull(qplLogger); this.publisher = new QueryPerformanceStreamPublisher(); this.adapter = new StreamToBlinkTableAdapter( @@ -40,9 +38,11 @@ public Table blinkTable() { } @Override - public void log(Flags flags, long evaluationNumber, QueryProcessingResults queryProcessingResults, - QueryPerformanceNugget nugget) throws IOException { - publisher.add(id.value(), evaluationNumber, queryProcessingResults, nugget); - qplLogger.log(flags, evaluationNumber, queryProcessingResults, nugget); + public void log( + @NotNull final Flags flags, + @NotNull final QueryPerformanceNugget nugget, + @Nullable final Exception exception) throws IOException { + publisher.add(nugget, exception); + qplLogger.log(flags, nugget, exception); } } diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/util/QueryPerformanceStreamPublisher.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/util/QueryPerformanceStreamPublisher.java index adb4511a71e..6923360f032 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/util/QueryPerformanceStreamPublisher.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/util/QueryPerformanceStreamPublisher.java @@ -8,23 +8,23 @@ import io.deephaven.engine.table.ColumnDefinition; import io.deephaven.engine.table.TableDefinition; import 
io.deephaven.engine.table.impl.perf.QueryPerformanceNugget; -import io.deephaven.engine.table.impl.perf.QueryProcessingResults; import io.deephaven.engine.table.impl.sources.ArrayBackedColumnSource; import io.deephaven.stream.StreamChunkUtils; import io.deephaven.stream.StreamConsumer; import io.deephaven.stream.StreamPublisher; -import io.deephaven.time.DateTimeUtils; import io.deephaven.util.BooleanUtils; -import io.deephaven.util.QueryConstants; import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; import java.util.Objects; class QueryPerformanceStreamPublisher implements StreamPublisher { private static final TableDefinition DEFINITION = TableDefinition.of( - ColumnDefinition.ofString("ProcessUniqueId"), ColumnDefinition.ofLong("EvaluationNumber"), + ColumnDefinition.ofLong("ParentEvaluationNumber"), + ColumnDefinition.ofString("Description"), + ColumnDefinition.ofString("SessionId"), ColumnDefinition.ofTime("StartTime"), ColumnDefinition.ofTime("EndTime"), ColumnDefinition.ofLong("DurationNanos"), @@ -39,7 +39,6 @@ class QueryPerformanceStreamPublisher implements StreamPublisher { ColumnDefinition.ofLong("AllocatedBytes"), ColumnDefinition.ofLong("PoolAllocatedBytes"), ColumnDefinition.ofBoolean("WasInterrupted"), - ColumnDefinition.ofBoolean("IsReplayer"), ColumnDefinition.ofString("Exception"), ColumnDefinition.ofString("AuthContext")); private static final int CHUNK_SIZE = ArrayBackedColumnSource.BLOCK_SIZE; @@ -64,69 +63,68 @@ public void register(@NotNull StreamConsumer consumer) { } public synchronized void add( - final String id, - final long evaluationNumber, - final QueryProcessingResults queryProcessingResults, - final QueryPerformanceNugget nugget) { - // ColumnDefinition.ofString("ProcessUniqueId"), - chunks[0].asWritableObjectChunk().add(id); + @NotNull final QueryPerformanceNugget nugget, + @Nullable final Exception exception) { // ColumnDefinition.ofLong("EvaluationNumber") - chunks[1].asWritableLongChunk().add(evaluationNumber); + chunks[0].asWritableLongChunk().add(nugget.getEvaluationNumber()); + + // ColumnDefinition.ofLong("ParentEvaluationNumber") + chunks[1].asWritableLongChunk().add(nugget.getParentEvaluationNumber()); + + // ColumnDefinition.ofString("Description") + chunks[2].asWritableObjectChunk().add(nugget.getDescription()); + + // ColumnDefinition.ofString("SessionId") + chunks[3].asWritableObjectChunk().add(nugget.getSessionId()); // ColumnDefinition.ofTime("StartTime"); - chunks[2].asWritableLongChunk().add(DateTimeUtils.millisToNanos(nugget.getStartClockTime())); + chunks[4].asWritableLongChunk().add(nugget.getStartClockEpochNanos()); // ColumnDefinition.ofTime("EndTime") - // this is a lie; timestamps should _NOT_ be created based on adding nano time durations to timestamps. - chunks[3].asWritableLongChunk().add(nugget.getTotalTimeNanos() == null ? QueryConstants.NULL_LONG - : DateTimeUtils.millisToNanos(nugget.getStartClockTime()) + nugget.getTotalTimeNanos()); + chunks[5].asWritableLongChunk().add(nugget.getEndClockEpochNanos()); // ColumnDefinition.ofLong("DurationNanos") - chunks[4].asWritableLongChunk() - .add(nugget.getTotalTimeNanos() == null ? 
QueryConstants.NULL_LONG : nugget.getTotalTimeNanos()); + chunks[6].asWritableLongChunk().add(nugget.getUsageNanos()); // ColumnDefinition.ofLong("CpuNanos") - chunks[5].asWritableLongChunk().add(nugget.getCpuNanos()); + chunks[7].asWritableLongChunk().add(nugget.getCpuNanos()); // ColumnDefinition.ofLong("UserCpuNanos") - chunks[6].asWritableLongChunk().add(nugget.getUserCpuNanos()); + chunks[8].asWritableLongChunk().add(nugget.getUserCpuNanos()); // ColumnDefinition.ofLong("FreeMemory") - chunks[7].asWritableLongChunk().add(nugget.getEndFreeMemory()); + chunks[9].asWritableLongChunk().add(nugget.getEndFreeMemory()); // ColumnDefinition.ofLong("TotalMemory") - chunks[8].asWritableLongChunk().add(nugget.getEndTotalMemory()); + chunks[10].asWritableLongChunk().add(nugget.getEndTotalMemory()); // ColumnDefinition.ofLong("FreeMemoryChange") - chunks[9].asWritableLongChunk().add(nugget.getDiffFreeMemory()); + chunks[11].asWritableLongChunk().add(nugget.getDiffFreeMemory()); // ColumnDefinition.ofLong("TotalMemoryChange") - chunks[10].asWritableLongChunk().add(nugget.getDiffTotalMemory()); + chunks[12].asWritableLongChunk().add(nugget.getDiffTotalMemory()); // ColumnDefinition.ofLong("Collections") - chunks[11].asWritableLongChunk().add(nugget.getDiffCollections()); + chunks[13].asWritableLongChunk().add(nugget.getDiffCollections()); // ColumnDefinition.ofLong("CollectionTimeNanos") - chunks[12].asWritableLongChunk().add(nugget.getDiffCollectionTimeNanos()); + chunks[14].asWritableLongChunk().add(nugget.getDiffCollectionTimeNanos()); // ColumnDefinition.ofLong("AllocatedBytes") - chunks[13].asWritableLongChunk().add(nugget.getAllocatedBytes()); + chunks[15].asWritableLongChunk().add(nugget.getAllocatedBytes()); // ColumnDefinition.ofLong("PoolAllocatedBytes") - chunks[14].asWritableLongChunk().add(nugget.getPoolAllocatedBytes()); + chunks[16].asWritableLongChunk().add(nugget.getPoolAllocatedBytes()); // ColumnDefinition.ofBoolean("WasInterrupted") - chunks[15].asWritableByteChunk().add(BooleanUtils.booleanAsByte(nugget.wasInterrupted())); - - // ColumnDefinition.ofBoolean("IsReplayer") - chunks[16].asWritableByteChunk().add(BooleanUtils.booleanAsByte(queryProcessingResults.isReplayer())); + chunks[17].asWritableByteChunk().add(BooleanUtils.booleanAsByte(nugget.wasInterrupted())); // ColumnDefinition.ofString("Exception") - chunks[17].asWritableObjectChunk().add(queryProcessingResults.getException()); + chunks[18].asWritableObjectChunk().add(exception == null ? 
null : exception.getMessage()); // ColumnDefinition.ofString("AuthContext") - chunks[18].asWritableObjectChunk().add(Objects.toString(nugget.getAuthContext())); + chunks[19].asWritableObjectChunk().add(Objects.toString(nugget.getAuthContext())); if (chunks[0].size() == CHUNK_SIZE) { flushInternal(); diff --git a/engine/table/src/main/java/io/deephaven/engine/table/impl/util/UpdateGraphJobScheduler.java b/engine/table/src/main/java/io/deephaven/engine/table/impl/util/UpdateGraphJobScheduler.java index 2d799e0582b..345b08aa24e 100644 --- a/engine/table/src/main/java/io/deephaven/engine/table/impl/util/UpdateGraphJobScheduler.java +++ b/engine/table/src/main/java/io/deephaven/engine/table/impl/util/UpdateGraphJobScheduler.java @@ -47,9 +47,7 @@ public void run() { throw e; } finally { baseEntry.onBaseEntryEnd(); - synchronized (accumulatedBaseEntry) { - accumulatedBaseEntry.accumulate(baseEntry); - } + accumulatedBaseEntry.accumulate(baseEntry); } } diff --git a/engine/table/src/main/java/io/deephaven/engine/tablelogger/QueryOperationPerformanceLogLogger.java b/engine/table/src/main/java/io/deephaven/engine/tablelogger/QueryOperationPerformanceLogLogger.java index a981c646b09..1b828022a00 100644 --- a/engine/table/src/main/java/io/deephaven/engine/tablelogger/QueryOperationPerformanceLogLogger.java +++ b/engine/table/src/main/java/io/deephaven/engine/tablelogger/QueryOperationPerformanceLogLogger.java @@ -3,6 +3,7 @@ import io.deephaven.engine.table.impl.perf.QueryPerformanceNugget; import io.deephaven.tablelogger.Row; import io.deephaven.tablelogger.Row.Flags; +import org.jetbrains.annotations.NotNull; import java.io.IOException; @@ -13,18 +14,18 @@ * queries. */ public interface QueryOperationPerformanceLogLogger { - default void log(final int operationNumber, final QueryPerformanceNugget nugget) throws IOException { - log(DEFAULT_INTRADAY_LOGGER_FLAGS, operationNumber, nugget); + default void log(@NotNull final QueryPerformanceNugget nugget) throws IOException { + log(DEFAULT_INTRADAY_LOGGER_FLAGS, nugget); } - void log(final Row.Flags flags, final int operationNumber, final QueryPerformanceNugget nugget) throws IOException; + void log(@NotNull Row.Flags flags, @NotNull QueryPerformanceNugget nugget) throws IOException; enum Noop implements QueryOperationPerformanceLogLogger { INSTANCE; @Override - public void log(Flags flags, int operationNumber, QueryPerformanceNugget nugget) throws IOException { - - } + public void log( + @NotNull final Flags flags, + @NotNull final QueryPerformanceNugget nugget) throws IOException {} } } diff --git a/engine/table/src/main/java/io/deephaven/engine/tablelogger/QueryPerformanceLogLogger.java b/engine/table/src/main/java/io/deephaven/engine/tablelogger/QueryPerformanceLogLogger.java index 28a46f61a61..97a7aa0c834 100644 --- a/engine/table/src/main/java/io/deephaven/engine/tablelogger/QueryPerformanceLogLogger.java +++ b/engine/table/src/main/java/io/deephaven/engine/tablelogger/QueryPerformanceLogLogger.java @@ -1,34 +1,37 @@ package io.deephaven.engine.tablelogger; import io.deephaven.engine.table.impl.perf.QueryPerformanceNugget; -import io.deephaven.engine.table.impl.perf.QueryProcessingResults; import io.deephaven.tablelogger.Row; -import io.deephaven.tablelogger.Row.Flags; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; import java.io.IOException; +import static io.deephaven.tablelogger.TableLogger.DEFAULT_INTRADAY_LOGGER_FLAGS; + /** * Logs data that describes the query-level performance for each worker. 
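For context, the reworked QueryPerformanceLogLogger contract passes the nugget and the query-level exception directly instead of a separate evaluation number and QueryProcessingResults. A hypothetical implementation, intended only to illustrate the new signature, might look like:

import io.deephaven.engine.table.impl.perf.QueryPerformanceNugget;
import io.deephaven.engine.tablelogger.QueryPerformanceLogLogger;
import io.deephaven.tablelogger.Row;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

class PrintingQueryPerformanceLogLogger implements QueryPerformanceLogLogger {
    @Override
    public void log(
            @NotNull final Row.Flags flags,
            @NotNull final QueryPerformanceNugget nugget,
            @Nullable final Exception exception) {
        // The nugget carries its own evaluation number; the exception, when present, replaces
        // the old QueryProcessingResults.getException() string.
        System.out.println("evaluation=" + nugget.getEvaluationNumber()
                + " description=" + nugget.getDescription()
                + (exception == null ? "" : " exception=" + exception.getMessage()));
    }
}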
A given worker may be running multiple queries; * each will have its own set of query performance log entries. */ -import static io.deephaven.tablelogger.TableLogger.DEFAULT_INTRADAY_LOGGER_FLAGS; - public interface QueryPerformanceLogLogger { - default void log(final long evaluationNumber, final QueryProcessingResults queryProcessingResults, - final QueryPerformanceNugget nugget) throws IOException { - log(DEFAULT_INTRADAY_LOGGER_FLAGS, evaluationNumber, queryProcessingResults, nugget); + default void log( + @NotNull final QueryPerformanceNugget nugget, + @Nullable final Exception exception) throws IOException { + log(DEFAULT_INTRADAY_LOGGER_FLAGS, nugget, exception); } - void log(final Row.Flags flags, final long evaluationNumber, final QueryProcessingResults queryProcessingResults, - final QueryPerformanceNugget nugget) throws IOException; + void log( + @NotNull final Row.Flags flags, + @NotNull final QueryPerformanceNugget nugget, + @Nullable final Exception exception) throws IOException; enum Noop implements QueryPerformanceLogLogger { INSTANCE; @Override - public void log(Flags flags, long evaluationNumber, QueryProcessingResults queryProcessingResults, - QueryPerformanceNugget nugget) throws IOException { - - } + public void log( + @NotNull final Row.Flags flags, + @NotNull final QueryPerformanceNugget nugget, + @Nullable final Exception exception) {} } } diff --git a/engine/table/src/main/java/io/deephaven/engine/util/TableShowTools.java b/engine/table/src/main/java/io/deephaven/engine/util/TableShowTools.java index ac65b727082..4867ab01de6 100644 --- a/engine/table/src/main/java/io/deephaven/engine/util/TableShowTools.java +++ b/engine/table/src/main/java/io/deephaven/engine/util/TableShowTools.java @@ -5,11 +5,11 @@ import io.deephaven.datastructures.util.CollectionUtil; import io.deephaven.engine.table.Table; -import io.deephaven.engine.table.impl.perf.QueryPerformanceNugget; import io.deephaven.engine.table.impl.perf.QueryPerformanceRecorder; import io.deephaven.engine.table.ColumnSource; import io.deephaven.engine.rowset.RowSet; import io.deephaven.time.DateTimeUtils; +import io.deephaven.util.SafeCloseable; import io.deephaven.util.type.ArrayTypeUtils; import java.io.PrintStream; @@ -28,8 +28,7 @@ class TableShowTools { static void showInternal(Table source, long firstRow, long lastRowExclusive, ZoneId timeZone, String delimiter, PrintStream out, boolean showRowSet, String[] columns) { - final QueryPerformanceNugget nugget = QueryPerformanceRecorder.getInstance().getNugget("TableTools.show()"); - try { + try (final SafeCloseable ignored = QueryPerformanceRecorder.getInstance().getNugget("TableTools.show()")) { if (columns.length == 0) { final List columnNames = source.getDefinition().getColumnNames(); columns = columnNames.toArray(CollectionUtil.ZERO_LENGTH_STRING_ARRAY); @@ -107,8 +106,6 @@ static void showInternal(Table source, long firstRow, long lastRowExclusive, Zon } out.println(); out.flush(); - } finally { - nugget.done(); } } diff --git a/engine/table/src/test/java/io/deephaven/engine/table/impl/select/TestConstantFormulaEvaluation.java b/engine/table/src/test/java/io/deephaven/engine/table/impl/select/TestConstantFormulaEvaluation.java index c3db84fd0bd..da3be26e696 100644 --- a/engine/table/src/test/java/io/deephaven/engine/table/impl/select/TestConstantFormulaEvaluation.java +++ b/engine/table/src/test/java/io/deephaven/engine/table/impl/select/TestConstantFormulaEvaluation.java @@ -7,12 +7,12 @@ import io.deephaven.engine.table.Table; import 
io.deephaven.engine.table.impl.QueryTable; import io.deephaven.engine.table.impl.lang.JavaExpressionParser; -import io.deephaven.engine.table.impl.perf.QueryPerformanceNugget; import io.deephaven.engine.table.impl.perf.QueryPerformanceRecorder; import io.deephaven.engine.table.impl.sources.SingleValueColumnSource; import io.deephaven.engine.testutil.TstUtils; import io.deephaven.engine.testutil.junit4.EngineCleanup; import io.deephaven.engine.util.TableTools; +import io.deephaven.util.SafeCloseable; import org.junit.Assert; import org.junit.Rule; import org.junit.Test; @@ -190,8 +190,7 @@ public void constantLongValueTest() { private void singleColumnConstantValueFormulaTest(final String formula, final Class columnType, final T columnRowValue, final int tableLength, final String description) { - final QueryPerformanceNugget nugget = QueryPerformanceRecorder.getInstance().getNugget(description); - try { + try (final SafeCloseable ignored = QueryPerformanceRecorder.getInstance().getNugget(description)) { final Table source = TableTools.emptyTable(tableLength).update(formula); String[] columns = source.getDefinition().getColumnNamesArray(); Assert.assertEquals("length of columns = 1", 1, columns.length); @@ -202,8 +201,6 @@ private void singleColumnConstantValueFormulaTest(final String formula, fina Assert.assertEquals(columnType, source.getColumnSource(columns[0]).getType()); Assert.assertEquals(columnRowValue, source.getColumnSource(columns[0]).get(key)); }); - } finally { - nugget.done(); } } @@ -230,8 +227,7 @@ public void threeColumnConstantValueFormulaTest() { private void threeColumnConstantValueFormulaTest(final String[] formulas, final Class calculatedColType, final T expectedConstValue, final ColumnFormula columnFormula, final int tableLength, final String description) { - final QueryPerformanceNugget nugget = QueryPerformanceRecorder.getInstance().getNugget(description); - try { + try (final SafeCloseable nugget = QueryPerformanceRecorder.getInstance().getNugget(description)) { final Table source = TableTools.emptyTable(tableLength).update(formulas); String[] columns = source.getDefinition().getColumnNamesArray(); boolean constantValueColFound = false; @@ -262,16 +258,12 @@ private void threeColumnConstantValueFormulaTest(final String[] formulas, fi (T) source.getColumnSource(columns[1]).get(key)); Assert.assertEquals(expected, source.getColumnSource(columns[2]).get(key)); }); - } finally { - nugget.done(); } } @Test public void queryScopeForAtomicIntPlusConstantFormulaTest() { - final QueryPerformanceNugget nugget = QueryPerformanceRecorder.getInstance() - .getNugget("queryScopeForAtomicInt"); - try { + try (final SafeCloseable ignored = QueryPerformanceRecorder.getInstance().getNugget("queryScopeForAtomicInt")) { final AtomicInteger atomicValue = new AtomicInteger(1); QueryScope.addParam("atomicValue", atomicValue); String[] formulas = new String[] { @@ -309,8 +301,6 @@ public void queryScopeForAtomicIntPlusConstantFormulaTest() { Assert.assertEquals("Calculate Col verification", expectedCalculatedColValue, source.getColumnSource(columns[2]).get(key)); }); - } finally { - nugget.done(); } } @@ -379,9 +369,7 @@ public void testRefreshingTableForConstantFormulaColumnSource() { @SuppressWarnings("SameParameterValue") private void checkConstantFormula(final Table source, final Set expectedConstValueColumns, final T[] expectedConstValues, final Class calculatedColType) { - final QueryPerformanceNugget nugget = - 
QueryPerformanceRecorder.getInstance().getNugget("queryScopeForAtomicInt"); - try { + try (final SafeCloseable ignored = QueryPerformanceRecorder.getInstance().getNugget("queryScopeForAtomicInt")) { int count = 0; int[] constantColIndex = new int[expectedConstValues.length]; String[] columns = source.getDefinition().getColumnNamesArray(); @@ -412,8 +400,6 @@ private void checkConstantFormula(final Table source, final Set expe source.getColumnSource(columns[constantColIndex[i]]).get(key)); } }); - } finally { - nugget.done(); } } } diff --git a/extensions/csv/src/main/java/io/deephaven/csv/CsvTools.java b/extensions/csv/src/main/java/io/deephaven/csv/CsvTools.java index 9e9a431923b..305bdc9a8f9 100644 --- a/extensions/csv/src/main/java/io/deephaven/csv/CsvTools.java +++ b/extensions/csv/src/main/java/io/deephaven/csv/CsvTools.java @@ -41,7 +41,6 @@ import io.deephaven.engine.table.impl.DataAccessHelpers; import io.deephaven.engine.table.WritableColumnSource; import io.deephaven.engine.table.impl.InMemoryTable; -import io.deephaven.engine.table.impl.perf.QueryPerformanceNugget; import io.deephaven.engine.table.impl.perf.QueryPerformanceRecorder; import io.deephaven.engine.table.impl.sources.BooleanArraySource; import io.deephaven.engine.table.impl.sources.ByteArraySource; @@ -59,6 +58,7 @@ import io.deephaven.time.DateTimeUtils; import io.deephaven.util.BooleanUtils; import io.deephaven.util.QueryConstants; +import io.deephaven.util.SafeCloseable; import io.deephaven.util.annotations.ScriptApi; import org.jetbrains.annotations.Nullable; @@ -917,9 +917,8 @@ private static void writeCsvContentsSeq( final boolean nullsAsEmpty, final char separator, @Nullable final BiConsumer progress) throws IOException { - final QueryPerformanceNugget nugget = - QueryPerformanceRecorder.getInstance().getNugget("CsvTools.writeCsvContentsSeq()"); - try { + try (final SafeCloseable ignored = + QueryPerformanceRecorder.getInstance().getNugget("CsvTools.writeCsvContentsSeq()")) { String separatorStr = String.valueOf(separator); for (long i = 0; i < size; i++) { for (int j = 0; j < cols.length; j++) { @@ -945,8 +944,6 @@ private static void writeCsvContentsSeq( progress.accept(i, size); } } - } finally { - nugget.done(); } } diff --git a/extensions/performance/src/main/java/io/deephaven/engine/table/impl/util/PerformanceQueries.java b/extensions/performance/src/main/java/io/deephaven/engine/table/impl/util/PerformanceQueries.java index 14e6177f8be..0ec22a8b8bb 100644 --- a/extensions/performance/src/main/java/io/deephaven/engine/table/impl/util/PerformanceQueries.java +++ b/extensions/performance/src/main/java/io/deephaven/engine/table/impl/util/PerformanceQueries.java @@ -6,6 +6,7 @@ import com.google.auto.service.AutoService; import io.deephaven.engine.table.Table; +import io.deephaven.engine.table.hierarchical.TreeTable; import io.deephaven.engine.util.GroovyDeephavenSession; import io.deephaven.util.annotations.ScriptApi; @@ -68,6 +69,27 @@ public static Table queryOperationPerformance(final long evaluationNumber) { evaluationNumber); } + /** + * Converts the query performance table into a tree table. + * + * @return query performance tree table. + */ + @ScriptApi + public static TreeTable queryPerformanceAsTreeTable() { + return PerformanceQueriesGeneral.queryPerformanceAsTreeTable(TableLoggers.queryPerformanceLog()); + } + + /** + * Merges the query performance and query operation performance tables into a single tree table. + * + * @return query operation performance tree table. 
+ */ + @ScriptApi + public static TreeTable queryOperationPerformanceAsTreeTable() { + return PerformanceQueriesGeneral.queryOperationPerformanceAsTreeTable( + TableLoggers.queryPerformanceLog(), TableLoggers.queryOperationPerformanceLog()); + } + /** * Gets the information for a process. * diff --git a/extensions/performance/src/main/java/io/deephaven/engine/table/impl/util/PerformanceQueriesGeneral.java b/extensions/performance/src/main/java/io/deephaven/engine/table/impl/util/PerformanceQueriesGeneral.java index 43024345ed2..b06e5946845 100644 --- a/extensions/performance/src/main/java/io/deephaven/engine/table/impl/util/PerformanceQueriesGeneral.java +++ b/extensions/performance/src/main/java/io/deephaven/engine/table/impl/util/PerformanceQueriesGeneral.java @@ -3,16 +3,20 @@ */ package io.deephaven.engine.table.impl.util; +import com.google.common.collect.Sets; import io.deephaven.engine.table.Table; -import io.deephaven.engine.table.impl.DataAccessHelpers; +import io.deephaven.engine.table.hierarchical.TreeTable; +import io.deephaven.engine.util.TableTools; import io.deephaven.plot.Figure; import io.deephaven.plot.PlottingConvenience; import io.deephaven.util.QueryConstants; +import org.jetbrains.annotations.NotNull; import java.util.Arrays; import java.util.HashMap; import java.util.Map; -import java.util.OptionalLong; +import java.util.Set; +import java.util.stream.Stream; import static io.deephaven.api.agg.Aggregation.AggFirst; import static io.deephaven.api.agg.Aggregation.AggMax; @@ -23,7 +27,11 @@ * Generalizes {@link PerformanceQueries} to accept table parameters and make evaluation number parameter optional. */ public class PerformanceQueriesGeneral { - private static boolean formatPctColumns = true; + private static final boolean FORMAT_PCT_COLUMNS = true; + private static final Set ALLOWED_MISSING_COLUMN_NAMES = Sets.newHashSet( + "ProcessUniqueId", + "ParentEvaluationNumber", + "ParentOperationNumber"); public static Table queryPerformance(Table queryPerformanceLog, final long evaluationNumber) { @@ -35,21 +43,22 @@ public static Table queryPerformance(Table queryPerformanceLog, final long evalu queryPerformanceLog = queryPerformanceLog .updateView( "WorkerHeapSize = " + workerHeapSizeBytes + "L", - "TimeSecs = nanosToMillis(EndTime - StartTime) / 1000d", // How long this query ran for, in - // seconds + // How long this query ran for, in seconds + "TimeSecs = nanosToMillis(EndTime - StartTime) / 1000d", "NetMemoryChange = FreeMemoryChange - TotalMemoryChange", - "QueryMemUsed = TotalMemory - FreeMemory", // Memory in use by the query. (Only - // includes active heap memory.) - "QueryMemUsedPct = QueryMemUsed / WorkerHeapSize", // Memory usage as a percenage of max heap - // size (-Xmx) - "QueryMemFree = WorkerHeapSize - QueryMemUsed" // Remaining memory until the query runs into the - // max heap size - ) - .moveColumnsUp( - "ProcessUniqueId", "EvaluationNumber", - "QueryMemUsed", "QueryMemFree", "QueryMemUsedPct", - "EndTime", "TimeSecs", "NetMemoryChange"); - if (formatPctColumns) { + // Memory in use by the query. (Only includes active heap memory.) 
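The new @ScriptApi entry points above surface the performance logs as hierarchical views. A brief usage sketch (the class and method names below are hypothetical), assuming the standard performance log tables are available in the current engine context:

import io.deephaven.engine.table.hierarchical.TreeTable;
import io.deephaven.engine.table.impl.util.PerformanceQueries;

class PerformanceTreeTableSketch {
    static void inspectPerformance() {
        // Queries rolled up under their parent evaluation number.
        final TreeTable queries = PerformanceQueries.queryPerformanceAsTreeTable();
        // Operations merged with their owning query into a single tree.
        final TreeTable operations = PerformanceQueries.queryOperationPerformanceAsTreeTable();
        // Bind these in a session to explore them in the UI, or query them directly.
    }
}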
+ "QueryMemUsed = TotalMemory - FreeMemory", + // Memory usage as a percenage of max heap size (-Xmx) + "QueryMemUsedPct = QueryMemUsed / WorkerHeapSize", + // Remaining memory until the query runs into the max heap size + "QueryMemFree = WorkerHeapSize - QueryMemUsed"); + + queryPerformanceLog = maybeMoveColumnsUp(queryPerformanceLog, + "ProcessUniqueId", "EvaluationNumber", "ParentEvaluationNumber", + "QueryMemUsed", "QueryMemFree", "QueryMemUsedPct", + "EndTime", "TimeSecs", "NetMemoryChange"); + + if (FORMAT_PCT_COLUMNS) { queryPerformanceLog = formatColumnsAsPct(queryPerformanceLog, "QueryMemUsedPct"); } return queryPerformanceLog; @@ -64,15 +73,16 @@ public static Table queryOperationPerformance(Table queryOps, final long evaluat queryOps = queryOps.where(whereConditionForEvaluationNumber(evaluationNumber)); } - return queryOps + queryOps = queryOps .updateView( "TimeSecs = nanosToMillis(EndTime - StartTime) / 1000d", - "NetMemoryChange = FreeMemoryChange - TotalMemoryChange" // Change in memory usage delta while - // this query was executing - ) - .moveColumnsUp( - "ProcessUniqueId", "EvaluationNumber", "OperationNumber", - "EndTime", "TimeSecs", "NetMemoryChange"); + // Change in memory usage delta while this query was executing + "NetMemoryChange = FreeMemoryChange - TotalMemoryChange"); + + return maybeMoveColumnsUp(queryOps, + "ProcessUniqueId", "EvaluationNumber", "ParentEvaluationNumber", + "OperationNumber", "ParentOperationNumber", + "EndTime", "TimeSecs", "NetMemoryChange"); } public static Table queryOperationPerformance(final Table queryOps) { @@ -84,11 +94,7 @@ public static String processInfo(Table processInfo, final String processInfoId, processInfo = processInfo .where("Id = `" + processInfoId + "`", "Type = `" + type + "`", "Key = `" + key + "`") .select("Value"); - try { - return (String) DataAccessHelpers.getColumn(processInfo, 0).get(0); - } catch (Exception e) { - return null; - } + return processInfo.getColumnSource("Value").get(processInfo.getRowSet().firstRowKey()); } public static Table queryUpdatePerformance(Table queryUpdatePerformance, final long evaluationNumber, @@ -101,25 +107,27 @@ public static Table queryUpdatePerformance(Table queryUpdatePerformance, final l queryUpdatePerformance = queryUpdatePerformance .updateView( "WorkerHeapSize = " + workerHeapSizeBytes + "L", - "Ratio = EntryIntervalUsage / IntervalDurationNanos", // % of time during this interval that the - // operation was using CPU - "QueryMemUsed = MaxTotalMemory - MinFreeMemory", // Memory in use by the query. (Only - // includes active heap memory.) 
- "QueryMemUsedPct = QueryMemUsed / WorkerHeapSize", // Memory usage as a percenage of the max - // heap size (-Xmx) - "QueryMemFree = WorkerHeapSize - QueryMemUsed", // Remaining memory until the query runs into - // the max heap size - "NRows = EntryIntervalAdded + EntryIntervalRemoved + EntryIntervalModified", // Total number of - // changed rows - "RowsPerSec = round(NRows / IntervalDurationNanos * 1.0e9)", // Average rate data is ticking at - "RowsPerCPUSec = round(NRows / EntryIntervalUsage * 1.0e9)" // Approximation of how fast CPU - // handles row changes - ) - .moveColumnsUp( - "ProcessUniqueId", "EvaluationNumber", "OperationNumber", - "Ratio", "QueryMemUsed", "QueryMemUsedPct", "IntervalEndTime", - "RowsPerSec", "RowsPerCPUSec", "EntryDescription"); - if (formatPctColumnsLocal && formatPctColumns) { + // % of time during this interval that the operation was using CPU + "Ratio = EntryIntervalUsage / IntervalDurationNanos", + // Memory in use by the query. (Only includes active heap memory.) + "QueryMemUsed = MaxTotalMemory - MinFreeMemory", + // Memory usage as a percentage of the max heap size (-Xmx) + "QueryMemUsedPct = QueryMemUsed / WorkerHeapSize", + // Remaining memory until the query runs into the max heap size + "QueryMemFree = WorkerHeapSize - QueryMemUsed", + // Total number of changed rows + "NRows = EntryIntervalAdded + EntryIntervalRemoved + EntryIntervalModified", + // Average rate data is ticking at + "RowsPerSec = round(NRows / IntervalDurationNanos * 1.0e9)", + // Approximation of how fast CPU handles row changes + "RowsPerCPUSec = round(NRows / EntryIntervalUsage * 1.0e9)"); + + queryUpdatePerformance = maybeMoveColumnsUp(queryUpdatePerformance, + "ProcessUniqueId", "EvaluationNumber", "OperationNumber", + "Ratio", "QueryMemUsed", "QueryMemUsedPct", "IntervalEndTime", + "RowsPerSec", "RowsPerCPUSec", "EntryDescription"); + + if (formatPctColumnsLocal && FORMAT_PCT_COLUMNS) { queryUpdatePerformance = formatColumnsAsPctUpdatePerformance(queryUpdatePerformance); } return queryUpdatePerformance; @@ -149,6 +157,7 @@ public static Map queryUpdatePerformanceMap(final Table queryUpda "EntryIntervalAdded", "EntryIntervalRemoved", "EntryIntervalModified", + "EntryIntervalShifted", "NRows"); // Create a table showing the 'worst' updates, i.e. 
the operations with the greatest 'Ratio' @@ -174,7 +183,7 @@ public static Map queryUpdatePerformanceMap(final Table queryUpda AggPct(0.50, "Ratio_50_Percentile = Ratio", "QueryMemUsedPct_50_Percentile = QueryMemUsedPct"), AggMax("Ratio_Max = Ratio", "QueryMemUsedPct_Max = QueryMemUsedPct"))); - if (formatPctColumns) { + if (FORMAT_PCT_COLUMNS) { qup = formatColumnsAsPctUpdatePerformance(qup); worstInterval = formatColumnsAsPct(worstInterval, "Ratio"); updateWorst = formatColumnsAsPctUpdatePerformance(updateWorst); @@ -270,7 +279,7 @@ public static Map serverStateWithPlots(final Table pml) { final Table pm = serverState(pml); resultMap.put("ServerState", pm); - int maxMemMiB = DataAccessHelpers.getColumn(pm, "MaxMemMiB").getInt(0); + int maxMemMiB = pm.getColumnSource("MaxMemMiB").getInt(pm.getRowSet().firstRowKey()); if (maxMemMiB == QueryConstants.NULL_INT) { maxMemMiB = 4096; } @@ -310,6 +319,35 @@ public static Map serverStateWithPlots(final Table pml) { return resultMap; } + public static TreeTable queryPerformanceAsTreeTable(@NotNull final Table qpl) { + return qpl.tree("EvaluationNumber", "ParentEvaluationNumber"); + } + + public static TreeTable queryOperationPerformanceAsTreeTable( + @NotNull final Table qpl, @NotNull final Table qopl) { + // TODO (https://github.com/deephaven/deephaven-core/issues/4814): use NULL_INT for ParentOperationNumber and + // Depth once we can prevent any compilation or at least reduce multiple usages to a single formula + Table mergeWithAggKeys = TableTools.merge( + qpl.updateView( + "EvalKey = Long.toString(EvaluationNumber)", + "ParentEvalKey = ParentEvaluationNumber == null ? null : (Long.toString(ParentEvaluationNumber))", + "OperationNumber = NULL_INT", + "ParentOperationNumber = OperationNumber", + "Depth = OperationNumber", + "CallerLine = (String) null", + "IsCompilation = NULL_BOOLEAN", + "InputSizeLong = NULL_LONG"), + qopl.updateView( + "EvalKey = EvaluationNumber + `:` + OperationNumber", + "ParentEvalKey = EvaluationNumber + (ParentOperationNumber == null ? `` : (`:` + ParentOperationNumber))", + "Exception = (String) null")) + .moveColumnsUp("EvalKey", "ParentEvalKey") + .moveColumnsDown("EvaluationNumber", "ParentEvaluationNumber", "OperationNumber", + "ParentOperationNumber"); + + return mergeWithAggKeys.tree("EvalKey", "ParentEvalKey"); + } + private static Table formatColumnsAsPct(final Table t, final String... cols) { final String[] formats = new String[cols.length]; for (int i = 0; i < cols.length; ++i) { @@ -323,11 +361,17 @@ private static Table formatColumnsAsPctUpdatePerformance(final Table updatePerfo } private static long getWorkerHeapSizeBytes() { - final OptionalLong opt = EngineMetrics.getProcessInfo().getMemoryInfo().heap().max(); - return opt.orElse(0); + return EngineMetrics.getProcessInfo().getMemoryInfo().heap().max().orElse(0); } private static String whereConditionForEvaluationNumber(final long evaluationNumber) { - return "EvaluationNumber = " + evaluationNumber + ""; + return "EvaluationNumber = " + evaluationNumber; + } + + private static Table maybeMoveColumnsUp(final Table source, final String... 
cols) { + return source.moveColumnsUp(Stream.of(cols) + .filter(columnName -> !ALLOWED_MISSING_COLUMN_NAMES.contains(columnName) + || source.hasColumns(columnName)) + .toArray(String[]::new)); } } diff --git a/props/configs/src/main/resources/defaultPackageFilters.qpr b/props/configs/src/main/resources/defaultPackageFilters.qpr index 866e95bcd36..df0b4c41167 100644 --- a/props/configs/src/main/resources/defaultPackageFilters.qpr +++ b/props/configs/src/main/resources/defaultPackageFilters.qpr @@ -1,6 +1,10 @@ java. sun. -groovy.lang -org.codehaus.groovy +groovy.lang. +org.codehaus.groovy. io.deephaven. -io.deephaven.engine +io.grpc. +com.google.common. +org.eclipse. +jdk.internal. +org.jpy. diff --git a/py/server/deephaven/perfmon.py b/py/server/deephaven/perfmon.py index ccf602a35b4..4f9b76c1483 100644 --- a/py/server/deephaven/perfmon.py +++ b/py/server/deephaven/perfmon.py @@ -11,7 +11,8 @@ from deephaven import DHError from deephaven.jcompat import j_map_to_dict -from deephaven.table import Table +from deephaven.table import Table, TreeTable +from deephaven.update_graph import auto_locking_ctx _JPerformanceQueries = jpy.get_type("io.deephaven.engine.table.impl.util.PerformanceQueries") _JMetricsManager = jpy.get_type("io.deephaven.util.metrics.MetricsManager") @@ -95,6 +96,40 @@ def query_performance_log() -> Table: except Exception as e: raise DHError(e, "failed to obtain the query performance log table.") from e +def query_operation_performance_tree_table() -> TreeTable: + """ Returns a tree table with Deephaven performance data for individual subqueries. + + Returns: + a TreeTable + + Raises: + DHError + """ + try: + with auto_locking_ctx(query_performance_log()): + return TreeTable(j_tree_table=_JPerformanceQueries.queryOperationPerformanceAsTreeTable(), + id_col = "EvalKey", parent_col = "ParentEvalKey") + except Exception as e: + raise DHError(e, "failed to obtain the query operation performance log as tree table.") from e + + +def query_performance_tree_table() -> TreeTable: + """ Returns a tree table with Deephaven query performance data. Performance data for individual sub-operations as + a tree table is available from calling `query_operation_performance_tree_table`. + + Returns: + a TreeTable + + Raises: + DHError + """ + try: + with auto_locking_ctx(query_performance_log()): + return TreeTable(j_tree_table=_JPerformanceQueries.queryPerformanceAsTreeTable(), + id_col = "EvaluationNumber", parent_col = "ParentEvaluationNumber") + except Exception as e: + raise DHError(e, "failed to obtain the query performance log as tree table.") from e + def update_performance_log() -> Table: """ Returns a table with Deephaven update performance data. 
diff --git a/py/server/tests/test_perfmon.py b/py/server/tests/test_perfmon.py index 121b774aea4..60b864cd696 100644 --- a/py/server/tests/test_perfmon.py +++ b/py/server/tests/test_perfmon.py @@ -7,7 +7,7 @@ from deephaven import empty_table from deephaven.perfmon import process_info_log, process_metrics_log, server_state_log, \ query_operation_performance_log, query_performance_log, update_performance_log, metrics_get_counters, \ - metrics_reset_counters + metrics_reset_counters, query_performance_tree_table, query_operation_performance_tree_table from deephaven.perfmon import query_update_performance, query_performance, query_operation_performance, server_state from tests.testbase import BaseTestCase @@ -62,6 +62,8 @@ def test_query_logs(self): self.assertTrue(log_table.to_string()) log_table = update_performance_log() self.assertTrue(log_table.to_string()) + log_table = query_performance_tree_table() + self.assertIsNotNone(log_table) def test_performance_queries(self): q = query_performance(1) @@ -72,6 +74,8 @@ def test_performance_queries(self): self.assertTrue(q.to_string()) q = query_update_performance(1) self.assertTrue(q.to_string()) + q = query_operation_performance_tree_table() + self.assertIsNotNone(q) if __name__ == '__main__': diff --git a/server/src/main/java/io/deephaven/server/arrow/ArrowFlightUtil.java b/server/src/main/java/io/deephaven/server/arrow/ArrowFlightUtil.java index c2351dfeae2..73df0eead12 100644 --- a/server/src/main/java/io/deephaven/server/arrow/ArrowFlightUtil.java +++ b/server/src/main/java/io/deephaven/server/arrow/ArrowFlightUtil.java @@ -19,6 +19,8 @@ import io.deephaven.engine.table.Table; import io.deephaven.engine.table.impl.BaseTable; import io.deephaven.engine.table.impl.QueryTable; +import io.deephaven.engine.table.impl.perf.QueryPerformanceNugget; +import io.deephaven.engine.table.impl.perf.QueryPerformanceRecorder; import io.deephaven.engine.table.impl.util.BarrageMessage; import io.deephaven.engine.updategraph.UpdateGraph; import io.deephaven.extensions.barrage.BarragePerformanceLog; @@ -71,37 +73,44 @@ public static void DoGetCustom( final Flight.Ticket request, final StreamObserver observer) { - final SessionState.ExportObject> export = - ticketRouter.resolve(session, request, "request"); - - final BarragePerformanceLog.SnapshotMetricsHelper metrics = - new BarragePerformanceLog.SnapshotMetricsHelper(); - - final long queueStartTm = System.nanoTime(); - session.nonExport() - .require(export) - .onError(observer) - .submit(() -> { - metrics.queueNanos = System.nanoTime() - queueStartTm; - final BaseTable table = export.get(); - metrics.tableId = Integer.toHexString(System.identityHashCode(table)); - metrics.tableKey = BarragePerformanceLog.getKeyFor(table); - - // create an adapter for the response observer - final StreamObserver listener = - ArrowModule.provideListenerAdapter().adapt(observer); - - // push the schema to the listener - listener.onNext(streamGeneratorFactory.getSchemaView( - fbb -> BarrageUtil.makeTableSchemaPayload(fbb, DEFAULT_SNAPSHOT_DESER_OPTIONS, - table.getDefinition(), table.getAttributes()))); - - // shared code between `DoGet` and `BarrageSnapshotRequest` - BarrageUtil.createAndSendSnapshot(streamGeneratorFactory, table, null, null, false, - DEFAULT_SNAPSHOT_DESER_OPTIONS, listener, metrics); - - listener.onCompleted(); - }); + final String description = "FlightService#DoGet(table=" + ticketRouter.getLogNameFor(request, "table") + ")"; + final QueryPerformanceRecorder queryPerformanceRecorder = 
QueryPerformanceRecorder.newQuery( + description, session.getSessionId(), QueryPerformanceNugget.DEFAULT_FACTORY); + + try (final SafeCloseable ignored = queryPerformanceRecorder.startQuery()) { + final SessionState.ExportObject> tableExport = + ticketRouter.resolve(session, request, "table"); + + final BarragePerformanceLog.SnapshotMetricsHelper metrics = + new BarragePerformanceLog.SnapshotMetricsHelper(); + + final long queueStartTm = System.nanoTime(); + session.nonExport() + .queryPerformanceRecorder(queryPerformanceRecorder) + .require(tableExport) + .onError(observer) + .submit(() -> { + metrics.queueNanos = System.nanoTime() - queueStartTm; + final BaseTable table = tableExport.get(); + metrics.tableId = Integer.toHexString(System.identityHashCode(table)); + metrics.tableKey = BarragePerformanceLog.getKeyFor(table); + + // create an adapter for the response observer + final StreamObserver listener = + ArrowModule.provideListenerAdapter().adapt(observer); + + // push the schema to the listener + listener.onNext(streamGeneratorFactory.getSchemaView( + fbb -> BarrageUtil.makeTableSchemaPayload(fbb, DEFAULT_SNAPSHOT_DESER_OPTIONS, + table.getDefinition(), table.getAttributes()))); + + // shared code between `DoGet` and `BarrageSnapshotRequest` + BarrageUtil.createAndSendSnapshot(streamGeneratorFactory, table, null, null, false, + DEFAULT_SNAPSHOT_DESER_OPTIONS, listener, metrics); + + listener.onCompleted(); + }); + } } /** @@ -478,69 +487,78 @@ public void handleMessage(@NotNull final BarrageProtoUtil.MessageInfo message) { final BarrageSnapshotRequest snapshotRequest = BarrageSnapshotRequest .getRootAsBarrageSnapshotRequest(message.app_metadata.msgPayloadAsByteBuffer()); - final SessionState.ExportObject> parent = - ticketRouter.resolve(session, snapshotRequest.ticketAsByteBuffer(), "ticket"); - - final BarragePerformanceLog.SnapshotMetricsHelper metrics = - new BarragePerformanceLog.SnapshotMetricsHelper(); - - final long queueStartTm = System.nanoTime(); - session.nonExport() - .require(parent) - .onError(listener) - .submit(() -> { - metrics.queueNanos = System.nanoTime() - queueStartTm; - final BaseTable table = parent.get(); - metrics.tableId = Integer.toHexString(System.identityHashCode(table)); - metrics.tableKey = BarragePerformanceLog.getKeyFor(table); - - if (table.isFailed()) { - throw Exceptions.statusRuntimeException(Code.FAILED_PRECONDITION, - "Table is already failed"); - } - - // push the schema to the listener - listener.onNext(streamGeneratorFactory.getSchemaView( - fbb -> BarrageUtil.makeTableSchemaPayload(fbb, - snapshotOptAdapter.adapt(snapshotRequest), - table.getDefinition(), table.getAttributes()))); - - // collect the viewport and columnsets (if provided) - final boolean hasColumns = snapshotRequest.columnsVector() != null; - final BitSet columns = - hasColumns ? BitSet.valueOf(snapshotRequest.columnsAsByteBuffer()) : null; - - final boolean hasViewport = snapshotRequest.viewportVector() != null; - RowSet viewport = - hasViewport - ? 
BarrageProtoUtil.toRowSet(snapshotRequest.viewportAsByteBuffer()) - : null; - - final boolean reverseViewport = snapshotRequest.reverseViewport(); - - // leverage common code for `DoGet` and `BarrageSnapshotOptions` - BarrageUtil.createAndSendSnapshot(streamGeneratorFactory, table, columns, viewport, - reverseViewport, snapshotOptAdapter.adapt(snapshotRequest), listener, metrics); - HalfClosedState newState = halfClosedState.updateAndGet(current -> { - switch (current) { - case DONT_CLOSE: - // record that we have finished sending - return HalfClosedState.FINISHED_SENDING; - case CLIENT_HALF_CLOSED: - // since streaming has now finished, and client already half-closed, time to - // half close from server - return HalfClosedState.CLOSED; - case FINISHED_SENDING: - case CLOSED: - throw new IllegalStateException("Can't finish streaming twice"); - default: - throw new IllegalStateException("Unknown state " + current); + final String description = "FlightService#DoExchange(snapshot, table=" + + ticketRouter.getLogNameFor(snapshotRequest.ticketAsByteBuffer(), "table") + ")"; + final QueryPerformanceRecorder queryPerformanceRecorder = QueryPerformanceRecorder.newQuery( + description, session.getSessionId(), QueryPerformanceNugget.DEFAULT_FACTORY); + + try (final SafeCloseable ignored = queryPerformanceRecorder.startQuery()) { + final SessionState.ExportObject> tableExport = + ticketRouter.resolve(session, snapshotRequest.ticketAsByteBuffer(), "table"); + + final BarragePerformanceLog.SnapshotMetricsHelper metrics = + new BarragePerformanceLog.SnapshotMetricsHelper(); + + final long queueStartTm = System.nanoTime(); + session.nonExport() + .queryPerformanceRecorder(queryPerformanceRecorder) + .require(tableExport) + .onError(listener) + .submit(() -> { + metrics.queueNanos = System.nanoTime() - queueStartTm; + final BaseTable table = tableExport.get(); + metrics.tableId = Integer.toHexString(System.identityHashCode(table)); + metrics.tableKey = BarragePerformanceLog.getKeyFor(table); + + if (table.isFailed()) { + throw Exceptions.statusRuntimeException(Code.FAILED_PRECONDITION, + "Table is already failed"); + } + + // push the schema to the listener + listener.onNext(streamGeneratorFactory.getSchemaView( + fbb -> BarrageUtil.makeTableSchemaPayload(fbb, + snapshotOptAdapter.adapt(snapshotRequest), + table.getDefinition(), table.getAttributes()))); + + // collect the viewport and columnsets (if provided) + final boolean hasColumns = snapshotRequest.columnsVector() != null; + final BitSet columns = + hasColumns ? BitSet.valueOf(snapshotRequest.columnsAsByteBuffer()) : null; + + final boolean hasViewport = snapshotRequest.viewportVector() != null; + RowSet viewport = + hasViewport + ? 
BarrageProtoUtil.toRowSet(snapshotRequest.viewportAsByteBuffer()) + : null; + + final boolean reverseViewport = snapshotRequest.reverseViewport(); + + // leverage common code for `DoGet` and `BarrageSnapshotOptions` + BarrageUtil.createAndSendSnapshot(streamGeneratorFactory, table, columns, viewport, + reverseViewport, snapshotOptAdapter.adapt(snapshotRequest), listener, + metrics); + HalfClosedState newState = halfClosedState.updateAndGet(current -> { + switch (current) { + case DONT_CLOSE: + // record that we have finished sending + return HalfClosedState.FINISHED_SENDING; + case CLIENT_HALF_CLOSED: + // since streaming has now finished, and client already half-closed, + // time to half close from server + return HalfClosedState.CLOSED; + case FINISHED_SENDING: + case CLOSED: + throw new IllegalStateException("Can't finish streaming twice"); + default: + throw new IllegalStateException("Unknown state " + current); + } + }); + if (newState == HalfClosedState.CLOSED) { + listener.onCompleted(); } }); - if (newState == HalfClosedState.CLOSED) { - listener.onCompleted(); - } - }); + } } } @@ -623,14 +641,23 @@ public void handleMessage(@NotNull final MessageInfo message) { preExportSubscriptions = new ArrayDeque<>(); preExportSubscriptions.add(subscriptionRequest); - final SessionState.ExportObject parent = - ticketRouter.resolve(session, subscriptionRequest.ticketAsByteBuffer(), "ticket"); - - synchronized (this) { - onExportResolvedContinuation = session.nonExport() - .require(parent) - .onErrorHandler(DoExchangeMarshaller.this::onError) - .submit(() -> onExportResolved(parent)); + + final String description = "FlightService#DoExchange(subscription, table=" + + ticketRouter.getLogNameFor(subscriptionRequest.ticketAsByteBuffer(), "table") + ")"; + final QueryPerformanceRecorder queryPerformanceRecorder = QueryPerformanceRecorder.newQuery( + description, session.getSessionId(), QueryPerformanceNugget.DEFAULT_FACTORY); + + try (final SafeCloseable ignored = queryPerformanceRecorder.startQuery()) { + final SessionState.ExportObject table = + ticketRouter.resolve(session, subscriptionRequest.ticketAsByteBuffer(), "table"); + + synchronized (this) { + onExportResolvedContinuation = session.nonExport() + .queryPerformanceRecorder(queryPerformanceRecorder) + .require(table) + .onErrorHandler(DoExchangeMarshaller.this::onError) + .submit(() -> onExportResolved(table)); + } } } } diff --git a/server/src/main/java/io/deephaven/server/arrow/FlightServiceGrpcImpl.java b/server/src/main/java/io/deephaven/server/arrow/FlightServiceGrpcImpl.java index 66a7de0d37a..ca0b55a69b5 100644 --- a/server/src/main/java/io/deephaven/server/arrow/FlightServiceGrpcImpl.java +++ b/server/src/main/java/io/deephaven/server/arrow/FlightServiceGrpcImpl.java @@ -10,6 +10,9 @@ import io.deephaven.auth.AuthenticationException; import io.deephaven.auth.AuthenticationRequestHandler; import io.deephaven.auth.BasicAuthMarshaller; +import io.deephaven.engine.table.impl.perf.QueryPerformanceNugget; +import io.deephaven.engine.table.impl.perf.QueryPerformanceRecorder; +import io.deephaven.engine.table.impl.util.EngineMetrics; import io.deephaven.extensions.barrage.BarrageStreamGenerator; import io.deephaven.extensions.barrage.util.GrpcUtil; import io.deephaven.internal.log.LoggerFactory; @@ -22,6 +25,8 @@ import io.deephaven.server.session.SessionState; import io.deephaven.server.session.TicketRouter; import io.deephaven.auth.AuthContext; +import io.deephaven.util.SafeCloseable; +import io.grpc.StatusRuntimeException; import 
io.grpc.stub.StreamObserver; import org.apache.arrow.flight.impl.Flight; import org.apache.arrow.flight.impl.FlightServiceGrpc; @@ -170,30 +175,43 @@ public void getFlightInfo( @NotNull final StreamObserver responseObserver) { final SessionState session = sessionService.getOptionalSession(); - final SessionState.ExportObject export = - ticketRouter.flightInfoFor(session, request, "request"); - - if (session != null) { - session.nonExport() - .require(export) - .onError(responseObserver) - .submit(() -> { - responseObserver.onNext(export.get()); - responseObserver.onCompleted(); - }); - } else { + final String description = "FlightService#getFlightInfo(request=" + request + ")"; + final QueryPerformanceRecorder queryPerformanceRecorder = QueryPerformanceRecorder.newQuery( + description, session == null ? null : session.getSessionId(), QueryPerformanceNugget.DEFAULT_FACTORY); + + try (final SafeCloseable ignored = queryPerformanceRecorder.startQuery()) { + final SessionState.ExportObject export = + ticketRouter.flightInfoFor(session, request, "request"); + + if (session != null) { + session.nonExport() + .queryPerformanceRecorder(queryPerformanceRecorder) + .require(export) + .onError(responseObserver) + .submit(() -> { + responseObserver.onNext(export.get()); + responseObserver.onCompleted(); + }); + return; + } + + StatusRuntimeException exception = null; if (export.tryRetainReference()) { try { if (export.getState() == ExportNotification.State.EXPORTED) { - responseObserver.onNext(export.get()); - responseObserver.onCompleted(); + GrpcUtil.safelyOnNext(responseObserver, export.get()); + GrpcUtil.safelyComplete(responseObserver); } } finally { export.dropReference(); } } else { - responseObserver.onError( - Exceptions.statusRuntimeException(Code.FAILED_PRECONDITION, "Could not find flight info")); + exception = Exceptions.statusRuntimeException(Code.FAILED_PRECONDITION, "Could not find flight info"); + GrpcUtil.safelyError(responseObserver, exception); + } + + if (queryPerformanceRecorder.endQuery() || exception != null) { + EngineMetrics.getInstance().logQueryProcessingResults(queryPerformanceRecorder, exception); } } } @@ -204,33 +222,48 @@ public void getSchema( @NotNull final StreamObserver responseObserver) { final SessionState session = sessionService.getOptionalSession(); - final SessionState.ExportObject export = - ticketRouter.flightInfoFor(session, request, "request"); + final String description = "FlightService#getSchema(request=" + request + ")"; + final QueryPerformanceRecorder queryPerformanceRecorder = QueryPerformanceRecorder.newQuery( + description, session == null ? 
null : session.getSessionId(), QueryPerformanceNugget.DEFAULT_FACTORY); + + try (final SafeCloseable ignored = queryPerformanceRecorder.startQuery()) { + final SessionState.ExportObject export = + ticketRouter.flightInfoFor(session, request, "request"); - if (session != null) { - session.nonExport() - .require(export) - .onError(responseObserver) - .submit(() -> { - responseObserver.onNext(Flight.SchemaResult.newBuilder() + if (session != null) { + session.nonExport() + .queryPerformanceRecorder(queryPerformanceRecorder) + .require(export) + .onError(responseObserver) + .submit(() -> { + responseObserver.onNext(Flight.SchemaResult.newBuilder() + .setSchema(export.get().getSchema()) + .build()); + responseObserver.onCompleted(); + }); + return; + } + + StatusRuntimeException exception = null; + if (export.tryRetainReference()) { + try { + if (export.getState() == ExportNotification.State.EXPORTED) { + GrpcUtil.safelyOnNext(responseObserver, Flight.SchemaResult.newBuilder() .setSchema(export.get().getSchema()) .build()); - responseObserver.onCompleted(); - }); - } else if (export.tryRetainReference()) { - try { - if (export.getState() == ExportNotification.State.EXPORTED) { - responseObserver.onNext(Flight.SchemaResult.newBuilder() - .setSchema(export.get().getSchema()) - .build()); - responseObserver.onCompleted(); + GrpcUtil.safelyComplete(responseObserver); + } + } finally { + export.dropReference(); } - } finally { - export.dropReference(); + } else { + exception = Exceptions.statusRuntimeException(Code.FAILED_PRECONDITION, "Could not find flight info"); + responseObserver.onError(exception); + } + + if (queryPerformanceRecorder.endQuery() || exception != null) { + EngineMetrics.getInstance().logQueryProcessingResults(queryPerformanceRecorder, exception); } - } else { - responseObserver.onError( - Exceptions.statusRuntimeException(Code.FAILED_PRECONDITION, "Could not find flight info")); } } diff --git a/server/src/main/java/io/deephaven/server/barrage/BarrageMessageProducer.java b/server/src/main/java/io/deephaven/server/barrage/BarrageMessageProducer.java index d06c189d4be..c8ad8192fed 100644 --- a/server/src/main/java/io/deephaven/server/barrage/BarrageMessageProducer.java +++ b/server/src/main/java/io/deephaven/server/barrage/BarrageMessageProducer.java @@ -151,7 +151,7 @@ public Operation( @Override public String getDescription() { - return "BarrageMessageProducer(" + updateIntervalMs + ")"; + return "BarrageMessageProducer(" + updateIntervalMs + "," + System.identityHashCode(parent) + ")"; } @Override diff --git a/server/src/main/java/io/deephaven/server/console/ConsoleServiceGrpcImpl.java b/server/src/main/java/io/deephaven/server/console/ConsoleServiceGrpcImpl.java index e99e14f1efb..3fec26cab7b 100644 --- a/server/src/main/java/io/deephaven/server/console/ConsoleServiceGrpcImpl.java +++ b/server/src/main/java/io/deephaven/server/console/ConsoleServiceGrpcImpl.java @@ -8,6 +8,8 @@ import io.deephaven.base.LockFreeArrayQueue; import io.deephaven.configuration.Configuration; import io.deephaven.engine.table.Table; +import io.deephaven.engine.table.impl.perf.QueryPerformanceNugget; +import io.deephaven.engine.table.impl.perf.QueryPerformanceRecorder; import io.deephaven.engine.table.impl.util.RuntimeMemory; import io.deephaven.engine.table.impl.util.RuntimeMemory.Sample; import io.deephaven.engine.updategraph.DynamicNode; @@ -35,6 +37,7 @@ import io.deephaven.server.session.SessionState.ExportBuilder; import io.deephaven.server.session.TicketRouter; import 
io.deephaven.server.util.Scheduler; +import io.deephaven.util.SafeCloseable; import io.grpc.stub.ServerCallStreamObserver; import io.grpc.stub.StreamObserver; import org.jetbrains.annotations.NotNull; @@ -164,29 +167,38 @@ public void executeCommand( throw Exceptions.statusRuntimeException(Code.FAILED_PRECONDITION, "No consoleId supplied"); } - SessionState.ExportObject exportedConsole = - ticketRouter.resolve(session, consoleId, "consoleId"); - session.nonExport() - .requiresSerialQueue() - .require(exportedConsole) - .onError(responseObserver) - .submit(() -> { - ScriptSession scriptSession = exportedConsole.get(); - ScriptSession.Changes changes = scriptSession.evaluateScript(request.getCode()); - ExecuteCommandResponse.Builder diff = ExecuteCommandResponse.newBuilder(); - FieldsChangeUpdate.Builder fieldChanges = FieldsChangeUpdate.newBuilder(); - changes.created.entrySet() - .forEach(entry -> fieldChanges.addCreated(makeVariableDefinition(entry))); - changes.updated.entrySet() - .forEach(entry -> fieldChanges.addUpdated(makeVariableDefinition(entry))); - changes.removed.entrySet() - .forEach(entry -> fieldChanges.addRemoved(makeVariableDefinition(entry))); - if (changes.error != null) { - diff.setErrorMessage(Throwables.getStackTraceAsString(changes.error)); - log.error().append("Error running script: ").append(changes.error).endl(); - } - safelyComplete(responseObserver, diff.setChanges(fieldChanges).build()); - }); + final String description = "ConsoleService#executeCommand(console=" + + ticketRouter.getLogNameFor(consoleId, "consoleId") + ")"; + final QueryPerformanceRecorder queryPerformanceRecorder = QueryPerformanceRecorder.newQuery( + description, session.getSessionId(), QueryPerformanceNugget.DEFAULT_FACTORY); + + try (final SafeCloseable ignored = queryPerformanceRecorder.startQuery()) { + final SessionState.ExportObject exportedConsole = + ticketRouter.resolve(session, consoleId, "consoleId"); + + session.nonExport() + .queryPerformanceRecorder(queryPerformanceRecorder) + .requiresSerialQueue() + .require(exportedConsole) + .onError(responseObserver) + .submit(() -> { + ScriptSession scriptSession = exportedConsole.get(); + ScriptSession.Changes changes = scriptSession.evaluateScript(request.getCode()); + ExecuteCommandResponse.Builder diff = ExecuteCommandResponse.newBuilder(); + FieldsChangeUpdate.Builder fieldChanges = FieldsChangeUpdate.newBuilder(); + changes.created.entrySet() + .forEach(entry -> fieldChanges.addCreated(makeVariableDefinition(entry))); + changes.updated.entrySet() + .forEach(entry -> fieldChanges.addUpdated(makeVariableDefinition(entry))); + changes.removed.entrySet() + .forEach(entry -> fieldChanges.addRemoved(makeVariableDefinition(entry))); + if (changes.error != null) { + diff.setErrorMessage(Throwables.getStackTraceAsString(changes.error)); + log.error().append("Error running script: ").append(changes.error).endl(); + } + safelyComplete(responseObserver, diff.setChanges(fieldChanges).build()); + }); + } } @Override @@ -236,36 +248,48 @@ public void bindTableToVariable( @NotNull final StreamObserver responseObserver) { final SessionState session = sessionService.getCurrentSession(); - Ticket tableId = request.getTableId(); + final Ticket tableId = request.getTableId(); if (tableId.getTicket().isEmpty()) { throw Exceptions.statusRuntimeException(Code.FAILED_PRECONDITION, "No source tableId supplied"); } - final SessionState.ExportObject
<Table>
exportedTable = ticketRouter.resolve(session, tableId, "tableId"); - final SessionState.ExportObject exportedConsole; - ExportBuilder exportBuilder = session.nonExport() - .requiresSerialQueue() - .onError(responseObserver); + final String description = "ConsoleService#bindTableToVariable(table=" + + ticketRouter.getLogNameFor(tableId, "tableId") + ", variableName=" + request.getVariableName() + + ")"; + final QueryPerformanceRecorder queryPerformanceRecorder = QueryPerformanceRecorder.newQuery( + description, session.getSessionId(), QueryPerformanceNugget.DEFAULT_FACTORY); + + try (final SafeCloseable ignored = queryPerformanceRecorder.startQuery()) { + final SessionState.ExportObject
exportedTable = + ticketRouter.resolve(session, tableId, "tableId"); + + final SessionState.ExportObject exportedConsole; + + ExportBuilder exportBuilder = session.nonExport() + .queryPerformanceRecorder(queryPerformanceRecorder) + .requiresSerialQueue() + .onError(responseObserver); + + if (request.hasConsoleId()) { + exportedConsole = ticketRouter.resolve(session, request.getConsoleId(), "consoleId"); + exportBuilder.require(exportedTable, exportedConsole); + } else { + exportedConsole = null; + exportBuilder.require(exportedTable); + } - if (request.hasConsoleId()) { - exportedConsole = ticketRouter.resolve(session, request.getConsoleId(), "consoleId"); - exportBuilder.require(exportedTable, exportedConsole); - } else { - exportedConsole = null; - exportBuilder.require(exportedTable); + exportBuilder.submit(() -> { + ScriptSession scriptSession = + exportedConsole != null ? exportedConsole.get() : scriptSessionProvider.get(); + Table table = exportedTable.get(); + scriptSession.setVariable(request.getVariableName(), table); + if (DynamicNode.notDynamicOrIsRefreshing(table)) { + scriptSession.manage(table); + } + responseObserver.onNext(BindTableToVariableResponse.getDefaultInstance()); + responseObserver.onCompleted(); + }); } - - exportBuilder.submit(() -> { - ScriptSession scriptSession = - exportedConsole != null ? exportedConsole.get() : scriptSessionProvider.get(); - Table table = exportedTable.get(); - scriptSession.setVariable(request.getVariableName(), table); - if (DynamicNode.notDynamicOrIsRefreshing(table)) { - scriptSession.manage(table); - } - responseObserver.onNext(BindTableToVariableResponse.getDefaultInstance()); - responseObserver.onCompleted(); - }); } @Override diff --git a/server/src/main/java/io/deephaven/server/hierarchicaltable/HierarchicalTableServiceGrpcImpl.java b/server/src/main/java/io/deephaven/server/hierarchicaltable/HierarchicalTableServiceGrpcImpl.java index 8824b4f358e..96d5d018b09 100644 --- a/server/src/main/java/io/deephaven/server/hierarchicaltable/HierarchicalTableServiceGrpcImpl.java +++ b/server/src/main/java/io/deephaven/server/hierarchicaltable/HierarchicalTableServiceGrpcImpl.java @@ -18,6 +18,8 @@ import io.deephaven.engine.table.impl.AbsoluteSortColumnConventions; import io.deephaven.engine.table.impl.BaseGridAttributes; import io.deephaven.engine.table.impl.hierarchical.RollupTableImpl; +import io.deephaven.engine.table.impl.perf.QueryPerformanceNugget; +import io.deephaven.engine.table.impl.perf.QueryPerformanceRecorder; import io.deephaven.engine.table.impl.select.WhereFilter; import io.deephaven.extensions.barrage.util.ExportUtil; import io.deephaven.internal.log.LoggerFactory; @@ -31,6 +33,7 @@ import io.deephaven.server.table.ops.AggregationAdapter; import io.deephaven.server.table.ops.FilterTableGrpcImpl; import io.deephaven.server.table.ops.filter.FilterFactory; +import io.deephaven.util.SafeCloseable; import io.grpc.stub.StreamObserver; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; @@ -73,31 +76,39 @@ public void rollup( final SessionState session = sessionService.getCurrentSession(); - final SessionState.ExportObject
sourceTableExport = ticketRouter.resolve( - session, request.getSourceTableId(), "rollup.sourceTableId"); - - session.newExport(request.getResultRollupTableId(), "rollup.resultRollupTableId") - .require(sourceTableExport) - .onError(responseObserver) - .submit(() -> { - final Table sourceTable = sourceTableExport.get(); - - authWiring.checkPermissionRollup(session.getAuthContext(), request, List.of(sourceTable)); - - final Collection aggregations = request.getAggregationsList().stream() - .map(AggregationAdapter::adapt) - .collect(Collectors.toList()); - final boolean includeConstituents = request.getIncludeConstituents(); - final Collection groupByColumns = request.getGroupByColumnsList().stream() - .map(ColumnName::of) - .collect(Collectors.toList()); - final RollupTable result = sourceTable.rollup( - aggregations, includeConstituents, groupByColumns); - - final RollupTable transformedResult = authTransformation.transform(result); - safelyComplete(responseObserver, RollupResponse.getDefaultInstance()); - return transformedResult; - }); + final String description = "HierarchicalTableService#rollup(table=" + + ticketRouter.getLogNameFor(request.getSourceTableId(), "sourceTableId") + ")"; + final QueryPerformanceRecorder queryPerformanceRecorder = QueryPerformanceRecorder.newQuery( + description, session.getSessionId(), QueryPerformanceNugget.DEFAULT_FACTORY); + + try (final SafeCloseable ignored = queryPerformanceRecorder.startQuery()) { + final SessionState.ExportObject
sourceTableExport = + ticketRouter.resolve(session, request.getSourceTableId(), "sourceTableId"); + + session.newExport(request.getResultRollupTableId(), "resultRollupTableId") + .queryPerformanceRecorder(queryPerformanceRecorder) + .require(sourceTableExport) + .onError(responseObserver) + .submit(() -> { + final Table sourceTable = sourceTableExport.get(); + + authWiring.checkPermissionRollup(session.getAuthContext(), request, List.of(sourceTable)); + + final Collection aggregations = request.getAggregationsList().stream() + .map(AggregationAdapter::adapt) + .collect(Collectors.toList()); + final boolean includeConstituents = request.getIncludeConstituents(); + final Collection groupByColumns = request.getGroupByColumnsList().stream() + .map(ColumnName::of) + .collect(Collectors.toList()); + final RollupTable result = sourceTable.rollup( + aggregations, includeConstituents, groupByColumns); + + final RollupTable transformedResult = authTransformation.transform(result); + safelyComplete(responseObserver, RollupResponse.getDefaultInstance()); + return transformedResult; + }); + } } private static void validate(@NotNull final RollupRequest request) { @@ -117,35 +128,43 @@ public void tree( final SessionState session = sessionService.getCurrentSession(); - final SessionState.ExportObject
sourceTableExport = ticketRouter.resolve( - session, request.getSourceTableId(), "tree.sourceTableId"); + final String description = "HierarchicalTableService#tree(table=" + + ticketRouter.getLogNameFor(request.getSourceTableId(), "sourceTableId") + ")"; + final QueryPerformanceRecorder queryPerformanceRecorder = QueryPerformanceRecorder.newQuery( + description, session.getSessionId(), QueryPerformanceNugget.DEFAULT_FACTORY); - session.newExport(request.getResultTreeTableId(), "tree.resultTreeTableId") - .require(sourceTableExport) - .onError(responseObserver) - .submit(() -> { - final Table sourceTable = sourceTableExport.get(); + try (final SafeCloseable ignored = queryPerformanceRecorder.startQuery()) { + final SessionState.ExportObject
sourceTableExport = + ticketRouter.resolve(session, request.getSourceTableId(), "sourceTableId"); - authWiring.checkPermissionTree(session.getAuthContext(), request, List.of(sourceTable)); + session.newExport(request.getResultTreeTableId(), "resultTreeTableId") + .queryPerformanceRecorder(queryPerformanceRecorder) + .require(sourceTableExport) + .onError(responseObserver) + .submit(() -> { + final Table sourceTable = sourceTableExport.get(); - final ColumnName identifierColumn = ColumnName.of(request.getIdentifierColumn()); - final ColumnName parentIdentifierColumn = ColumnName.of(request.getParentIdentifierColumn()); + authWiring.checkPermissionTree(session.getAuthContext(), request, List.of(sourceTable)); - final Table sourceTableToUse; - if (request.getPromoteOrphans()) { - sourceTableToUse = TreeTable.promoteOrphans( - sourceTable, identifierColumn.name(), parentIdentifierColumn.name()); - } else { - sourceTableToUse = sourceTable; - } + final ColumnName identifierColumn = ColumnName.of(request.getIdentifierColumn()); + final ColumnName parentIdentifierColumn = ColumnName.of(request.getParentIdentifierColumn()); - final TreeTable result = sourceTableToUse.tree( - identifierColumn.name(), parentIdentifierColumn.name()); + final Table sourceTableToUse; + if (request.getPromoteOrphans()) { + sourceTableToUse = TreeTable.promoteOrphans( + sourceTable, identifierColumn.name(), parentIdentifierColumn.name()); + } else { + sourceTableToUse = sourceTable; + } - final TreeTable transformedResult = authTransformation.transform(result); - safelyComplete(responseObserver, TreeResponse.getDefaultInstance()); - return transformedResult; - }); + final TreeTable result = sourceTableToUse.tree( + identifierColumn.name(), parentIdentifierColumn.name()); + + final TreeTable transformedResult = authTransformation.transform(result); + safelyComplete(responseObserver, TreeResponse.getDefaultInstance()); + return transformedResult; + }); + } } private static void validate(@NotNull final TreeRequest request) { @@ -166,79 +185,87 @@ public void apply( final SessionState session = sessionService.getCurrentSession(); - final SessionState.ExportObject> inputHierarchicalTableExport = ticketRouter.resolve( - session, request.getInputHierarchicalTableId(), "apply.inputHierarchicalTableId"); - - session.newExport(request.getResultHierarchicalTableId(), "apply.resultHierarchicalTableId") - .require(inputHierarchicalTableExport) - .onError(responseObserver) - .submit(() -> { - final HierarchicalTable inputHierarchicalTable = inputHierarchicalTableExport.get(); - - authWiring.checkPermissionApply(session.getAuthContext(), request, - List.of(inputHierarchicalTable.getSource())); - - if (request.getFiltersCount() == 0 && request.getSortsCount() == 0) { - throw Exceptions.statusRuntimeException(Code.INVALID_ARGUMENT, "No operations specified"); - } - final Collection finishedConditions = request.getFiltersCount() == 0 - ? null - : FilterTableGrpcImpl.finishConditions(request.getFiltersList()); - final Collection translatedSorts = - translateAndValidateSorts(request, (BaseGridAttributes) inputHierarchicalTable); - - final HierarchicalTable result; - if (inputHierarchicalTable instanceof RollupTable) { - RollupTable rollupTable = (RollupTable) inputHierarchicalTable; - // Rollups only support filtering on the group-by columns, so we can safely use the - // aggregated node definition here. 
- final TableDefinition nodeDefinition = - rollupTable.getNodeDefinition(RollupTable.NodeType.Aggregated); - if (finishedConditions != null) { - final Collection filters = - makeWhereFilters(finishedConditions, nodeDefinition); - RollupTableImpl.initializeAndValidateFilters( - rollupTable.getSource(), - rollupTable.getGroupByColumns(), - filters, - message -> Exceptions.statusRuntimeException(Code.INVALID_ARGUMENT, message)); - rollupTable = rollupTable.withFilter(Filter.and(filters)); + final String description = "HierarchicalTableService#apply(table=" + + ticketRouter.getLogNameFor(request.getInputHierarchicalTableId(), "inputHierarchicalTableId") + ")"; + final QueryPerformanceRecorder queryPerformanceRecorder = QueryPerformanceRecorder.newQuery( + description, session.getSessionId(), QueryPerformanceNugget.DEFAULT_FACTORY); + + try (final SafeCloseable ignored = queryPerformanceRecorder.startQuery()) { + final SessionState.ExportObject> inputHierarchicalTableExport = + ticketRouter.resolve(session, request.getInputHierarchicalTableId(), "inputHierarchicalTableId"); + + session.newExport(request.getResultHierarchicalTableId(), "resultHierarchicalTableId") + .queryPerformanceRecorder(queryPerformanceRecorder) + .require(inputHierarchicalTableExport) + .onError(responseObserver) + .submit(() -> { + final HierarchicalTable inputHierarchicalTable = inputHierarchicalTableExport.get(); + + authWiring.checkPermissionApply(session.getAuthContext(), request, + List.of(inputHierarchicalTable.getSource())); + + if (request.getFiltersCount() == 0 && request.getSortsCount() == 0) { + throw Exceptions.statusRuntimeException(Code.INVALID_ARGUMENT, "No operations specified"); } - if (translatedSorts != null) { - RollupTable.NodeOperationsRecorder aggregatedSorts = - rollupTable.makeNodeOperationsRecorder(RollupTable.NodeType.Aggregated); - aggregatedSorts = aggregatedSorts.sort(translatedSorts); - if (rollupTable.includesConstituents()) { - final RollupTable.NodeOperationsRecorder constituentSorts = rollupTable - .translateAggregatedNodeOperationsForConstituentNodes(aggregatedSorts); - rollupTable = rollupTable.withNodeOperations(aggregatedSorts, constituentSorts); - } else { - rollupTable = rollupTable.withNodeOperations(aggregatedSorts); + final Collection finishedConditions = request.getFiltersCount() == 0 + ? null + : FilterTableGrpcImpl.finishConditions(request.getFiltersList()); + final Collection translatedSorts = + translateAndValidateSorts(request, (BaseGridAttributes) inputHierarchicalTable); + + final HierarchicalTable result; + if (inputHierarchicalTable instanceof RollupTable) { + RollupTable rollupTable = (RollupTable) inputHierarchicalTable; + // Rollups only support filtering on the group-by columns, so we can safely use the + // aggregated node definition here. 
+ final TableDefinition nodeDefinition = + rollupTable.getNodeDefinition(RollupTable.NodeType.Aggregated); + if (finishedConditions != null) { + final Collection filters = + makeWhereFilters(finishedConditions, nodeDefinition); + RollupTableImpl.initializeAndValidateFilters( + rollupTable.getSource(), + rollupTable.getGroupByColumns(), + filters, + message -> Exceptions.statusRuntimeException(Code.INVALID_ARGUMENT, message)); + rollupTable = rollupTable.withFilter(Filter.and(filters)); } + if (translatedSorts != null) { + RollupTable.NodeOperationsRecorder aggregatedSorts = + rollupTable.makeNodeOperationsRecorder(RollupTable.NodeType.Aggregated); + aggregatedSorts = aggregatedSorts.sort(translatedSorts); + if (rollupTable.includesConstituents()) { + final RollupTable.NodeOperationsRecorder constituentSorts = rollupTable + .translateAggregatedNodeOperationsForConstituentNodes(aggregatedSorts); + rollupTable = rollupTable.withNodeOperations(aggregatedSorts, constituentSorts); + } else { + rollupTable = rollupTable.withNodeOperations(aggregatedSorts); + } + } + result = rollupTable; + } else if (inputHierarchicalTable instanceof TreeTable) { + TreeTable treeTable = (TreeTable) inputHierarchicalTable; + final TableDefinition nodeDefinition = treeTable.getNodeDefinition(); + if (finishedConditions != null) { + treeTable = treeTable + .withFilter(Filter.and(makeWhereFilters(finishedConditions, nodeDefinition))); + } + if (translatedSorts != null) { + TreeTable.NodeOperationsRecorder treeSorts = treeTable.makeNodeOperationsRecorder(); + treeSorts = treeSorts.sort(translatedSorts); + treeTable = treeTable.withNodeOperations(treeSorts); + } + result = treeTable; + } else { + throw Exceptions.statusRuntimeException(Code.INVALID_ARGUMENT, + "Input is not a supported HierarchicalTable type"); } - result = rollupTable; - } else if (inputHierarchicalTable instanceof TreeTable) { - TreeTable treeTable = (TreeTable) inputHierarchicalTable; - final TableDefinition nodeDefinition = treeTable.getNodeDefinition(); - if (finishedConditions != null) { - treeTable = treeTable - .withFilter(Filter.and(makeWhereFilters(finishedConditions, nodeDefinition))); - } - if (translatedSorts != null) { - TreeTable.NodeOperationsRecorder treeSorts = treeTable.makeNodeOperationsRecorder(); - treeSorts = treeSorts.sort(translatedSorts); - treeTable = treeTable.withNodeOperations(treeSorts); - } - result = treeTable; - } else { - throw Exceptions.statusRuntimeException(Code.INVALID_ARGUMENT, - "Input is not a supported HierarchicalTable type"); - } - - final HierarchicalTable transformedResult = authTransformation.transform(result); - safelyComplete(responseObserver, HierarchicalTableApplyResponse.getDefaultInstance()); - return transformedResult; - }); + + final HierarchicalTable transformedResult = authTransformation.transform(result); + safelyComplete(responseObserver, HierarchicalTableApplyResponse.getDefaultInstance()); + return transformedResult; + }); + } } private static void validate(@NotNull final HierarchicalTableApplyRequest request) { @@ -315,9 +342,6 @@ public void view( final SessionState session = sessionService.getCurrentSession(); - final SessionState.ExportBuilder resultExportBuilder = - session.newExport(request.getResultViewId(), "view.resultViewId"); - final boolean usedExisting; final Ticket targetTicket; switch (request.getTargetCase()) { @@ -331,65 +355,78 @@ public void view( break; case TARGET_NOT_SET: default: - throw new IllegalStateException(); - } - final SessionState.ExportObject targetExport = 
ticketRouter.resolve( - session, targetTicket, "view.target"); - - final SessionState.ExportObject
keyTableExport; - if (request.hasExpansions()) { - keyTableExport = ticketRouter.resolve( - session, request.getExpansions().getKeyTableId(), "view.expansions.keyTableId"); - resultExportBuilder.require(targetExport, keyTableExport); - } else { - keyTableExport = null; - resultExportBuilder.require(targetExport); + throw Exceptions.statusRuntimeException(Code.INVALID_ARGUMENT, "No target specified"); } - resultExportBuilder.onError(responseObserver) - .submit(() -> { - final Table keyTable = keyTableExport == null ? null : keyTableExport.get(); - final Object target = targetExport.get(); - final HierarchicalTableView targetExistingView = usedExisting - ? (HierarchicalTableView) target - : null; - final HierarchicalTable targetHierarchicalTable = usedExisting - ? targetExistingView.getHierarchicalTable() - : (HierarchicalTable) target; - - authWiring.checkPermissionView(session.getAuthContext(), request, keyTable == null - ? List.of(targetHierarchicalTable.getSource()) - : List.of(keyTable, targetHierarchicalTable.getSource())); - - final HierarchicalTableView result; - if (usedExisting) { - if (keyTable != null) { - result = HierarchicalTableView.makeFromExistingView( - targetExistingView, - keyTable, - request.getExpansions().hasKeyTableActionColumn() - ? ColumnName.of(request.getExpansions().getKeyTableActionColumn()) - : null); - } else { - result = HierarchicalTableView.makeFromExistingView(targetExistingView); - } - } else { - if (keyTable != null) { - result = HierarchicalTableView.makeFromHierarchicalTable( - targetHierarchicalTable, - keyTable, - request.getExpansions().hasKeyTableActionColumn() - ? ColumnName.of(request.getExpansions().getKeyTableActionColumn()) - : null); + final String description = "HierarchicalTableService#view(table=" + + ticketRouter.getLogNameFor(targetTicket, "targetTableId") + ")"; + final QueryPerformanceRecorder queryPerformanceRecorder = QueryPerformanceRecorder.newQuery( + description, session.getSessionId(), QueryPerformanceNugget.DEFAULT_FACTORY); + + try (final SafeCloseable ignored = queryPerformanceRecorder.startQuery()) { + final SessionState.ExportBuilder resultExportBuilder = + session.newExport(request.getResultViewId(), "resultViewId"); + + final SessionState.ExportObject targetExport = + ticketRouter.resolve(session, targetTicket, "targetTableId"); + + final SessionState.ExportObject
keyTableExport; + if (request.hasExpansions()) { + keyTableExport = ticketRouter.resolve( + session, request.getExpansions().getKeyTableId(), "expansions.keyTableId"); + resultExportBuilder.require(targetExport, keyTableExport); + } else { + keyTableExport = null; + resultExportBuilder.require(targetExport); + } + + resultExportBuilder + .queryPerformanceRecorder(queryPerformanceRecorder) + .onError(responseObserver) + .submit(() -> { + final Table keyTable = keyTableExport == null ? null : keyTableExport.get(); + final Object target = targetExport.get(); + final HierarchicalTableView targetExistingView = usedExisting + ? (HierarchicalTableView) target + : null; + final HierarchicalTable targetHierarchicalTable = usedExisting + ? targetExistingView.getHierarchicalTable() + : (HierarchicalTable) target; + + authWiring.checkPermissionView(session.getAuthContext(), request, keyTable == null + ? List.of(targetHierarchicalTable.getSource()) + : List.of(keyTable, targetHierarchicalTable.getSource())); + + final HierarchicalTableView result; + if (usedExisting) { + if (keyTable != null) { + result = HierarchicalTableView.makeFromExistingView( + targetExistingView, + keyTable, + request.getExpansions().hasKeyTableActionColumn() + ? ColumnName.of(request.getExpansions().getKeyTableActionColumn()) + : null); + } else { + result = HierarchicalTableView.makeFromExistingView(targetExistingView); + } } else { - result = HierarchicalTableView.makeFromHierarchicalTable(targetHierarchicalTable); + if (keyTable != null) { + result = HierarchicalTableView.makeFromHierarchicalTable( + targetHierarchicalTable, + keyTable, + request.getExpansions().hasKeyTableActionColumn() + ? ColumnName.of(request.getExpansions().getKeyTableActionColumn()) + : null); + } else { + result = HierarchicalTableView.makeFromHierarchicalTable(targetHierarchicalTable); + } } - } - final HierarchicalTableView transformedResult = authTransformation.transform(result); - safelyComplete(responseObserver, HierarchicalTableViewResponse.getDefaultInstance()); - return transformedResult; - }); + final HierarchicalTableView transformedResult = authTransformation.transform(result); + safelyComplete(responseObserver, HierarchicalTableViewResponse.getDefaultInstance()); + return transformedResult; + }); + } } private static void validate(@NotNull final HierarchicalTableViewRequest request) { @@ -421,24 +458,32 @@ public void exportSource( final SessionState session = sessionService.getCurrentSession(); - final SessionState.ExportObject> hierarchicalTableExport = ticketRouter.resolve( - session, request.getHierarchicalTableId(), "exportSource.hierarchicalTableId"); - - session.newExport(request.getResultTableId(), "exportSource.resultTableId") - .require(hierarchicalTableExport) - .onError(responseObserver) - .submit(() -> { - final HierarchicalTable hierarchicalTable = hierarchicalTableExport.get(); - - final Table result = hierarchicalTable.getSource(); - authWiring.checkPermissionExportSource(session.getAuthContext(), request, List.of(result)); - - final Table transformedResult = authTransformation.transform(result); - final ExportedTableCreationResponse response = - ExportUtil.buildTableCreationResponse(request.getResultTableId(), transformedResult); - safelyComplete(responseObserver, response); - return transformedResult; - }); + final String description = "HierarchicalTableService#exportSource(table=" + + ticketRouter.getLogNameFor(request.getHierarchicalTableId(), "hierarchicalTableId") + ")"; + final QueryPerformanceRecorder 
queryPerformanceRecorder = QueryPerformanceRecorder.newQuery( + description, session.getSessionId(), QueryPerformanceNugget.DEFAULT_FACTORY); + + try (final SafeCloseable ignored = queryPerformanceRecorder.startQuery()) { + final SessionState.ExportObject> hierarchicalTableExport = + ticketRouter.resolve(session, request.getHierarchicalTableId(), "hierarchicalTableId"); + + session.newExport(request.getResultTableId(), "resultTableId") + .queryPerformanceRecorder(queryPerformanceRecorder) + .require(hierarchicalTableExport) + .onError(responseObserver) + .submit(() -> { + final HierarchicalTable hierarchicalTable = hierarchicalTableExport.get(); + + final Table result = hierarchicalTable.getSource(); + authWiring.checkPermissionExportSource(session.getAuthContext(), request, List.of(result)); + + final Table transformedResult = authTransformation.transform(result); + final ExportedTableCreationResponse response = + ExportUtil.buildTableCreationResponse(request.getResultTableId(), transformedResult); + safelyComplete(responseObserver, response); + return transformedResult; + }); + } } private static void validate(@NotNull final HierarchicalTableSourceExportRequest request) { diff --git a/server/src/main/java/io/deephaven/server/object/ObjectServiceGrpcImpl.java b/server/src/main/java/io/deephaven/server/object/ObjectServiceGrpcImpl.java index 56e56388eb3..9b0fa5560d8 100644 --- a/server/src/main/java/io/deephaven/server/object/ObjectServiceGrpcImpl.java +++ b/server/src/main/java/io/deephaven/server/object/ObjectServiceGrpcImpl.java @@ -8,6 +8,8 @@ import io.deephaven.base.verify.Assert; import io.deephaven.engine.liveness.LivenessScope; import io.deephaven.engine.liveness.LivenessScopeStack; +import io.deephaven.engine.table.impl.perf.QueryPerformanceNugget; +import io.deephaven.engine.table.impl.perf.QueryPerformanceRecorder; import io.deephaven.extensions.barrage.util.GrpcUtil; import io.deephaven.plugin.type.ObjectCommunicationException; import io.deephaven.plugin.type.ObjectType; @@ -257,55 +259,65 @@ public void fetchObject( if (request.getSourceId().getTicket().getTicket().isEmpty()) { throw Exceptions.statusRuntimeException(Code.INVALID_ARGUMENT, "No ticket supplied"); } - final SessionState.ExportObject object = ticketRouter.resolve( - session, request.getSourceId().getTicket(), "sourceId"); - session.nonExport() - .require(object) - .onError(responseObserver) - .submit(() -> { - final Object o = object.get(); - ObjectType objectTypeInstance = getObjectTypeInstance(type, o); - - AtomicReference singleResponse = new AtomicReference<>(); - AtomicBoolean isClosed = new AtomicBoolean(false); - StreamObserver wrappedResponseObserver = new StreamObserver<>() { - @Override - public void onNext(StreamResponse value) { - singleResponse.set(FetchObjectResponse.newBuilder() - .setType(type) - .setData(value.getData().getPayload()) - .addAllTypedExportIds(value.getData().getExportedReferencesList()) - .build()); - } - @Override - public void onError(Throwable t) { - responseObserver.onError(t); - } + final String description = "ObjectService#fetchObject(object=" + + ticketRouter.getLogNameFor(request.getSourceId().getTicket(), "sourceId") + ")"; + final QueryPerformanceRecorder queryPerformanceRecorder = QueryPerformanceRecorder.newQuery( + description, session.getSessionId(), QueryPerformanceNugget.DEFAULT_FACTORY); + + try (final SafeCloseable ignored = queryPerformanceRecorder.startQuery()) { + final SessionState.ExportObject object = + ticketRouter.resolve(session, 
request.getSourceId().getTicket(), "sourceId"); + + session.nonExport() + .queryPerformanceRecorder(queryPerformanceRecorder) + .require(object) + .onError(responseObserver) + .submit(() -> { + final Object o = object.get(); + ObjectType objectTypeInstance = getObjectTypeInstance(type, o); + + AtomicReference singleResponse = new AtomicReference<>(); + AtomicBoolean isClosed = new AtomicBoolean(false); + StreamObserver wrappedResponseObserver = new StreamObserver<>() { + @Override + public void onNext(StreamResponse value) { + singleResponse.set(FetchObjectResponse.newBuilder() + .setType(type) + .setData(value.getData().getPayload()) + .addAllTypedExportIds(value.getData().getExportedReferencesList()) + .build()); + } + + @Override + public void onError(Throwable t) { + responseObserver.onError(t); + } - @Override - public void onCompleted() { - isClosed.set(true); + @Override + public void onCompleted() { + isClosed.set(true); + } + }; + PluginMessageSender connection = new PluginMessageSender(wrappedResponseObserver, session); + objectTypeInstance.clientConnection(o, connection); + + FetchObjectResponse message = singleResponse.get(); + if (message == null) { + connection.onClose(); + throw Exceptions.statusRuntimeException(Code.INVALID_ARGUMENT, + "Plugin didn't send a response before returning from clientConnection()"); } - }; - PluginMessageSender connection = new PluginMessageSender(wrappedResponseObserver, session); - objectTypeInstance.clientConnection(o, connection); - - FetchObjectResponse message = singleResponse.get(); - if (message == null) { - connection.onClose(); - throw Exceptions.statusRuntimeException(Code.INVALID_ARGUMENT, - "Plugin didn't send a response before returning from clientConnection()"); - } - if (!isClosed.get()) { - connection.onClose(); - throw Exceptions.statusRuntimeException(Code.INVALID_ARGUMENT, - "Plugin didn't close response, use MessageStream instead for this object"); - } - GrpcUtil.safelyComplete(responseObserver, message); + if (!isClosed.get()) { + connection.onClose(); + throw Exceptions.statusRuntimeException(Code.INVALID_ARGUMENT, + "Plugin didn't close response, use MessageStream instead for this object"); + } + GrpcUtil.safelyComplete(responseObserver, message); - return null; - }); + return null; + }); + } } @Override diff --git a/server/src/main/java/io/deephaven/server/partitionedtable/PartitionedTableServiceGrpcImpl.java b/server/src/main/java/io/deephaven/server/partitionedtable/PartitionedTableServiceGrpcImpl.java index 91effc96849..87590cb6de6 100644 --- a/server/src/main/java/io/deephaven/server/partitionedtable/PartitionedTableServiceGrpcImpl.java +++ b/server/src/main/java/io/deephaven/server/partitionedtable/PartitionedTableServiceGrpcImpl.java @@ -7,6 +7,8 @@ import io.deephaven.auth.codegen.impl.PartitionedTableServiceContextualAuthWiring; import io.deephaven.engine.table.PartitionedTable; import io.deephaven.engine.table.Table; +import io.deephaven.engine.table.impl.perf.QueryPerformanceNugget; +import io.deephaven.engine.table.impl.perf.QueryPerformanceRecorder; import io.deephaven.internal.log.LoggerFactory; import io.deephaven.io.logger.Logger; import io.deephaven.proto.backplane.grpc.ExportedTableCreationResponse; @@ -18,6 +20,7 @@ import io.deephaven.proto.util.Exceptions; import io.deephaven.server.auth.AuthorizationProvider; import io.deephaven.server.session.*; +import io.deephaven.util.SafeCloseable; import io.grpc.stub.StreamObserver; import org.jetbrains.annotations.NotNull; @@ -55,20 +58,28 @@ public void 
partitionBy( @NotNull final StreamObserver<PartitionByResponse> responseObserver) { final SessionState session = sessionService.getCurrentSession(); - SessionState.ExportObject<Table>
targetTable = - ticketRouter.resolve(session, request.getTableId(), "tableId"); - - session.newExport(request.getResultId(), "resultId") - .require(targetTable) - .onError(responseObserver) - .submit(() -> { - authWiring.checkPermissionPartitionBy(session.getAuthContext(), request, - Collections.singletonList(targetTable.get())); - PartitionedTable partitionedTable = targetTable.get().partitionBy(request.getDropKeys(), - request.getKeyColumnNamesList().toArray(String[]::new)); - safelyComplete(responseObserver, PartitionByResponse.getDefaultInstance()); - return partitionedTable; - }); + final String description = "PartitionedTableService#partitionBy(table=" + + ticketRouter.getLogNameFor(request.getTableId(), "tableId") + ")"; + final QueryPerformanceRecorder queryPerformanceRecorder = QueryPerformanceRecorder.newQuery( + description, session.getSessionId(), QueryPerformanceNugget.DEFAULT_FACTORY); + + try (final SafeCloseable ignored = queryPerformanceRecorder.startQuery()) { + final SessionState.ExportObject
targetTable = + ticketRouter.resolve(session, request.getTableId(), "tableId"); + + session.newExport(request.getResultId(), "resultId") + .queryPerformanceRecorder(queryPerformanceRecorder) + .require(targetTable) + .onError(responseObserver) + .submit(() -> { + authWiring.checkPermissionPartitionBy(session.getAuthContext(), request, + Collections.singletonList(targetTable.get())); + PartitionedTable partitionedTable = targetTable.get().partitionBy(request.getDropKeys(), + request.getKeyColumnNamesList().toArray(String[]::new)); + safelyComplete(responseObserver, PartitionByResponse.getDefaultInstance()); + return partitionedTable; + }); + } } @Override @@ -77,28 +88,36 @@ public void merge( @NotNull final StreamObserver responseObserver) { final SessionState session = sessionService.getCurrentSession(); - SessionState.ExportObject partitionedTable = - ticketRouter.resolve(session, request.getPartitionedTable(), "partitionedTable"); - - session.newExport(request.getResultId(), "resultId") - .require(partitionedTable) - .onError(responseObserver) - .submit(() -> { - final Table table = partitionedTable.get().table(); - authWiring.checkPermissionMerge(session.getAuthContext(), request, - Collections.singletonList(table)); - Table merged; - if (table.isRefreshing()) { - merged = table.getUpdateGraph().sharedLock().computeLocked(partitionedTable.get()::merge); - } else { - merged = partitionedTable.get().merge(); - } - merged = authorizationTransformation.transform(merged); - final ExportedTableCreationResponse response = - buildTableCreationResponse(request.getResultId(), merged); - safelyComplete(responseObserver, response); - return merged; - }); + final String description = "PartitionedTableService#merge(table=" + + ticketRouter.getLogNameFor(request.getPartitionedTable(), "partitionedTable") + ")"; + final QueryPerformanceRecorder queryPerformanceRecorder = QueryPerformanceRecorder.newQuery( + description, session.getSessionId(), QueryPerformanceNugget.DEFAULT_FACTORY); + + try (final SafeCloseable ignored = queryPerformanceRecorder.startQuery()) { + final SessionState.ExportObject partitionedTable = + ticketRouter.resolve(session, request.getPartitionedTable(), "partitionedTable"); + + session.newExport(request.getResultId(), "resultId") + .queryPerformanceRecorder(queryPerformanceRecorder) + .require(partitionedTable) + .onError(responseObserver) + .submit(() -> { + final Table table = partitionedTable.get().table(); + authWiring.checkPermissionMerge(session.getAuthContext(), request, + Collections.singletonList(table)); + Table merged; + if (table.isRefreshing()) { + merged = table.getUpdateGraph().sharedLock().computeLocked(partitionedTable.get()::merge); + } else { + merged = partitionedTable.get().merge(); + } + merged = authorizationTransformation.transform(merged); + final ExportedTableCreationResponse response = + buildTableCreationResponse(request.getResultId(), merged); + safelyComplete(responseObserver, response); + return merged; + }); + } } @Override @@ -107,61 +126,70 @@ public void getTable( @NotNull final StreamObserver responseObserver) { final SessionState session = sessionService.getCurrentSession(); - SessionState.ExportObject partitionedTable = - ticketRouter.resolve(session, request.getPartitionedTable(), "partitionedTable"); - SessionState.ExportObject
keys = - ticketRouter.resolve(session, request.getKeyTableTicket(), "keyTableTicket"); - - session.newExport(request.getResultId(), "resultId") - .require(partitionedTable, keys) - .onError(responseObserver) - .submit(() -> { - Table table; - Table keyTable = keys.get(); - authWiring.checkPermissionGetTable(session.getAuthContext(), request, - List.of(partitionedTable.get().table(), keyTable)); - if (!keyTable.isRefreshing()) { - long keyTableSize = keyTable.size(); - if (keyTableSize != 1) { - throw Exceptions.statusRuntimeException(Code.INVALID_ARGUMENT, - "Provided key table does not have one row, instead has " + keyTableSize); - } - long row = keyTable.getRowSet().firstRowKey(); - Object[] values = - partitionedTable.get().keyColumnNames().stream() - .map(keyTable::getColumnSource) - .map(cs -> cs.get(row)) - .toArray(); - table = partitionedTable.get().constituentFor(values); - } else { - table = keyTable.getUpdateGraph().sharedLock().computeLocked(() -> { + final String description = "PartitionedTableService#getTable(table=" + + ticketRouter.getLogNameFor(request.getPartitionedTable(), "partitionedTable") + ", keyTable=" + + ticketRouter.getLogNameFor(request.getKeyTableTicket(), "keyTable") + ")"; + final QueryPerformanceRecorder queryPerformanceRecorder = QueryPerformanceRecorder.newQuery( + description, session.getSessionId(), QueryPerformanceNugget.DEFAULT_FACTORY); + + try (final SafeCloseable ignored = queryPerformanceRecorder.startQuery()) { + final SessionState.ExportObject partitionedTable = + ticketRouter.resolve(session, request.getPartitionedTable(), "partitionedTable"); + final SessionState.ExportObject
keys = + ticketRouter.resolve(session, request.getKeyTableTicket(), "keyTable"); + + session.newExport(request.getResultId(), "resultId") + .queryPerformanceRecorder(queryPerformanceRecorder) + .require(partitionedTable, keys) + .onError(responseObserver) + .submit(() -> { + Table table; + Table keyTable = keys.get(); + authWiring.checkPermissionGetTable(session.getAuthContext(), request, + List.of(partitionedTable.get().table(), keyTable)); + if (!keyTable.isRefreshing()) { long keyTableSize = keyTable.size(); if (keyTableSize != 1) { throw Exceptions.statusRuntimeException(Code.INVALID_ARGUMENT, "Provided key table does not have one row, instead has " + keyTableSize); } - Table requestedRow = partitionedTable.get().table().whereIn(keyTable, - partitionedTable.get().keyColumnNames().toArray(String[]::new)); - if (requestedRow.size() != 1) { - if (requestedRow.isEmpty()) { - throw Exceptions.statusRuntimeException(Code.NOT_FOUND, - "Key matches zero rows in the partitioned table"); - } else { - throw Exceptions.statusRuntimeException(Code.FAILED_PRECONDITION, - "Key matches more than one entry in the partitioned table: " - + requestedRow.size()); + long row = keyTable.getRowSet().firstRowKey(); + Object[] values = + partitionedTable.get().keyColumnNames().stream() + .map(keyTable::getColumnSource) + .map(cs -> cs.get(row)) + .toArray(); + table = partitionedTable.get().constituentFor(values); + } else { + table = keyTable.getUpdateGraph().sharedLock().computeLocked(() -> { + long keyTableSize = keyTable.size(); + if (keyTableSize != 1) { + throw Exceptions.statusRuntimeException(Code.INVALID_ARGUMENT, + "Provided key table does not have one row, instead has " + keyTableSize); } - } - return (Table) requestedRow - .getColumnSource(partitionedTable.get().constituentColumnName()) - .get(requestedRow.getRowSet().firstRowKey()); - }); - } - table = authorizationTransformation.transform(table); - final ExportedTableCreationResponse response = - buildTableCreationResponse(request.getResultId(), table); - safelyComplete(responseObserver, response); - return table; - }); + Table requestedRow = partitionedTable.get().table().whereIn(keyTable, + partitionedTable.get().keyColumnNames().toArray(String[]::new)); + if (requestedRow.size() != 1) { + if (requestedRow.isEmpty()) { + throw Exceptions.statusRuntimeException(Code.NOT_FOUND, + "Key matches zero rows in the partitioned table"); + } else { + throw Exceptions.statusRuntimeException(Code.FAILED_PRECONDITION, + "Key matches more than one entry in the partitioned table: " + + requestedRow.size()); + } + } + return (Table) requestedRow + .getColumnSource(partitionedTable.get().constituentColumnName()) + .get(requestedRow.getRowSet().firstRowKey()); + }); + } + table = authorizationTransformation.transform(table); + final ExportedTableCreationResponse response = + buildTableCreationResponse(request.getResultId(), table); + safelyComplete(responseObserver, response); + return table; + }); + } } } diff --git a/server/src/main/java/io/deephaven/server/runner/DeephavenApiServer.java b/server/src/main/java/io/deephaven/server/runner/DeephavenApiServer.java index fa3de22157b..a13a29da41e 100644 --- a/server/src/main/java/io/deephaven/server/runner/DeephavenApiServer.java +++ b/server/src/main/java/io/deephaven/server/runner/DeephavenApiServer.java @@ -8,7 +8,7 @@ import io.deephaven.engine.context.ExecutionContext; import io.deephaven.engine.liveness.LivenessScopeStack; import io.deephaven.engine.table.impl.OperationInitializationThreadPool; -import 
io.deephaven.engine.table.impl.perf.QueryPerformanceRecorder; +import io.deephaven.engine.table.impl.perf.QueryPerformanceRecorderState; import io.deephaven.engine.table.impl.util.AsyncErrorLogger; import io.deephaven.engine.table.impl.util.EngineMetrics; import io.deephaven.engine.table.impl.util.ServerStateTracker; @@ -153,8 +153,8 @@ public DeephavenApiServer run() throws IOException, ClassNotFoundException, Time EngineMetrics.maybeStartStatsCollection(); log.info().append("Starting Performance Trackers...").endl(); - QueryPerformanceRecorder.installPoolAllocationRecorder(); - QueryPerformanceRecorder.installUpdateGraphLockInstrumentation(); + QueryPerformanceRecorderState.installPoolAllocationRecorder(); + QueryPerformanceRecorderState.installUpdateGraphLockInstrumentation(); ServerStateTracker.start(); AsyncErrorLogger.init(); diff --git a/server/src/main/java/io/deephaven/server/session/SessionServiceGrpcImpl.java b/server/src/main/java/io/deephaven/server/session/SessionServiceGrpcImpl.java index cd4ce627a59..b355ba776d8 100644 --- a/server/src/main/java/io/deephaven/server/session/SessionServiceGrpcImpl.java +++ b/server/src/main/java/io/deephaven/server/session/SessionServiceGrpcImpl.java @@ -9,6 +9,8 @@ import io.deephaven.auth.AuthenticationException; import io.deephaven.csv.util.MutableObject; import io.deephaven.engine.liveness.LivenessScopeStack; +import io.deephaven.engine.table.impl.perf.QueryPerformanceNugget; +import io.deephaven.engine.table.impl.perf.QueryPerformanceRecorder; import io.deephaven.extensions.barrage.util.GrpcUtil; import io.deephaven.internal.log.LoggerFactory; import io.deephaven.io.logger.Logger; @@ -165,16 +167,25 @@ public void exportFromTicket( return; } - final SessionState.ExportObject source = ticketRouter.resolve( - session, request.getSourceId(), "sourceId"); - session.newExport(request.getResultId(), "resultId") - .require(source) - .onError(responseObserver) - .submit(() -> { - final Object o = source.get(); - GrpcUtil.safelyComplete(responseObserver, ExportResponse.getDefaultInstance()); - return o; - }); + final String description = "SessionService#exportFromTicket(object=" + + ticketRouter.getLogNameFor(request.getSourceId(), "sourceId") + ")"; + final QueryPerformanceRecorder queryPerformanceRecorder = QueryPerformanceRecorder.newQuery( + description, session.getSessionId(), QueryPerformanceNugget.DEFAULT_FACTORY); + + try (final SafeCloseable ignored = queryPerformanceRecorder.startQuery()) { + final SessionState.ExportObject source = + ticketRouter.resolve(session, request.getSourceId(), "sourceId"); + + session.newExport(request.getResultId(), "resultId") + .queryPerformanceRecorder(queryPerformanceRecorder) + .require(source) + .onError(responseObserver) + .submit(() -> { + final Object o = source.get(); + GrpcUtil.safelyComplete(responseObserver, ExportResponse.getDefaultInstance()); + return o; + }); + } } @Override @@ -194,18 +205,26 @@ public void publishFromTicket( return; } - final SessionState.ExportObject source = ticketRouter.resolve( - session, request.getSourceId(), "sourceId"); - Ticket resultId = request.getResultId(); - - final SessionState.ExportBuilder publisher = ticketRouter.publish( - session, resultId, "resultId", () -> { - // when publish is complete, complete the gRPC request - GrpcUtil.safelyComplete(responseObserver, PublishResponse.getDefaultInstance()); - }); - publisher.require(source) - .onError(responseObserver) - .submit(source::get); + final String description = "SessionService#publishFromTicket(object=" 
+ + ticketRouter.getLogNameFor(request.getSourceId(), "sourceId") + ")"; + final QueryPerformanceRecorder queryPerformanceRecorder = QueryPerformanceRecorder.newQuery( + description, session.getSessionId(), QueryPerformanceNugget.DEFAULT_FACTORY); + + try (final SafeCloseable ignored = queryPerformanceRecorder.startQuery()) { + final SessionState.ExportObject source = + ticketRouter.resolve(session, request.getSourceId(), "sourceId"); + + Ticket resultId = request.getResultId(); + + ticketRouter.publish(session, resultId, "resultId", () -> { + // when publish is complete, complete the gRPC request + GrpcUtil.safelyComplete(responseObserver, PublishResponse.getDefaultInstance()); + }) + .queryPerformanceRecorder(queryPerformanceRecorder) + .require(source) + .onError(responseObserver) + .submit(source::get); + } } @Override diff --git a/server/src/main/java/io/deephaven/server/session/SessionState.java b/server/src/main/java/io/deephaven/server/session/SessionState.java index 56b98bbc751..862cb131855 100644 --- a/server/src/main/java/io/deephaven/server/session/SessionState.java +++ b/server/src/main/java/io/deephaven/server/session/SessionState.java @@ -10,16 +10,13 @@ import dagger.assisted.AssistedInject; import io.deephaven.base.reference.WeakSimpleReference; import io.deephaven.base.verify.Assert; -import io.deephaven.base.verify.Require; import io.deephaven.engine.liveness.LivenessArtifact; import io.deephaven.engine.liveness.LivenessReferent; import io.deephaven.engine.liveness.LivenessScopeStack; import io.deephaven.engine.table.impl.perf.QueryPerformanceNugget; import io.deephaven.engine.table.impl.perf.QueryPerformanceRecorder; -import io.deephaven.engine.table.impl.perf.QueryProcessingResults; +import io.deephaven.engine.table.impl.perf.QueryState; import io.deephaven.engine.table.impl.util.EngineMetrics; -import io.deephaven.engine.tablelogger.QueryOperationPerformanceLogLogger; -import io.deephaven.engine.tablelogger.QueryPerformanceLogLogger; import io.deephaven.engine.updategraph.DynamicNode; import io.deephaven.hash.KeyedIntObjectHash; import io.deephaven.hash.KeyedIntObjectHashMap; @@ -219,6 +216,13 @@ protected void updateExpiration(@NotNull final SessionService.TokenExpiration ex .append(MILLIS_FROM_EPOCH_FORMATTER, expiration.deadlineMillis).append(".").endl(); } + /** + * @return the session id + */ + public String getSessionId() { + return sessionId; + } + /** * @return the current expiration token for this session */ @@ -531,6 +535,9 @@ public final static class ExportObject extends LivenessArtifact { private final SessionService.ErrorTransformer errorTransformer; private final SessionState session; + /** used to keep track of performance details either for aggregation or for the async ticket resolution */ + private QueryPerformanceRecorder queryPerformanceRecorder; + /** final result of export */ private volatile T result; private volatile ExportNotification.State state = ExportNotification.State.UNKNOWN; @@ -620,6 +627,15 @@ private boolean isNonExport() { return exportId == NON_EXPORT_ID; } + private synchronized void setQueryPerformanceRecorder( + final QueryPerformanceRecorder queryPerformanceRecorder) { + if (this.queryPerformanceRecorder != null) { + throw new IllegalStateException( + "performance query recorder can only be set once on an exportable object"); + } + this.queryPerformanceRecorder = queryPerformanceRecorder; + } + /** * Sets the dependencies and tracks liveness dependencies. 
* @@ -662,6 +678,11 @@ private synchronized void setWork( throw new IllegalStateException("export object can only be defined once"); } hasHadWorkSet = true; + + if (queryPerformanceRecorder != null && queryPerformanceRecorder.getState() == QueryState.RUNNING) { + // transfer ownership of the qpr to the export before it can be resumed by the scheduler + queryPerformanceRecorder.suspendQuery(); + } this.requiresSerialQueue = requiresSerialQueue; if (isExportStateTerminal(state)) { @@ -963,68 +984,56 @@ private void doExport() { T localResult = null; boolean shouldLog = false; - int evaluationNumber = -1; - QueryProcessingResults queryProcessingResults = null; + final QueryPerformanceRecorder exportRecorder; try (final SafeCloseable ignored1 = session.executionContext.open(); final SafeCloseable ignored2 = LivenessScopeStack.open()) { - try { - queryProcessingResults = new QueryProcessingResults( - QueryPerformanceRecorder.getInstance()); - evaluationNumber = QueryPerformanceRecorder.getInstance() - .startQuery("session=" + session.sessionId + ",exportId=" + logIdentity); + final String queryId; + if (isNonExport()) { + queryId = "nonExport=" + logIdentity; + } else { + queryId = "exportId=" + logIdentity; + } + + final boolean isResume = queryPerformanceRecorder != null + && queryPerformanceRecorder.getState() == QueryState.SUSPENDED; + exportRecorder = Objects.requireNonNullElseGet(queryPerformanceRecorder, + () -> QueryPerformanceRecorder.newQuery("ExportObject#doWork(" + queryId + ")", + session.getSessionId(), QueryPerformanceNugget.DEFAULT_FACTORY)); + try (final SafeCloseable ignored3 = isResume + ? exportRecorder.resumeQuery() + : exportRecorder.startQuery()) { try { localResult = capturedExport.call(); - } finally { - shouldLog = QueryPerformanceRecorder.getInstance().endQuery(); + } catch (final Exception err) { + caughtException = err; } + shouldLog = exportRecorder.endQuery(); } catch (final Exception err) { - caughtException = err; + // end query will throw if the export runner left the QPR in a bad state + if (caughtException == null) { + caughtException = err; + } + } + + if (caughtException != null) { synchronized (this) { if (!isExportStateTerminal(state)) { maybeAssignErrorId(); if (!(caughtException instanceof StatusRuntimeException)) { - log.error().append("Internal Error '").append(errorId).append("' ").append(err).endl(); + log.error().append("Internal Error '").append(errorId).append("' ") + .append(caughtException).endl(); } setState(ExportNotification.State.FAILED); } } - } finally { - if (caughtException != null && queryProcessingResults != null) { - queryProcessingResults.setException(caughtException.toString()); - } - QueryPerformanceRecorder.resetInstance(); } - if ((shouldLog || caughtException != null) && queryProcessingResults != null) { - final EngineMetrics memLoggers = EngineMetrics.getInstance(); - final QueryPerformanceLogLogger qplLogger = memLoggers.getQplLogger(); - final QueryOperationPerformanceLogLogger qoplLogger = memLoggers.getQoplLogger(); - try { - final QueryPerformanceNugget nugget = Require.neqNull( - queryProcessingResults.getRecorder().getQueryLevelPerformanceData(), - "queryProcessingResults.getRecorder().getQueryLevelPerformanceData()"); - - // noinspection SynchronizationOnLocalVariableOrMethodParameter - synchronized (qplLogger) { - qplLogger.log(evaluationNumber, - queryProcessingResults, - nugget); - } - final List nuggets = - queryProcessingResults.getRecorder().getOperationLevelPerformanceData(); - // noinspection 
SynchronizationOnLocalVariableOrMethodParameter - synchronized (qoplLogger) { - int opNo = 0; - for (QueryPerformanceNugget n : nuggets) { - qoplLogger.log(opNo++, n); - } - } - } catch (final Exception e) { - log.error().append("Failed to log query performance data: ").append(e).endl(); - } + if (shouldLog || caughtException != null) { + EngineMetrics.getInstance().logQueryProcessingResults(exportRecorder, caughtException); } if (caughtException == null) { + // must set result after ending the query so that onSuccess may resume / finalize a parent query setResult(localResult); } } @@ -1310,6 +1319,18 @@ public class ExportBuilder { } } + /** + * Set the performance recorder to resume when running this export. + * + * @param queryPerformanceRecorder the performance recorder + * @return this builder + */ + public ExportBuilder queryPerformanceRecorder( + @NotNull final QueryPerformanceRecorder queryPerformanceRecorder) { + export.setQueryPerformanceRecorder(queryPerformanceRecorder); + return this; + } + /** * Some exports must happen serially w.r.t. other exports. For example, an export that acquires the exclusive * UGP lock. We enqueue these dependencies independently of the otherwise regularly concurrent exports. diff --git a/server/src/main/java/io/deephaven/server/session/TicketRouter.java b/server/src/main/java/io/deephaven/server/session/TicketRouter.java index fc8375e0411..792cad9a0c7 100644 --- a/server/src/main/java/io/deephaven/server/session/TicketRouter.java +++ b/server/src/main/java/io/deephaven/server/session/TicketRouter.java @@ -5,6 +5,7 @@ import com.google.rpc.Code; import io.deephaven.engine.table.Table; +import io.deephaven.engine.table.impl.perf.QueryPerformanceRecorder; import io.deephaven.extensions.barrage.util.BarrageUtil; import io.deephaven.hash.KeyedIntObjectHashMap; import io.deephaven.hash.KeyedIntObjectKey; @@ -13,6 +14,7 @@ import io.deephaven.proto.backplane.grpc.Ticket; import io.deephaven.proto.util.Exceptions; import io.deephaven.server.auth.AuthorizationProvider; +import io.deephaven.util.SafeCloseable; import org.apache.arrow.flight.impl.Flight; import org.jetbrains.annotations.Nullable; @@ -65,7 +67,11 @@ public SessionState.ExportObject resolve( throw Exceptions.statusRuntimeException(Code.FAILED_PRECONDITION, "could not resolve '" + logId + "' it's an empty ticket"); } - return getResolver(ticket.get(ticket.position()), logId).resolve(session, ticket, logId); + final String ticketName = getLogNameFor(ticket, logId); + try (final SafeCloseable ignored = QueryPerformanceRecorder.getInstance().getNugget( + "resolveTicket:" + ticketName)) { + return getResolver(ticket.get(ticket.position()), logId).resolve(session, ticket, logId); + } } /** @@ -113,7 +119,10 @@ public SessionState.ExportObject resolve( @Nullable final SessionState session, final Flight.FlightDescriptor descriptor, final String logId) { - return getResolver(descriptor, logId).resolve(session, descriptor, logId); + try (final SafeCloseable ignored = QueryPerformanceRecorder.getInstance().getNugget( + "resolveDescriptor:" + descriptor)) { + return getResolver(descriptor, logId).resolve(session, descriptor, logId); + } } /** @@ -134,9 +143,13 @@ public SessionState.ExportBuilder publish( final ByteBuffer ticket, final String logId, @Nullable final Runnable onPublish) { - final TicketResolver resolver = getResolver(ticket.get(ticket.position()), logId); - authorization.authorizePublishRequest(resolver, ticket); - return resolver.publish(session, ticket, logId, onPublish); + final String 
ticketName = getLogNameFor(ticket, logId); + try (final SafeCloseable ignored = QueryPerformanceRecorder.getInstance().getNugget( + "publishTicket:" + ticketName)) { + final TicketResolver resolver = getResolver(ticket.get(ticket.position()), logId); + authorization.authorizePublishRequest(resolver, ticket); + return resolver.publish(session, ticket, logId, onPublish); + } } /** @@ -201,9 +214,12 @@ public SessionState.ExportBuilder publish( final Flight.FlightDescriptor descriptor, final String logId, @Nullable final Runnable onPublish) { - final TicketResolver resolver = getResolver(descriptor, logId); - authorization.authorizePublishRequest(resolver, descriptor); - return resolver.publish(session, descriptor, logId, onPublish); + try (final SafeCloseable ignored = QueryPerformanceRecorder.getInstance().getNugget( + "publishDescriptor:" + descriptor)) { + final TicketResolver resolver = getResolver(descriptor, logId); + authorization.authorizePublishRequest(resolver, descriptor); + return resolver.publish(session, descriptor, logId, onPublish); + } } /** @@ -220,7 +236,10 @@ public SessionState.ExportObject flightInfoFor( @Nullable final SessionState session, final Flight.FlightDescriptor descriptor, final String logId) { - return getResolver(descriptor, logId).flightInfoFor(session, descriptor, logId); + try (final SafeCloseable ignored = QueryPerformanceRecorder.getInstance().getNugget( + "flightInfoForDescriptor:" + descriptor)) { + return getResolver(descriptor, logId).flightInfoFor(session, descriptor, logId); + } } /** diff --git a/server/src/main/java/io/deephaven/server/table/inputtables/InputTableServiceGrpcImpl.java b/server/src/main/java/io/deephaven/server/table/inputtables/InputTableServiceGrpcImpl.java index 4a52597c444..673e39e35b1 100644 --- a/server/src/main/java/io/deephaven/server/table/inputtables/InputTableServiceGrpcImpl.java +++ b/server/src/main/java/io/deephaven/server/table/inputtables/InputTableServiceGrpcImpl.java @@ -8,6 +8,8 @@ import io.deephaven.engine.context.ExecutionContext; import io.deephaven.engine.table.Table; import io.deephaven.engine.table.TableDefinition; +import io.deephaven.engine.table.impl.perf.QueryPerformanceNugget; +import io.deephaven.engine.table.impl.perf.QueryPerformanceRecorder; import io.deephaven.engine.util.config.MutableInputTable; import io.deephaven.extensions.barrage.util.GrpcUtil; import io.deephaven.internal.log.LoggerFactory; @@ -21,6 +23,7 @@ import io.deephaven.server.session.SessionService; import io.deephaven.server.session.SessionState; import io.deephaven.server.session.TicketRouter; +import io.deephaven.util.SafeCloseable; import io.grpc.stub.StreamObserver; import org.jetbrains.annotations.NotNull; @@ -52,46 +55,56 @@ public void addTableToInputTable( @NotNull final StreamObserver responseObserver) { final SessionState session = sessionService.getCurrentSession(); - SessionState.ExportObject
targetTable = - ticketRouter.resolve(session, request.getInputTable(), "inputTable"); - SessionState.ExportObject<Table>
tableToAddExport = - ticketRouter.resolve(session, request.getTableToAdd(), "tableToAdd"); - - session.nonExport() - .requiresSerialQueue() - .onError(responseObserver) - .require(targetTable, tableToAddExport) - .submit(() -> { - Object inputTable = targetTable.get().getAttribute(Table.INPUT_TABLE_ATTRIBUTE); - if (!(inputTable instanceof MutableInputTable)) { - throw Exceptions.statusRuntimeException(Code.INVALID_ARGUMENT, - "Table can't be used as an input table"); - } - - MutableInputTable mutableInputTable = (MutableInputTable) inputTable; - Table tableToAdd = tableToAddExport.get(); - - authWiring.checkPermissionAddTableToInputTable( - ExecutionContext.getContext().getAuthContext(), request, - List.of(targetTable.get(), tableToAdd)); - - // validate that the columns are compatible - try { - mutableInputTable.validateAddOrModify(tableToAdd); - } catch (TableDefinition.IncompatibleTableDefinitionException exception) { - throw Exceptions.statusRuntimeException(Code.INVALID_ARGUMENT, - "Provided tables's columns are not compatible: " + exception.getMessage()); - } - - // actually add the tables contents - try { - mutableInputTable.add(tableToAdd); - GrpcUtil.safelyComplete(responseObserver, AddTableResponse.getDefaultInstance()); - } catch (IOException ioException) { - throw Exceptions.statusRuntimeException(Code.DATA_LOSS, - "Error adding table to input table"); - } - }); + final String description = "InputTableService#addTableToInputTable(inputTable=" + + ticketRouter.getLogNameFor(request.getInputTable(), "inputTable") + ", tableToAdd=" + + ticketRouter.getLogNameFor(request.getTableToAdd(), "tableToAdd") + ")"; + final QueryPerformanceRecorder queryPerformanceRecorder = QueryPerformanceRecorder.newQuery( + description, session.getSessionId(), QueryPerformanceNugget.DEFAULT_FACTORY); + + try (final SafeCloseable ignored = queryPerformanceRecorder.startQuery()) { + final SessionState.ExportObject
targetTable = + ticketRouter.resolve(session, request.getInputTable(), "inputTable"); + + final SessionState.ExportObject<Table>
tableToAddExport = + ticketRouter.resolve(session, request.getTableToAdd(), "tableToAdd"); + + session.nonExport() + .queryPerformanceRecorder(queryPerformanceRecorder) + .requiresSerialQueue() + .onError(responseObserver) + .require(targetTable, tableToAddExport) + .submit(() -> { + Object inputTable = targetTable.get().getAttribute(Table.INPUT_TABLE_ATTRIBUTE); + if (!(inputTable instanceof MutableInputTable)) { + throw Exceptions.statusRuntimeException(Code.INVALID_ARGUMENT, + "Table can't be used as an input table"); + } + + MutableInputTable mutableInputTable = (MutableInputTable) inputTable; + Table tableToAdd = tableToAddExport.get(); + + authWiring.checkPermissionAddTableToInputTable( + ExecutionContext.getContext().getAuthContext(), request, + List.of(targetTable.get(), tableToAdd)); + + // validate that the columns are compatible + try { + mutableInputTable.validateAddOrModify(tableToAdd); + } catch (TableDefinition.IncompatibleTableDefinitionException exception) { + throw Exceptions.statusRuntimeException(Code.INVALID_ARGUMENT, + "Provided tables's columns are not compatible: " + exception.getMessage()); + } + + // actually add the tables contents + try { + mutableInputTable.add(tableToAdd); + GrpcUtil.safelyComplete(responseObserver, AddTableResponse.getDefaultInstance()); + } catch (IOException ioException) { + throw Exceptions.statusRuntimeException(Code.DATA_LOSS, + "Error adding table to input table"); + } + }); + } } @Override @@ -100,48 +113,58 @@ public void deleteTableFromInputTable( @NotNull final StreamObserver responseObserver) { final SessionState session = sessionService.getCurrentSession(); - SessionState.ExportObject
targetTable = - ticketRouter.resolve(session, request.getInputTable(), "inputTable"); - SessionState.ExportObject<Table>
tableToDeleteExport = - ticketRouter.resolve(session, request.getTableToRemove(), "tableToDelete"); - - session.nonExport() - .requiresSerialQueue() - .onError(responseObserver) - .require(targetTable, tableToDeleteExport) - .submit(() -> { - Object inputTable = targetTable.get().getAttribute(Table.INPUT_TABLE_ATTRIBUTE); - if (!(inputTable instanceof MutableInputTable)) { - throw Exceptions.statusRuntimeException(Code.INVALID_ARGUMENT, - "Table can't be used as an input table"); - } - - MutableInputTable mutableInputTable = (MutableInputTable) inputTable; - Table tableToDelete = tableToDeleteExport.get(); - - authWiring.checkPermissionDeleteTableFromInputTable( - ExecutionContext.getContext().getAuthContext(), request, - List.of(targetTable.get(), tableToDelete)); - - // validate that the columns are compatible - try { - mutableInputTable.validateDelete(tableToDelete); - } catch (TableDefinition.IncompatibleTableDefinitionException exception) { - throw Exceptions.statusRuntimeException(Code.INVALID_ARGUMENT, - "Provided tables's columns are not compatible: " + exception.getMessage()); - } catch (UnsupportedOperationException exception) { - throw Exceptions.statusRuntimeException(Code.INVALID_ARGUMENT, - "Provided input table does not support delete."); - } - - // actually delete the table's contents - try { - mutableInputTable.delete(tableToDelete); - GrpcUtil.safelyComplete(responseObserver, DeleteTableResponse.getDefaultInstance()); - } catch (IOException ioException) { - throw Exceptions.statusRuntimeException(Code.DATA_LOSS, - "Error deleting table from inputtable"); - } - }); + final String description = "InputTableService#deleteTableFromInputTable(inputTable=" + + ticketRouter.getLogNameFor(request.getInputTable(), "inputTable") + ", tableToRemove=" + + ticketRouter.getLogNameFor(request.getTableToRemove(), "tableToRemove") + ")"; + final QueryPerformanceRecorder queryPerformanceRecorder = QueryPerformanceRecorder.newQuery( + description, session.getSessionId(), QueryPerformanceNugget.DEFAULT_FACTORY); + + try (final SafeCloseable ignored = queryPerformanceRecorder.startQuery()) { + final SessionState.ExportObject
targetTable = + ticketRouter.resolve(session, request.getInputTable(), "inputTable"); + + final SessionState.ExportObject<Table>
tableToRemoveExport = + ticketRouter.resolve(session, request.getTableToRemove(), "tableToRemove"); + + session.nonExport() + .queryPerformanceRecorder(queryPerformanceRecorder) + .requiresSerialQueue() + .onError(responseObserver) + .require(targetTable, tableToRemoveExport) + .submit(() -> { + Object inputTable = targetTable.get().getAttribute(Table.INPUT_TABLE_ATTRIBUTE); + if (!(inputTable instanceof MutableInputTable)) { + throw Exceptions.statusRuntimeException(Code.INVALID_ARGUMENT, + "Table can't be used as an input table"); + } + + MutableInputTable mutableInputTable = (MutableInputTable) inputTable; + Table tableToRemove = tableToRemoveExport.get(); + + authWiring.checkPermissionDeleteTableFromInputTable( + ExecutionContext.getContext().getAuthContext(), request, + List.of(targetTable.get(), tableToRemove)); + + // validate that the columns are compatible + try { + mutableInputTable.validateDelete(tableToRemove); + } catch (TableDefinition.IncompatibleTableDefinitionException exception) { + throw Exceptions.statusRuntimeException(Code.INVALID_ARGUMENT, + "Provided tables's columns are not compatible: " + exception.getMessage()); + } catch (UnsupportedOperationException exception) { + throw Exceptions.statusRuntimeException(Code.INVALID_ARGUMENT, + "Provided input table does not support delete."); + } + + // actually delete the table's contents + try { + mutableInputTable.delete(tableToRemove); + GrpcUtil.safelyComplete(responseObserver, DeleteTableResponse.getDefaultInstance()); + } catch (IOException ioException) { + throw Exceptions.statusRuntimeException(Code.DATA_LOSS, + "Error deleting table from inputtable"); + } + }); + } } } diff --git a/server/src/main/java/io/deephaven/server/table/ops/TableServiceGrpcImpl.java b/server/src/main/java/io/deephaven/server/table/ops/TableServiceGrpcImpl.java index db19d235805..7574253f2c7 100644 --- a/server/src/main/java/io/deephaven/server/table/ops/TableServiceGrpcImpl.java +++ b/server/src/main/java/io/deephaven/server/table/ops/TableServiceGrpcImpl.java @@ -4,9 +4,13 @@ package io.deephaven.server.table.ops; import com.google.rpc.Code; +import io.deephaven.base.verify.Assert; import io.deephaven.clientsupport.gotorow.SeekRow; import io.deephaven.auth.codegen.impl.TableServiceContextualAuthWiring; import io.deephaven.engine.table.Table; +import io.deephaven.engine.table.impl.perf.QueryPerformanceNugget; +import io.deephaven.engine.table.impl.perf.QueryPerformanceRecorder; +import io.deephaven.engine.table.impl.util.EngineMetrics; import io.deephaven.extensions.barrage.util.ExportUtil; import io.deephaven.internal.log.LoggerFactory; import io.deephaven.io.logger.Logger; @@ -64,9 +68,11 @@ import io.deephaven.server.session.TicketRouter; import io.deephaven.server.table.ExportedTableUpdateListener; import io.deephaven.time.DateTimeUtils; +import io.deephaven.util.SafeCloseable; import io.grpc.StatusRuntimeException; import io.grpc.stub.ServerCallStreamObserver; import io.grpc.stub.StreamObserver; +import org.apache.commons.lang3.mutable.MutableInt; import org.jetbrains.annotations.NotNull; import javax.inject.Inject; @@ -313,62 +319,76 @@ public void exactJoinTables( } @Override - public void leftJoinTables(LeftJoinTablesRequest request, - StreamObserver responseObserver) { + public void leftJoinTables( + @NotNull final LeftJoinTablesRequest request, + @NotNull final StreamObserver responseObserver) { oneShotOperationWrapper(BatchTableRequest.Operation.OpCase.LEFT_JOIN, request, responseObserver); } @Override - public void 
asOfJoinTables(AsOfJoinTablesRequest request, - StreamObserver responseObserver) { + public void asOfJoinTables( + @NotNull final AsOfJoinTablesRequest request, + @NotNull final StreamObserver responseObserver) { oneShotOperationWrapper(BatchTableRequest.Operation.OpCase.AS_OF_JOIN, request, responseObserver); } @Override - public void ajTables(AjRajTablesRequest request, StreamObserver responseObserver) { + public void ajTables( + @NotNull final AjRajTablesRequest request, + @NotNull final StreamObserver responseObserver) { oneShotOperationWrapper(BatchTableRequest.Operation.OpCase.AJ, request, responseObserver); } @Override - public void rajTables(AjRajTablesRequest request, StreamObserver responseObserver) { + public void rajTables( + @NotNull final AjRajTablesRequest request, + @NotNull final StreamObserver responseObserver) { oneShotOperationWrapper(BatchTableRequest.Operation.OpCase.RAJ, request, responseObserver); } @Override - public void rangeJoinTables(RangeJoinTablesRequest request, - StreamObserver responseObserver) { + public void rangeJoinTables( + @NotNull final RangeJoinTablesRequest request, + @NotNull final StreamObserver responseObserver) { oneShotOperationWrapper(BatchTableRequest.Operation.OpCase.RANGE_JOIN, request, responseObserver); } @Override - public void runChartDownsample(RunChartDownsampleRequest request, - StreamObserver responseObserver) { + public void runChartDownsample( + @NotNull final RunChartDownsampleRequest request, + @NotNull final StreamObserver responseObserver) { oneShotOperationWrapper(BatchTableRequest.Operation.OpCase.RUN_CHART_DOWNSAMPLE, request, responseObserver); } @Override - public void fetchTable(FetchTableRequest request, StreamObserver responseObserver) { + public void fetchTable( + @NotNull final FetchTableRequest request, + @NotNull final StreamObserver responseObserver) { oneShotOperationWrapper(BatchTableRequest.Operation.OpCase.FETCH_TABLE, request, responseObserver); } @Override - public void applyPreviewColumns(ApplyPreviewColumnsRequest request, - StreamObserver responseObserver) { + public void applyPreviewColumns( + @NotNull final ApplyPreviewColumnsRequest request, + @NotNull final StreamObserver responseObserver) { oneShotOperationWrapper(BatchTableRequest.Operation.OpCase.APPLY_PREVIEW_COLUMNS, request, responseObserver); } @Override - public void createInputTable(CreateInputTableRequest request, - StreamObserver responseObserver) { + public void createInputTable( + @NotNull final CreateInputTableRequest request, + @NotNull final StreamObserver responseObserver) { oneShotOperationWrapper(BatchTableRequest.Operation.OpCase.CREATE_INPUT_TABLE, request, responseObserver); } @Override - public void updateBy(UpdateByRequest request, StreamObserver responseObserver) { + public void updateBy( + @NotNull final UpdateByRequest request, + @NotNull final StreamObserver responseObserver) { oneShotOperationWrapper(BatchTableRequest.Operation.OpCase.UPDATE_BY, request, responseObserver); } - private Object getSeekValue(Literal literal, Class dataType) { + private Object getSeekValue(@NotNull final Literal literal, @NotNull final Class dataType) { if (literal.hasStringValue()) { if (BigDecimal.class.isAssignableFrom(dataType)) { return new BigDecimal(literal.getStringValue()); @@ -449,33 +469,43 @@ public void seekRow( if (sourceId.getTicket().isEmpty()) { throw Exceptions.statusRuntimeException(Code.FAILED_PRECONDITION, "No consoleId supplied"); } - SessionState.ExportObject
exportedTable = - ticketRouter.resolve(session, sourceId, "sourceId"); - session.nonExport() - .require(exportedTable) - .onError(responseObserver) - .submit(() -> { - final Table table = exportedTable.get(); - authWiring.checkPermissionSeekRow(session.getAuthContext(), request, - Collections.singletonList(table)); - final String columnName = request.getColumnName(); - final Class dataType = table.getDefinition().getColumn(columnName).getDataType(); - final Object seekValue = getSeekValue(request.getSeekValue(), dataType); - final Long result = table.apply(new SeekRow( - request.getStartingRow(), - columnName, - seekValue, - request.getInsensitive(), - request.getContains(), - request.getIsBackward())); - SeekRowResponse.Builder rowResponse = SeekRowResponse.newBuilder(); - safelyComplete(responseObserver, rowResponse.setResultRow(result).build()); - }); - } - - @Override - public void computeColumnStatistics(ColumnStatisticsRequest request, - StreamObserver responseObserver) { + final String description = "TableService#seekRow(table=" + + ticketRouter.getLogNameFor(sourceId, "sourceId") + ")"; + final QueryPerformanceRecorder queryPerformanceRecorder = QueryPerformanceRecorder.newQuery( + description, session.getSessionId(), QueryPerformanceNugget.DEFAULT_FACTORY); + + try (final SafeCloseable ignored = queryPerformanceRecorder.startQuery()) { + final SessionState.ExportObject
exportedTable = + ticketRouter.resolve(session, sourceId, "sourceId"); + + session.nonExport() + .queryPerformanceRecorder(queryPerformanceRecorder) + .require(exportedTable) + .onError(responseObserver) + .submit(() -> { + final Table table = exportedTable.get(); + authWiring.checkPermissionSeekRow(session.getAuthContext(), request, + Collections.singletonList(table)); + final String columnName = request.getColumnName(); + final Class dataType = table.getDefinition().getColumn(columnName).getDataType(); + final Object seekValue = getSeekValue(request.getSeekValue(), dataType); + final Long result = table.apply(new SeekRow( + request.getStartingRow(), + columnName, + seekValue, + request.getInsensitive(), + request.getContains(), + request.getIsBackward())); + SeekRowResponse.Builder rowResponse = SeekRowResponse.newBuilder(); + safelyComplete(responseObserver, rowResponse.setResultRow(result).build()); + }); + } + } + + @Override + public void computeColumnStatistics( + @NotNull final ColumnStatisticsRequest request, + @NotNull final StreamObserver responseObserver) { oneShotOperationWrapper(BatchTableRequest.Operation.OpCase.COLUMN_STATISTICS, request, responseObserver); } @@ -491,65 +521,85 @@ public void batch( } final SessionState session = sessionService.getCurrentSession(); - // step 1: initialize exports - final List> exportBuilders = request.getOpsList().stream() - .map(op -> createBatchExportBuilder(session, op)) - .collect(Collectors.toList()); + final QueryPerformanceRecorder queryPerformanceRecorder = QueryPerformanceRecorder.newQuery( + "TableService#batch()", session.getSessionId(), QueryPerformanceNugget.DEFAULT_FACTORY); - // step 2: resolve dependencies - exportBuilders.forEach(export -> export.resolveDependencies(session, exportBuilders)); + try (final SafeCloseable ignored1 = queryPerformanceRecorder.startQuery()) { + // step 1: initialize exports + final MutableInt offset = new MutableInt(0); + final List> exportBuilders = request.getOpsList().stream() + .map(op -> createBatchExportBuilder( + offset.getAndIncrement(), session, queryPerformanceRecorder, op)) + .collect(Collectors.toList()); - // step 3: check for cyclical dependencies; this is our only opportunity to check non-export cycles - // TODO: check for cycles + // step 2: resolve dependencies + exportBuilders.forEach(export -> export.resolveDependencies(session, exportBuilders)); - // step 4: submit the batched operations - final AtomicInteger remaining = new AtomicInteger(exportBuilders.size()); - final AtomicReference firstFailure = new AtomicReference<>(); + // step 3: check for cyclical dependencies; this is our only opportunity to check non-export cycles + // TODO: check for cycles - final Runnable onOneResolved = () -> { - if (remaining.decrementAndGet() == 0) { - final StatusRuntimeException failure = firstFailure.get(); - if (failure != null) { - safelyError(responseObserver, failure); - } else { - safelyComplete(responseObserver); + // step 4: submit the batched operations + final AtomicInteger remaining = new AtomicInteger(1 + exportBuilders.size()); + final AtomicReference firstFailure = new AtomicReference<>(); + + final Runnable onOneResolved = () -> { + int numRemaining = remaining.decrementAndGet(); + Assert.geqZero(numRemaining, "numRemaining"); + if (numRemaining > 0) { + return; } - } - }; - for (int i = 0; i < exportBuilders.size(); ++i) { - final BatchExportBuilder exportBuilder = exportBuilders.get(i); - final int exportId = exportBuilder.exportBuilder.getExportId(); + try (final 
SafeCloseable ignored2 = queryPerformanceRecorder.resumeQuery()) { + final StatusRuntimeException failure = firstFailure.get(); + if (failure != null) { + safelyError(responseObserver, failure); + } else { + safelyComplete(responseObserver); + } + if (queryPerformanceRecorder.endQuery()) { + EngineMetrics.getInstance().logQueryProcessingResults(queryPerformanceRecorder, failure); + } + } + }; - final TableReference resultId; - if (exportId == SessionState.NON_EXPORT_ID) { - resultId = TableReference.newBuilder().setBatchOffset(i).build(); - } else { - resultId = ExportTicketHelper.tableReference(exportId); - } + for (int i = 0; i < exportBuilders.size(); ++i) { + final BatchExportBuilder exportBuilder = exportBuilders.get(i); + final int exportId = exportBuilder.exportBuilder.getExportId(); - exportBuilder.exportBuilder.onError((result, errorContext, cause, dependentId) -> { - String errorInfo = errorContext; - if (dependentId != null) { - errorInfo += " dependency: " + dependentId; - } - if (cause instanceof StatusRuntimeException) { - errorInfo += " cause: " + cause.getMessage(); - firstFailure.compareAndSet(null, (StatusRuntimeException) cause); + final TableReference resultId; + if (exportId == SessionState.NON_EXPORT_ID) { + resultId = TableReference.newBuilder().setBatchOffset(i).build(); + } else { + resultId = ExportTicketHelper.tableReference(exportId); } - final ExportedTableCreationResponse response = ExportedTableCreationResponse.newBuilder() - .setResultId(resultId) - .setSuccess(false) - .setErrorInfo(errorInfo) - .build(); - safelyOnNext(responseObserver, response); - onOneResolved.run(); - }).onSuccess(table -> { - final ExportedTableCreationResponse response = - ExportUtil.buildTableCreationResponse(resultId, table); - safelyOnNext(responseObserver, response); - onOneResolved.run(); - }).submit(exportBuilder::doExport); + + exportBuilder.exportBuilder.onError((result, errorContext, cause, dependentId) -> { + String errorInfo = errorContext; + if (dependentId != null) { + errorInfo += " dependency: " + dependentId; + } + if (cause instanceof StatusRuntimeException) { + errorInfo += " cause: " + cause.getMessage(); + firstFailure.compareAndSet(null, (StatusRuntimeException) cause); + } + final ExportedTableCreationResponse response = ExportedTableCreationResponse.newBuilder() + .setResultId(resultId) + .setSuccess(false) + .setErrorInfo(errorInfo) + .build(); + safelyOnNext(responseObserver, response); + onOneResolved.run(); + }).onSuccess(table -> { + final ExportedTableCreationResponse response = + ExportUtil.buildTableCreationResponse(resultId, table); + safelyOnNext(responseObserver, response); + onOneResolved.run(); + }).submit(exportBuilder::doExport); + } + + // now that we've submitted everything we'll suspend the query and release our refcount + queryPerformanceRecorder.suspendQuery(); + onOneResolved.run(); } } @@ -575,25 +625,33 @@ public void getExportedTableCreationResponse( throw Exceptions.statusRuntimeException(Code.FAILED_PRECONDITION, "No request ticket supplied"); } - final SessionState.ExportObject export = ticketRouter.resolve(session, request, "request"); - - session.nonExport() - .require(export) - .onError(responseObserver) - .submit(() -> { - final Object obj = export.get(); - if (!(obj instanceof Table)) { - responseObserver.onError( - Exceptions.statusRuntimeException(Code.FAILED_PRECONDITION, - "Ticket is not a table")); - return; - } - authWiring.checkPermissionGetExportedTableCreationResponse( - session.getAuthContext(), request, 
Collections.singletonList((Table) obj)); - final ExportedTableCreationResponse response = - ExportUtil.buildTableCreationResponse(request, (Table) obj); - safelyComplete(responseObserver, response); - }); + final String description = "TableService#getExportedTableCreationResponse(table=" + + ticketRouter.getLogNameFor(request, "request") + ")"; + final QueryPerformanceRecorder queryPerformanceRecorder = QueryPerformanceRecorder.newQuery( + description, session.getSessionId(), QueryPerformanceNugget.DEFAULT_FACTORY); + + try (final SafeCloseable ignored = queryPerformanceRecorder.startQuery()) { + final SessionState.ExportObject export = ticketRouter.resolve(session, request, "request"); + + session.nonExport() + .queryPerformanceRecorder(queryPerformanceRecorder) + .require(export) + .onError(responseObserver) + .submit(() -> { + final Object obj = export.get(); + if (!(obj instanceof Table)) { + responseObserver.onError( + Exceptions.statusRuntimeException(Code.FAILED_PRECONDITION, + "Ticket is not a table")); + return; + } + authWiring.checkPermissionGetExportedTableCreationResponse( + session.getAuthContext(), request, Collections.singletonList((Table) obj)); + final ExportedTableCreationResponse response = + ExportUtil.buildTableCreationResponse(request, (Table) obj); + safelyComplete(responseObserver, response); + }); + } } /** @@ -606,44 +664,59 @@ public void getExportedTableCreationResponse( */ private void oneShotOperationWrapper( final BatchTableRequest.Operation.OpCase op, - final T request, - final StreamObserver responseObserver) { + @NotNull final T request, + @NotNull final StreamObserver responseObserver) { final SessionState session = sessionService.getCurrentSession(); final GrpcTableOperation operation = getOp(op); - operation.validateRequest(request); final Ticket resultId = operation.getResultTicket(request); if (resultId.getTicket().isEmpty()) { throw Exceptions.statusRuntimeException(Code.FAILED_PRECONDITION, "No result ticket supplied"); } - final List> dependencies = operation.getTableReferences(request).stream() - .map(ref -> resolveOneShotReference(session, ref)) - .collect(Collectors.toList()); + final String description = "TableService#" + op.name() + "(resultId=" + + ticketRouter.getLogNameFor(resultId, "TableService") + ")"; - session.newExport(resultId, "resultId") - .require(dependencies) - .onError(responseObserver) - .submit(() -> { - operation.checkPermission(request, dependencies); - final Table result = operation.create(request, dependencies); - final ExportedTableCreationResponse response = - ExportUtil.buildTableCreationResponse(resultId, result); - safelyComplete(responseObserver, response); - return result; - }); + final QueryPerformanceRecorder queryPerformanceRecorder = QueryPerformanceRecorder.newQuery( + description, session.getSessionId(), QueryPerformanceNugget.DEFAULT_FACTORY); + + try (final SafeCloseable ignored = queryPerformanceRecorder.startQuery()) { + operation.validateRequest(request); + + final List> dependencies = operation.getTableReferences(request).stream() + .map(ref -> resolveOneShotReference(session, ref)) + .collect(Collectors.toList()); + + session.newExport(resultId, "resultId") + .require(dependencies) + .onError(responseObserver) + .queryPerformanceRecorder(queryPerformanceRecorder) + .submit(() -> { + operation.checkPermission(request, dependencies); + final Table result = operation.create(request, dependencies); + final ExportedTableCreationResponse response = + ExportUtil.buildTableCreationResponse(resultId, result); 
+ safelyComplete(responseObserver, response); + return result; + }); + } } - private SessionState.ExportObject<Table>
resolveOneShotReference(SessionState session, TableReference ref) { + private SessionState.ExportObject<Table>
resolveOneShotReference( + @NotNull final SessionState session, + @NotNull final TableReference ref) { if (!ref.hasTicket()) { throw Exceptions.statusRuntimeException(Code.FAILED_PRECONDITION, "One-shot operations must use ticket references"); } + return ticketRouter.resolve(session, ref.getTicket(), "sourceId"); } - private SessionState.ExportObject<Table>
resolveBatchReference(SessionState session, - List<BatchExportBuilder<?>> exportBuilders, TableReference ref) { + private SessionState.ExportObject<Table>
resolveBatchReference( + @NotNull final SessionState session, + @NotNull final List<BatchExportBuilder<?>> exportBuilders, + @NotNull final TableReference ref) { switch (ref.getRefCase()) { case TICKET: return ticketRouter.resolve(session, ref.getTicket(), "sourceId"); @@ -658,14 +731,28 @@ private SessionState.ExportObject<Table>
resolveBatchReference(SessionState sess } } - private BatchExportBuilder createBatchExportBuilder(SessionState session, BatchTableRequest.Operation op) { + private BatchExportBuilder createBatchExportBuilder( + final int offset, + @NotNull final SessionState session, + @NotNull final QueryPerformanceRecorder batchQueryPerformanceRecorder, + final BatchTableRequest.Operation op) { final GrpcTableOperation operation = getOp(op.getOpCase()); final T request = operation.getRequestFromOperation(op); operation.validateRequest(request); final Ticket resultId = operation.getResultTicket(request); + final boolean hasResultId = !resultId.getTicket().isEmpty(); final ExportBuilder
exportBuilder = - resultId.getTicket().isEmpty() ? session.nonExport() : session.newExport(resultId, "resultId"); + hasResultId ? session.newExport(resultId, "resultId") : session.nonExport(); + final String resultDescription = hasResultId + ? "resultId=" + ticketRouter.getLogNameFor(resultId, "resultId") + ", " + : ""; + + final String description = "TableService#" + op.getOpCase().name() + "(" + resultDescription + "batchOffset=" + + offset + ")"; + exportBuilder.queryPerformanceRecorder(QueryPerformanceRecorder.newSubQuery( + description, batchQueryPerformanceRecorder, QueryPerformanceNugget.DEFAULT_FACTORY)); + return new BatchExportBuilder<>(operation, request, exportBuilder); } @@ -676,13 +763,18 @@ private class BatchExportBuilder { List> dependencies; - BatchExportBuilder(GrpcTableOperation operation, T request, ExportBuilder
exportBuilder) { + BatchExportBuilder( + @NotNull final GrpcTableOperation operation, + @NotNull final T request, + @NotNull final ExportBuilder
exportBuilder) { this.operation = Objects.requireNonNull(operation); this.request = Objects.requireNonNull(request); this.exportBuilder = Objects.requireNonNull(exportBuilder); } - void resolveDependencies(SessionState session, List> exportBuilders) { + void resolveDependencies( + @NotNull final SessionState session, + @NotNull final List> exportBuilders) { dependencies = operation.getTableReferences(request).stream() .map(ref -> resolveBatchReference(session, exportBuilders, ref)) .collect(Collectors.toList()); From e596070c8740c7fbbf84d3f18ee54e870da0183b Mon Sep 17 00:00:00 2001 From: Corey Kosak Date: Sun, 19 Nov 2023 14:36:40 -0500 Subject: [PATCH 39/41] C++ Client: fix one incorrect mutex unlock, remove one needless unlock (#4855) --- cpp-client/deephaven/dhclient/src/impl/table_handle_impl.cc | 2 +- cpp-client/deephaven/dhclient/src/server/server.cc | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp-client/deephaven/dhclient/src/impl/table_handle_impl.cc b/cpp-client/deephaven/dhclient/src/impl/table_handle_impl.cc index a704a1c1367..81b9f59925c 100644 --- a/cpp-client/deephaven/dhclient/src/impl/table_handle_impl.cc +++ b/cpp-client/deephaven/dhclient/src/impl/table_handle_impl.cc @@ -715,7 +715,7 @@ std::shared_ptr TableHandleImpl::Schema() { std::unique_lock guard(mutex_); if (schema_request_sent_) { // Schema request already sent by someone else. So wait for the successful result or error. - mutex_.unlock(); + guard.unlock(); return schema_future_.get(); } diff --git a/cpp-client/deephaven/dhclient/src/server/server.cc b/cpp-client/deephaven/dhclient/src/server/server.cc index a7d499470e6..4ee76699def 100644 --- a/cpp-client/deephaven/dhclient/src/server/server.cc +++ b/cpp-client/deephaven/dhclient/src/server/server.cc @@ -362,7 +362,6 @@ void Server::SendRpc(const std::function &c const char *message = "Server cancelled. All further RPCs are being rejected"; throw std::runtime_error(DEEPHAVEN_LOCATION_STR(message)); } - guard.unlock(); } auto status = callback(&ctx); From 89357e426ed2e432aa1f9b4f6fe99f6b28324b6b Mon Sep 17 00:00:00 2001 From: Corey Kosak Date: Mon, 20 Nov 2023 08:50:51 -0500 Subject: [PATCH 40/41] C++ Client: avoid deprecated Arrow methods, plus conform to coding style (#4857) --- .../include/public/deephaven/client/client.h | 8 +- .../include/public/deephaven/client/flight.h | 2 +- .../deephaven/client/utility/arrow_util.h | 14 +- cpp-client/deephaven/dhclient/src/client.cc | 162 +++++++++--------- cpp-client/deephaven/dhclient/src/flight.cc | 8 +- .../dhclient/src/utility/arrow_util.cc | 2 - 6 files changed, 98 insertions(+), 98 deletions(-) diff --git a/cpp-client/deephaven/dhclient/include/public/deephaven/client/client.h b/cpp-client/deephaven/dhclient/include/public/deephaven/client/client.h index a99ca9c46e9..d40d0bed805 100644 --- a/cpp-client/deephaven/dhclient/include/public/deephaven/client/client.h +++ b/cpp-client/deephaven/dhclient/include/public/deephaven/client/client.h @@ -1407,7 +1407,7 @@ class TableHandle { * @return A TableHandle referencing the new table */ [[nodiscard]] - TableHandle WAvgBy(std::string weight_column, std::vector columnSpecs) const; + TableHandle WAvgBy(std::string weight_column, std::vector column_specs) const; /** * A variadic form of WAvgBy(std::string, std::vector) const that takes a combination of * argument types. 
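A note on the #4855 fix above: the one-character-class change in table_handle_impl.cc removes a real hazard. The sketch below is a standalone, hypothetical illustration (not Deephaven code) of why releasing the raw mutex while a `std::unique_lock` still owns it is undefined behavior, and why `guard.unlock()` is the safe call.

```cpp
#include <mutex>

// Broken: the mutex is released behind the guard's back, so the guard
// unlocks it a second time when it is destroyed (undefined behavior).
void Broken(std::mutex &mutex) {
  std::unique_lock<std::mutex> guard(mutex);
  mutex.unlock();   // guard still believes it owns the lock
}                   // ~unique_lock unlocks again: undefined behavior

// Fixed: release through the guard so its ownership state stays consistent.
void Fixed(std::mutex &mutex) {
  std::unique_lock<std::mutex> guard(mutex);
  guard.unlock();   // guard records that it no longer owns the lock
}                   // ~unique_lock does nothing further
```

The server.cc change in the same commit is the complementary case: the guard was about to leave scope anyway, so the explicit unlock was simply redundant and could be dropped.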
@@ -1538,7 +1538,7 @@ class TableHandle { * @return A TableHandle referencing the new table */ [[nodiscard]] - TableHandle Merge(std::string key_column, std::vector sources) const; + TableHandle Merge(std::string key_columns, std::vector sources) const; /** * A variadic form of Merge(std::string, std::vector) const that takes a combination of * argument types. @@ -1848,8 +1848,8 @@ class TableHandle { * Subscribe to a ticking table (C-style). */ [[nodiscard]] - std::shared_ptr Subscribe(onTickCallback_t onTick, void *onTickUserData, - onErrorCallback_t on_error, void *onErrorUserData); + std::shared_ptr Subscribe(onTickCallback_t on_tick, void *on_tick_user_data, + onErrorCallback_t on_error, void *on_error_user_data); /** * Unsubscribe from the table. */ diff --git a/cpp-client/deephaven/dhclient/include/public/deephaven/client/flight.h b/cpp-client/deephaven/dhclient/include/public/deephaven/client/flight.h index 7d8c5351658..2c14fa528b6 100644 --- a/cpp-client/deephaven/dhclient/include/public/deephaven/client/flight.h +++ b/cpp-client/deephaven/dhclient/include/public/deephaven/client/flight.h @@ -30,7 +30,7 @@ class FlightWrapper { * @return An Arrow FlightStreamReader */ [[nodiscard]] - std::shared_ptr GetFlightStreamReader( + std::unique_ptr GetFlightStreamReader( const TableHandle &table) const; /** diff --git a/cpp-client/deephaven/dhclient/include/public/deephaven/client/utility/arrow_util.h b/cpp-client/deephaven/dhclient/include/public/deephaven/client/utility/arrow_util.h index 2c42c5d43f7..7b77f0fef15 100644 --- a/cpp-client/deephaven/dhclient/include/public/deephaven/client/utility/arrow_util.h +++ b/cpp-client/deephaven/dhclient/include/public/deephaven/client/utility/arrow_util.h @@ -15,6 +15,13 @@ namespace deephaven::client::utility { arrow::flight::FlightDescriptor ConvertTicketToFlightDescriptor(const std::string &ticket); +/** + * If status is OK, do nothing. Otherwise throw a runtime error with an informative message. + * @param debug_info A DebugInfo object, typically as provided by DEEPHAVEN_LOCATION_EXPR. + * @param status the arrow::Status + */ +void OkOrThrow(const deephaven::dhcore::utility::DebugInfo &debug_info, const arrow::Status &status); + /** * If result's status is OK, do nothing. Otherwise throw a runtime error with an informative message. * @param debug_info A DebugInfo object, typically as provided by DEEPHAVEN_LOCATION_EXPR. @@ -25,13 +32,6 @@ void OkOrThrow(const deephaven::dhcore::utility::DebugInfo &debug_info, const ar OkOrThrow(debug_info, result.status()); } -/** - * If status is OK, do nothing. Otherwise throw a runtime error with an informative message. - * @param debug_info A DebugInfo object, typically as provided by DEEPHAVEN_LOCATION_EXPR. - * @param status the arrow::Status - */ -void OkOrThrow(const deephaven::dhcore::utility::DebugInfo &debug_info, const arrow::Status &status); - /** * If result's internal status is OK, return result's contained value. * Otherwise throw a runtime error with an informative message. 
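Since the only change in arrow_util.h is declaration order, a quick usage sketch may help show why it matters: the template overload of `OkOrThrow` forwards to the plain `arrow::Status` overload, which is why the latter is now declared first. This is illustrative only; `Demo` is a made-up function, and `ValueOrThrow` is assumed from the doc comment above and the rest of the header.

```cpp
#include <arrow/result.h>
#include "deephaven/client/utility/arrow_util.h"
#include "deephaven/dhcore/utility/utility.h"  // DEEPHAVEN_LOCATION_EXPR

using deephaven::client::utility::OkOrThrow;
using deephaven::client::utility::ValueOrThrow;  // assumed helper described by the doc comment above

void Demo() {
  arrow::Result<int64_t> maybe_count(int64_t{42});
  // Template overload: checks maybe_count.status() by forwarding to the
  // arrow::Status overload declared just before it.
  OkOrThrow(DEEPHAVEN_LOCATION_EXPR(maybe_count));
  // Status overload, called directly.
  OkOrThrow(DEEPHAVEN_LOCATION_EXPR(maybe_count.status()));
  // Check-and-unwrap in one step.
  int64_t count = ValueOrThrow(DEEPHAVEN_LOCATION_EXPR(arrow::Result<int64_t>(int64_t{7})));
  (void)count;
}
```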
diff --git a/cpp-client/deephaven/dhclient/src/client.cc b/cpp-client/deephaven/dhclient/src/client.cc index d6ed48dd0be..b31b86b8aa9 100644 --- a/cpp-client/deephaven/dhclient/src/client.cc +++ b/cpp-client/deephaven/dhclient/src/client.cc @@ -115,8 +115,8 @@ TableHandle TableHandleManager::EmptyTable(int64_t size) const { return TableHandle(std::move(qs_impl)); } -TableHandle TableHandleManager::FetchTable(std::string tableName) const { - auto qs_impl = impl_->FetchTable(std::move(tableName)); +TableHandle TableHandleManager::FetchTable(std::string table_name) const { + auto qs_impl = impl_->FetchTable(std::move(table_name)); return TableHandle(std::move(qs_impl)); } @@ -185,8 +185,8 @@ Aggregate::~Aggregate() = default; Aggregate::Aggregate(std::shared_ptr impl) : impl_(std::move(impl)) { } -Aggregate Aggregate::AbsSum(std::vector columnSpecs) { - return createAggForMatchPairs(ComboAggregateRequest::ABS_SUM, std::move(columnSpecs)); +Aggregate Aggregate::AbsSum(std::vector column_specs) { + return createAggForMatchPairs(ComboAggregateRequest::ABS_SUM, std::move(column_specs)); } Aggregate Aggregate::Avg(std::vector column_specs) { @@ -211,8 +211,8 @@ Aggregate Aggregate::Last(std::vector column_specs) { return createAggForMatchPairs(ComboAggregateRequest::LAST, std::move(column_specs)); } -Aggregate Aggregate::Max(std::vector columnSpecs) { - return createAggForMatchPairs(ComboAggregateRequest::MAX, std::move(columnSpecs)); +Aggregate Aggregate::Max(std::vector column_specs) { + return createAggForMatchPairs(ComboAggregateRequest::MAX, std::move(column_specs)); } Aggregate Aggregate::Med(std::vector column_specs) { @@ -306,124 +306,126 @@ TableHandle TableHandle::Sort(std::vector sortPairs) const { return TableHandle(std::move(qt_impl)); } -TableHandle TableHandle::Select(std::vector columnSpecs) const { - auto qt_impl = impl_->Select(std::move(columnSpecs)); +TableHandle TableHandle::Select(std::vector column_specs) const { + auto qt_impl = impl_->Select(std::move(column_specs)); return TableHandle(std::move(qt_impl)); } -TableHandle TableHandle::Update(std::vector columnSpecs) const { - auto qt_impl = impl_->Update(std::move(columnSpecs)); +TableHandle TableHandle::Update(std::vector column_specs) const { + auto qt_impl = impl_->Update(std::move(column_specs)); return TableHandle(std::move(qt_impl)); } -TableHandle TableHandle::LazyUpdate(std::vector columnSpecs) const { - auto qt_impl = impl_->LazyUpdate(std::move(columnSpecs)); +TableHandle TableHandle::LazyUpdate(std::vector column_specs) const { + auto qt_impl = impl_->LazyUpdate(std::move(column_specs)); return TableHandle(std::move(qt_impl)); } -TableHandle TableHandle::View(std::vector columnSpecs) const { - auto qt_impl = impl_->View(std::move(columnSpecs)); +TableHandle TableHandle::View(std::vector column_specs) const { + auto qt_impl = impl_->View(std::move(column_specs)); return TableHandle(std::move(qt_impl)); } -TableHandle TableHandle::DropColumns(std::vector columnSpecs) const { - auto qt_impl = impl_->DropColumns(std::move(columnSpecs)); +TableHandle TableHandle::DropColumns(std::vector column_specs) const { + auto qt_impl = impl_->DropColumns(std::move(column_specs)); return TableHandle(std::move(qt_impl)); } -TableHandle TableHandle::UpdateView(std::vector columnSpecs) const { - auto qt_impl = impl_->UpdateView(std::move(columnSpecs)); +TableHandle TableHandle::UpdateView(std::vector column_specs) const { + auto qt_impl = impl_->UpdateView(std::move(column_specs)); return TableHandle(std::move(qt_impl)); } 
-TableHandle TableHandle::By(std::vector columnSpecs) const { - auto qt_impl = impl_->By(std::move(columnSpecs)); +TableHandle TableHandle::By(std::vector column_specs) const { + auto qt_impl = impl_->By(std::move(column_specs)); return TableHandle(std::move(qt_impl)); } -TableHandle TableHandle::By(AggregateCombo combo, std::vector groupByColumns) const { - auto qt_impl = impl_->By(combo.Impl()->Aggregates(), std::move(groupByColumns)); +TableHandle TableHandle::By(AggregateCombo combo, std::vector group_by_columns) const { + auto qt_impl = impl_->By(combo.Impl()->Aggregates(), std::move(group_by_columns)); return TableHandle(std::move(qt_impl)); } -TableHandle TableHandle::MinBy(std::vector columnSpecs) const { - auto qt_impl = impl_->MinBy(std::move(columnSpecs)); +TableHandle TableHandle::MinBy(std::vector column_specs) const { + auto qt_impl = impl_->MinBy(std::move(column_specs)); return TableHandle(std::move(qt_impl)); } -TableHandle TableHandle::MaxBy(std::vector columnSpecs) const { - auto qt_impl = impl_->MaxBy(std::move(columnSpecs)); +TableHandle TableHandle::MaxBy(std::vector column_specs) const { + auto qt_impl = impl_->MaxBy(std::move(column_specs)); return TableHandle(std::move(qt_impl)); } -TableHandle TableHandle::SumBy(std::vector columnSpecs) const { - auto qt_impl = impl_->SumBy(std::move(columnSpecs)); +TableHandle TableHandle::SumBy(std::vector column_specs) const { + auto qt_impl = impl_->SumBy(std::move(column_specs)); return TableHandle(std::move(qt_impl)); } -TableHandle TableHandle::AbsSumBy(std::vector columnSpecs) const { - auto qt_impl = impl_->AbsSumBy(std::move(columnSpecs)); +TableHandle TableHandle::AbsSumBy(std::vector column_specs) const { + auto qt_impl = impl_->AbsSumBy(std::move(column_specs)); return TableHandle(std::move(qt_impl)); } -TableHandle TableHandle::VarBy(std::vector columnSpecs) const { - auto qt_impl = impl_->VarBy(std::move(columnSpecs)); +TableHandle TableHandle::VarBy(std::vector column_specs) const { + auto qt_impl = impl_->VarBy(std::move(column_specs)); return TableHandle(std::move(qt_impl)); } -TableHandle TableHandle::StdBy(std::vector columnSpecs) const { - auto qt_impl = impl_->StdBy(std::move(columnSpecs)); +TableHandle TableHandle::StdBy(std::vector column_specs) const { + auto qt_impl = impl_->StdBy(std::move(column_specs)); return TableHandle(std::move(qt_impl)); } -TableHandle TableHandle::AvgBy(std::vector columnSpecs) const { - auto qt_impl = impl_->AvgBy(std::move(columnSpecs)); +TableHandle TableHandle::AvgBy(std::vector column_specs) const { + auto qt_impl = impl_->AvgBy(std::move(column_specs)); return TableHandle(std::move(qt_impl)); } -TableHandle TableHandle::LastBy(std::vector columnSpecs) const { - auto qt_impl = impl_->LastBy(std::move(columnSpecs)); +TableHandle TableHandle::LastBy(std::vector column_specs) const { + auto qt_impl = impl_->LastBy(std::move(column_specs)); return TableHandle(std::move(qt_impl)); } -TableHandle TableHandle::FirstBy(std::vector columnSpecs) const { - auto qt_impl = impl_->FirstBy(std::move(columnSpecs)); +TableHandle TableHandle::FirstBy(std::vector column_specs) const { + auto qt_impl = impl_->FirstBy(std::move(column_specs)); return TableHandle(std::move(qt_impl)); } -TableHandle TableHandle::MedianBy(std::vector columnSpecs) const { - auto qt_impl = impl_->MedianBy(std::move(columnSpecs)); +TableHandle TableHandle::MedianBy(std::vector column_specs) const { + auto qt_impl = impl_->MedianBy(std::move(column_specs)); return TableHandle(std::move(qt_impl)); } -TableHandle 
TableHandle::PercentileBy(double percentile, bool avgMedian, - std::vector columnSpecs) const { - auto qt_impl = impl_->PercentileBy(percentile, avgMedian, std::move(columnSpecs)); +TableHandle TableHandle::PercentileBy(double percentile, bool avg_median, + std::vector column_specs) const { + auto qt_impl = impl_->PercentileBy(percentile, avg_median, std::move(column_specs)); return TableHandle(std::move(qt_impl)); } -TableHandle TableHandle::PercentileBy(double percentile, std::vector columnSpecs) const { - auto qt_impl = impl_->PercentileBy(percentile, std::move(columnSpecs)); +TableHandle TableHandle::PercentileBy(double percentile, std::vector column_specs) const { + auto qt_impl = impl_->PercentileBy(percentile, std::move(column_specs)); return TableHandle(std::move(qt_impl)); } -TableHandle TableHandle::CountBy(std::string countByColumn, std::vector columnSpecs) const { - auto qt_impl = impl_->CountBy(std::move(countByColumn), std::move(columnSpecs)); +TableHandle TableHandle::CountBy(std::string count_by_column, + std::vector column_specs) const { + auto qt_impl = impl_->CountBy(std::move(count_by_column), std::move(column_specs)); return TableHandle(std::move(qt_impl)); } -TableHandle TableHandle::WAvgBy(std::string weightColumn, std::vector columnSpecs) const { - auto qt_impl = impl_->WavgBy(std::move(weightColumn), std::move(columnSpecs)); +TableHandle TableHandle::WAvgBy(std::string weight_column, + std::vector column_specs) const { + auto qt_impl = impl_->WavgBy(std::move(weight_column), std::move(column_specs)); return TableHandle(std::move(qt_impl)); } -TableHandle TableHandle::TailBy(int64_t n, std::vector columnSpecs) const { - auto qt_impl = impl_->TailBy(n, std::move(columnSpecs)); +TableHandle TableHandle::TailBy(int64_t n, std::vector column_specs) const { + auto qt_impl = impl_->TailBy(n, std::move(column_specs)); return TableHandle(std::move(qt_impl)); } -TableHandle TableHandle::HeadBy(int64_t n, std::vector columnSpecs) const { - auto qt_impl = impl_->HeadBy(n, std::move(columnSpecs)); +TableHandle TableHandle::HeadBy(int64_t n, std::vector column_specs) const { + auto qt_impl = impl_->HeadBy(n, std::move(column_specs)); return TableHandle(std::move(qt_impl)); } @@ -437,40 +439,40 @@ TableHandle TableHandle::Tail(int64_t n) const { return TableHandle(std::move(qt_impl)); } -TableHandle TableHandle::Ungroup(bool nullFill, std::vector groupByColumns) const { - auto qt_impl = impl_->Ungroup(nullFill, std::move(groupByColumns)); +TableHandle TableHandle::Ungroup(bool null_fill, std::vector group_by_columns) const { + auto qt_impl = impl_->Ungroup(null_fill, std::move(group_by_columns)); return TableHandle(std::move(qt_impl)); } -TableHandle TableHandle::Merge(std::string keyColumn, std::vector sources) const { +TableHandle TableHandle::Merge(std::string key_columns, std::vector sources) const { std::vector source_handles; source_handles.reserve(sources.size() + 1); source_handles.push_back(impl_->Ticket()); for (const auto &s : sources) { source_handles.push_back(s.Impl()->Ticket()); } - auto qt_impl = impl_->Merge(std::move(keyColumn), std::move(source_handles)); + auto qt_impl = impl_->Merge(std::move(key_columns), std::move(source_handles)); return TableHandle(std::move(qt_impl)); } -TableHandle TableHandle::CrossJoin(const TableHandle &rightSide, - std::vector columnsToMatch, std::vector columnsToAdd) const { - auto qt_impl = impl_->CrossJoin(*rightSide.impl_, std::move(columnsToMatch), - std::move(columnsToAdd)); +TableHandle TableHandle::CrossJoin(const 
TableHandle &right_side, + std::vector columns_to_match, std::vector columns_to_add) const { + auto qt_impl = impl_->CrossJoin(*right_side.impl_, std::move(columns_to_match), + std::move(columns_to_add)); return TableHandle(std::move(qt_impl)); } -TableHandle TableHandle::NaturalJoin(const TableHandle &rightSide, - std::vector columnsToMatch, std::vector columnsToAdd) const { - auto qt_impl = impl_->NaturalJoin(*rightSide.impl_, std::move(columnsToMatch), - std::move(columnsToAdd)); +TableHandle TableHandle::NaturalJoin(const TableHandle &right_side, + std::vector columns_to_match, std::vector columns_to_add) const { + auto qt_impl = impl_->NaturalJoin(*right_side.impl_, std::move(columns_to_match), + std::move(columns_to_add)); return TableHandle(std::move(qt_impl)); } -TableHandle TableHandle::ExactJoin(const TableHandle &rightSide, - std::vector columnsToMatch, std::vector columnsToAdd) const { - auto qt_impl = impl_->ExactJoin(*rightSide.impl_, std::move(columnsToMatch), - std::move(columnsToAdd)); +TableHandle TableHandle::ExactJoin(const TableHandle &right_side, + std::vector columns_to_match, std::vector columns_to_add) const { + auto qt_impl = impl_->ExactJoin(*right_side.impl_, std::move(columns_to_match), + std::move(columns_to_add)); return TableHandle(std::move(qt_impl)); } @@ -550,9 +552,9 @@ std::shared_ptr TableHandle::Subscribe( } std::shared_ptr -TableHandle::Subscribe(onTickCallback_t onTick, void *onTickUserData, - onErrorCallback_t onError, void *onErrorUserData) { - return impl_->Subscribe(onTick, onTickUserData, onError, onErrorUserData); +TableHandle::Subscribe(onTickCallback_t on_tick, void *on_tick_user_data, + onErrorCallback_t on_error, void *on_error_user_data) { + return impl_->Subscribe(on_tick, on_tick_user_data, on_error, on_error_user_data); } void TableHandle::Unsubscribe(std::shared_ptr callback) { @@ -563,9 +565,9 @@ const std::string &TableHandle::GetTicketAsString() const { return impl_->Ticket().ticket(); } -std::string TableHandle::ToString(bool wantHeaders) const { +std::string TableHandle::ToString(bool want_headers) const { SimpleOstringstream oss; - oss << Stream(wantHeaders); + oss << Stream(want_headers); return std::move(oss.str()); } @@ -590,12 +592,12 @@ void PrintTableData(std::ostream &s, const TableHandle &table_handle, bool want_ } while (true) { - arrow::flight::FlightStreamChunk chunk; - OkOrThrow(DEEPHAVEN_LOCATION_EXPR(fsr->Next(&chunk))); - if (chunk.data == nullptr) { + auto chunk = fsr->Next(); + OkOrThrow(DEEPHAVEN_LOCATION_EXPR(chunk)); + if (chunk->data == nullptr) { break; } - const auto *data = chunk.data.get(); + const auto *data = chunk->data.get(); const auto &columns = data->columns(); for (int64_t row_num = 0; row_num < data->num_rows(); ++row_num) { if (row_num != 0) { diff --git a/cpp-client/deephaven/dhclient/src/flight.cc b/cpp-client/deephaven/dhclient/src/flight.cc index 3f9257da04d..165377785f6 100644 --- a/cpp-client/deephaven/dhclient/src/flight.cc +++ b/cpp-client/deephaven/dhclient/src/flight.cc @@ -16,17 +16,17 @@ FlightWrapper TableHandleManager::CreateFlightWrapper() const { FlightWrapper::FlightWrapper(std::shared_ptr impl) : impl_(std::move(impl)) {} FlightWrapper::~FlightWrapper() = default; -std::shared_ptr FlightWrapper::GetFlightStreamReader( +std::unique_ptr FlightWrapper::GetFlightStreamReader( const TableHandle &table) const { arrow::flight::FlightCallOptions options; AddHeaders(&options); - std::unique_ptr fsr; arrow::flight::Ticket tkt; tkt.ticket = table.Impl()->Ticket().ticket(); - 
OkOrThrow(DEEPHAVEN_LOCATION_EXPR(impl_->Server()->FlightClient()->DoGet(options, tkt, &fsr))); - return fsr; + auto fsr_result = impl_->Server()->FlightClient()->DoGet(options, tkt); + OkOrThrow(DEEPHAVEN_LOCATION_EXPR(fsr_result)); + return std::move(*fsr_result); } void FlightWrapper::AddHeaders(arrow::flight::FlightCallOptions *options) const { diff --git a/cpp-client/deephaven/dhclient/src/utility/arrow_util.cc b/cpp-client/deephaven/dhclient/src/utility/arrow_util.cc index e7c6ceeec30..70ed283164b 100644 --- a/cpp-client/deephaven/dhclient/src/utility/arrow_util.cc +++ b/cpp-client/deephaven/dhclient/src/utility/arrow_util.cc @@ -10,8 +10,6 @@ #include #include "deephaven/dhcore/utility/utility.h" -using namespace std; - namespace deephaven::client::utility { void OkOrThrow(const deephaven::dhcore::utility::DebugInfo &debug_info, const arrow::Status &status) { From fe4b551d956b4fc9b620a907f26c2de2fcac29f8 Mon Sep 17 00:00:00 2001 From: Corey Kosak Date: Mon, 20 Nov 2023 08:51:08 -0500 Subject: [PATCH 41/41] C++ Client: more avoid deprecated Arrow, more conform to style guide (#4858) * C++ Client: more conform to deprecated Arrow, more conform coding style * arrow stuff to include elsewhere --- .../dhclient/src/impl/table_handle_impl.cc | 10 ++--- .../src/subscription/subscribe_thread.cc | 37 +++++++++---------- .../dhclient/src/utility/table_maker.cc | 29 +++++++-------- 3 files changed, 35 insertions(+), 41 deletions(-) diff --git a/cpp-client/deephaven/dhclient/src/impl/table_handle_impl.cc b/cpp-client/deephaven/dhclient/src/impl/table_handle_impl.cc index 81b9f59925c..c7d8affc6a7 100644 --- a/cpp-client/deephaven/dhclient/src/impl/table_handle_impl.cc +++ b/cpp-client/deephaven/dhclient/src/impl/table_handle_impl.cc @@ -737,15 +737,13 @@ std::shared_ptr TableHandleImpl::Schema() { ); auto fd = ConvertTicketToFlightDescriptor(ticket_.ticket()); - std::unique_ptr schema_result; - auto gs_result = server->FlightClient()->GetSchema(options, fd, &schema_result); + auto gs_result = server->FlightClient()->GetSchema(options, fd); OkOrThrow(DEEPHAVEN_LOCATION_EXPR(gs_result)); - std::shared_ptr arrow_schema; - auto schema_res = schema_result->GetSchema(nullptr, &arrow_schema); - OkOrThrow(DEEPHAVEN_LOCATION_EXPR(schema_res)); + auto schema_result = (*gs_result)->GetSchema(nullptr); + OkOrThrow(DEEPHAVEN_LOCATION_EXPR(schema_result)); - const auto &fields = arrow_schema->fields(); + const auto &fields = (*schema_result)->fields(); auto names = MakeReservedVector(fields.size()); auto types = MakeReservedVector(fields.size()); for (const auto &f: fields) { diff --git a/cpp-client/deephaven/dhclient/src/subscription/subscribe_thread.cc b/cpp-client/deephaven/dhclient/src/subscription/subscribe_thread.cc index 4ca590f6204..ad199cfa209 100644 --- a/cpp-client/deephaven/dhclient/src/subscription/subscribe_thread.cc +++ b/cpp-client/deephaven/dhclient/src/subscription/subscribe_thread.cc @@ -68,7 +68,7 @@ class SubscribeState final { class UpdateProcessor final : public SubscriptionHandle { public: [[nodiscard]] - static std::shared_ptr startThread(std::unique_ptr fsr, + static std::shared_ptr StartThread(std::unique_ptr fsr, std::unique_ptr fsw, std::shared_ptr schema, std::shared_ptr callback); @@ -158,22 +158,21 @@ std::shared_ptr SubscribeState::InvokeHelper() { descriptor.type = arrow::flight::FlightDescriptor::DescriptorType::CMD; descriptor.cmd = std::string(magic_data, 4); - std::unique_ptr fsw; - std::unique_ptr fsr; - OkOrThrow(DEEPHAVEN_LOCATION_EXPR(client->DoExchange(fco, 
descriptor, &fsw, &fsr))); + auto res = client->DoExchange(fco, descriptor); + OkOrThrow(DEEPHAVEN_LOCATION_EXPR(res)); auto sub_req_raw = BarrageProcessor::CreateSubscriptionRequest(ticketBytes_.data(), ticketBytes_.size()); auto buffer = std::make_shared(std::move(sub_req_raw)); - OkOrThrow(DEEPHAVEN_LOCATION_EXPR(fsw->WriteMetadata(std::move(buffer)))); + OkOrThrow(DEEPHAVEN_LOCATION_EXPR(res->writer->WriteMetadata(std::move(buffer)))); // Run forever (until error or cancellation) - auto processor = UpdateProcessor::startThread(std::move(fsr), std::move(fsw), std::move(schema_), - std::move(callback_)); + auto processor = UpdateProcessor::StartThread(std::move(res->reader), std::move(res->writer), + std::move(schema_), std::move(callback_)); return processor; } -std::shared_ptr UpdateProcessor::startThread( +std::shared_ptr UpdateProcessor::StartThread( std::unique_ptr fsr, std::unique_ptr fsw, std::shared_ptr schema, @@ -195,12 +194,12 @@ UpdateProcessor::~UpdateProcessor() { } void UpdateProcessor::Cancel() { - static const char *const me = "UpdateProcessor::Cancel"; - gpr_log(GPR_INFO, "%s: Subscription Shutdown requested.", me); + constexpr const char *const kMe = "UpdateProcessor::Cancel"; + gpr_log(GPR_INFO, "%s: Subscription Shutdown requested.", kMe); std::unique_lock guard(mutex_); if (cancelled_) { guard.unlock(); // to be nice - gpr_log(GPR_ERROR, "%s: Already cancelled.", me); + gpr_log(GPR_ERROR, "%s: Already cancelled.", kMe); return; } cancelled_ = true; @@ -223,12 +222,12 @@ void UpdateProcessor::RunUntilCancelled(std::shared_ptr self) { void UpdateProcessor::RunForeverHelper() { // Reuse the chunk for efficiency. - arrow::flight::FlightStreamChunk flight_stream_chunk; BarrageProcessor bp(schema_); // Process Arrow Flight messages until error or cancellation. 
while (true) { - OkOrThrow(DEEPHAVEN_LOCATION_EXPR(fsr_->Next(&flight_stream_chunk))); - const auto &cols = flight_stream_chunk.data->columns(); + auto chunk = fsr_->Next(); + OkOrThrow(DEEPHAVEN_LOCATION_EXPR(chunk)); + const auto &cols = chunk->data->columns(); auto column_sources = MakeReservedVector>(cols.size()); auto sizes = MakeReservedVector(cols.size()); for (const auto &col : cols) { @@ -239,9 +238,9 @@ void UpdateProcessor::RunForeverHelper() { const void *metadata = nullptr; size_t metadata_size = 0; - if (flight_stream_chunk.app_metadata != nullptr) { - metadata = flight_stream_chunk.app_metadata->data(); - metadata_size = flight_stream_chunk.app_metadata->size(); + if (chunk->app_metadata != nullptr) { + metadata = chunk->app_metadata->data(); + metadata_size = chunk->app_metadata->size(); } auto result = bp.ProcessNextChunk(column_sources, sizes, metadata, metadata_size); @@ -307,9 +306,9 @@ ColumnSourceAndSize ArrayToColumnSource(const arrow::Array &array) { throw std::runtime_error(DEEPHAVEN_LOCATION_STR(message)); } - const auto listElement = list_array->GetScalar(0).ValueOrDie(); + const auto list_element = list_array->GetScalar(0).ValueOrDie(); const auto *list_scalar = VerboseCast( - DEEPHAVEN_LOCATION_EXPR(listElement.get())); + DEEPHAVEN_LOCATION_EXPR(list_element.get())); const auto &list_scalar_value = list_scalar->value; ArrayToColumnSourceVisitor v(list_scalar_value); diff --git a/cpp-client/deephaven/dhclient/src/utility/table_maker.cc b/cpp-client/deephaven/dhclient/src/utility/table_maker.cc index 68d58301737..b7e4c6515c5 100644 --- a/cpp-client/deephaven/dhclient/src/utility/table_maker.cc +++ b/cpp-client/deephaven/dhclient/src/utility/table_maker.cc @@ -2,7 +2,6 @@ * Copyright (c) 2016-2022 Deephaven Data Labs and Patent Pending */ #include "deephaven/client/flight.h" -#include "deephaven/client/flight.h" #include "deephaven/client/utility/table_maker.h" #include "deephaven/client/utility/arrow_util.h" #include "deephaven/dhcore/utility/utility.h" @@ -19,11 +18,11 @@ TableMaker::TableMaker() = default; TableMaker::~TableMaker() = default; void TableMaker::FinishAddColumn(std::string name, internal::TypeConverter info) { - auto kvMetadata = std::make_shared(); - OkOrThrow(DEEPHAVEN_LOCATION_EXPR(kvMetadata->Set("deephaven:type", info.DeephavenType()))); + auto kv_metadata = std::make_shared(); + OkOrThrow(DEEPHAVEN_LOCATION_EXPR(kv_metadata->Set("deephaven:type", info.DeephavenType()))); auto field = std::make_shared(std::move(name), std::move(info.DataType()), true, - std::move(kvMetadata)); + std::move(kv_metadata)); OkOrThrow(DEEPHAVEN_LOCATION_EXPR(schemaBuilder_.AddField(field))); if (columns_.empty()) { @@ -42,30 +41,28 @@ TableHandle TableMaker::MakeTable(const TableHandleManager &manager) { auto wrapper = manager.CreateFlightWrapper(); auto ticket = manager.NewTicket(); - auto flightDescriptor = ConvertTicketToFlightDescriptor(ticket); + auto flight_descriptor = ConvertTicketToFlightDescriptor(ticket); arrow::flight::FlightCallOptions options; wrapper.AddHeaders(&options); - std::unique_ptr fsw; - std::unique_ptr fmr; - OkOrThrow(DEEPHAVEN_LOCATION_EXPR(wrapper.FlightClient()->DoPut(options, flightDescriptor, - schema, &fsw, &fmr))); + auto res = wrapper.FlightClient()->DoPut(options, flight_descriptor, schema); + OkOrThrow(DEEPHAVEN_LOCATION_EXPR(res)); auto batch = arrow::RecordBatch::Make(schema, numRows_, std::move(columns_)); - OkOrThrow(DEEPHAVEN_LOCATION_EXPR(fsw->WriteRecordBatch(*batch))); - 
OkOrThrow(DEEPHAVEN_LOCATION_EXPR(fsw->DoneWriting())); + OkOrThrow(DEEPHAVEN_LOCATION_EXPR(res->writer->WriteRecordBatch(*batch))); + OkOrThrow(DEEPHAVEN_LOCATION_EXPR(res->writer->DoneWriting())); std::shared_ptr buf; - OkOrThrow(DEEPHAVEN_LOCATION_EXPR(fmr->ReadMetadata(&buf))); - OkOrThrow(DEEPHAVEN_LOCATION_EXPR(fsw->Close())); + OkOrThrow(DEEPHAVEN_LOCATION_EXPR(res->reader->ReadMetadata(&buf))); + OkOrThrow(DEEPHAVEN_LOCATION_EXPR(res->writer->Close())); return manager.MakeTableHandleFromTicket(std::move(ticket)); } namespace internal { -TypeConverter::TypeConverter(std::shared_ptr dataType, - std::string deephavenType, std::shared_ptr column) : - dataType_(std::move(dataType)), deephavenType_(std::move(deephavenType)), +TypeConverter::TypeConverter(std::shared_ptr data_type, + std::string deephaven_type, std::shared_ptr column) : + dataType_(std::move(data_type)), deephavenType_(std::move(deephaven_type)), column_(std::move(column)) {} TypeConverter::~TypeConverter() = default;
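Taken together, the last two commits follow one mechanical recipe: replace deprecated Arrow Flight calls that fill out-parameters and return `arrow::Status` with their `arrow::Result`-returning equivalents, then let `OkOrThrow` check the `Result`. Below is a condensed sketch of that recipe using the DoPut sequence from the table_maker.cc hunk above; the surrounding free function and its parameters are placeholders for illustration, not Deephaven API (in the real code this logic lives inside `TableMaker::MakeTable`).

```cpp
#include <memory>
#include <arrow/api.h>
#include <arrow/flight/client.h>
#include "deephaven/client/utility/arrow_util.h"
#include "deephaven/dhcore/utility/utility.h"  // DEEPHAVEN_LOCATION_EXPR

using deephaven::client::utility::OkOrThrow;

// Placeholder function showing the migrated call pattern.
void PutOneBatch(arrow::flight::FlightClient &client,
    const arrow::flight::FlightDescriptor &descriptor,
    const std::shared_ptr<arrow::Schema> &schema,
    const arrow::RecordBatch &batch) {
  arrow::flight::FlightCallOptions options;

  // Deprecated style, for contrast:
  //   std::unique_ptr<arrow::flight::FlightStreamWriter> fsw;
  //   std::unique_ptr<arrow::flight::FlightMetadataReader> fmr;
  //   OkOrThrow(DEEPHAVEN_LOCATION_EXPR(client.DoPut(options, descriptor, schema, &fsw, &fmr)));

  // Current style: a single arrow::Result carrying both the stream writer
  // and the metadata reader.
  auto res = client.DoPut(options, descriptor, schema);
  OkOrThrow(DEEPHAVEN_LOCATION_EXPR(res));

  OkOrThrow(DEEPHAVEN_LOCATION_EXPR(res->writer->WriteRecordBatch(batch)));
  OkOrThrow(DEEPHAVEN_LOCATION_EXPR(res->writer->DoneWriting()));

  std::shared_ptr<arrow::Buffer> ack;
  OkOrThrow(DEEPHAVEN_LOCATION_EXPR(res->reader->ReadMetadata(&ack)));
  OkOrThrow(DEEPHAVEN_LOCATION_EXPR(res->writer->Close()));
}
```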