diff --git a/velox/expression/CastExpr.cpp b/velox/expression/CastExpr.cpp index f46860f2e2af..0da3b7b45e98 100644 --- a/velox/expression/CastExpr.cpp +++ b/velox/expression/CastExpr.cpp @@ -718,7 +718,7 @@ void CastExpr::applyPeeled( auto applyCustomCast = [&]() { if (castToOperator) { - castToOperator->castTo(input, context, rows, toType, result); + castToOperator->castTo(input, context, rows, toType, result, hooks_); } else { castFromOperator->castFrom(input, context, rows, toType, result); } diff --git a/velox/expression/CastExpr.h b/velox/expression/CastExpr.h index df92fc0d7514..01bcc053a56e 100644 --- a/velox/expression/CastExpr.h +++ b/velox/expression/CastExpr.h @@ -52,6 +52,16 @@ class CastOperator { const TypePtr& resultType, VectorPtr& result) const = 0; + virtual void castTo( + const BaseVector& input, + exec::EvalCtx& context, + const SelectivityVector& rows, + const TypePtr& resultType, + VectorPtr& result, + const std::shared_ptr& /* hooks */) const { + castTo(input, context, rows, resultType, result); + } + /// Casts a vector of the custom type to another type. This function should /// not throw when processing input rows, but report errors via /// context.setError(). diff --git a/velox/functions/prestosql/tests/JsonCastTest.cpp b/velox/functions/prestosql/tests/JsonCastTest.cpp index 7aecab1a672f..0e2aca2a5db7 100644 --- a/velox/functions/prestosql/tests/JsonCastTest.cpp +++ b/velox/functions/prestosql/tests/JsonCastTest.cpp @@ -359,7 +359,10 @@ TEST_F(JsonCastTest, fromDate) { testCastToJson( DATE(), {0, 1000, -10000, std::nullopt}, - {"1970-01-01"_sv, "1972-09-27"_sv, "1942-08-16"_sv, std::nullopt}); + {"\"1970-01-01\""_sv, + "\"1972-09-27\""_sv, + "\"1942-08-16\""_sv, + std::nullopt}); testCastToJson( DATE(), {std::nullopt, std::nullopt, std::nullopt, std::nullopt}, @@ -402,9 +405,9 @@ TEST_F(JsonCastTest, fromTimestamp) { Timestamp{10000000, 0}, Timestamp{-1, 9000}, std::nullopt}, - {"1970-01-01T00:00:00.000000000"_sv, - "1970-04-26T17:46:40.000000000"_sv, - "1969-12-31T23:59:59.000009000"_sv, + {"\"1970-01-01 00:00:00.000\""_sv, + "\"1970-04-26 17:46:40.000\""_sv, + "\"1969-12-31 23:59:59.000\""_sv, std::nullopt}); testCastToJson( TIMESTAMP(), @@ -450,6 +453,13 @@ TEST_F(JsonCastTest, fromArray) { "[red,blue]", "[null,null,purple]", "[]"}; testCastFromArray(ARRAY(JSON()), array, expectedJsonArray); + // Tests array of Timestamp elements. + TwoDimVector arrayTimestamps{ + {Timestamp{0, 0}, Timestamp{10000000, 0}}}; + std::vector> expectedTimestamp{ + "[\"1970-01-01 00:00:00.000\",\"1970-04-26 17:46:40.000\"]"}; + testCastFromArray(ARRAY(TIMESTAMP()), arrayTimestamps, expectedTimestamp); + // Tests array whose elements are of unknown type. auto arrayOfUnknownElements = makeArrayWithDictionaryElements( {std::nullopt, std::nullopt, std::nullopt, std::nullopt}, @@ -538,6 +548,14 @@ TEST_F(JsonCastTest, fromMap) { R"({"false":2,"true":null})", "{}"}; testCastFromMap(MAP(BOOLEAN(), BIGINT()), mapBoolKey, expectedBoolKey); + // Tests map with Timestamp values. + std::vector>> mapTimestamp{ + {{3, Timestamp{0, 0}}, {4, Timestamp{0, 0}}}, {}}; + std::vector> expectedTimestamp{ + R"({"3":"1970-01-01 00:00:00.000","4":"1970-01-01 00:00:00.000"})", "{}"}; + testCastFromMap( + MAP(SMALLINT(), TIMESTAMP()), mapTimestamp, expectedTimestamp); + // Tests map whose values are of unknown type. std::vector> keys{ "a"_sv, "b"_sv, "c"_sv, "d"_sv, "e"_sv, "f"_sv, "g"_sv}; @@ -624,6 +642,13 @@ TEST_F(JsonCastTest, fromRow) { child3, expectedJsonChild); + // Tests row whose children are Timestamps. + auto rowOfTimestampElements = makeRowWithDictionaryElements( + {{Timestamp{0, 0}, Timestamp{10000000, 0}}}, ROW({TIMESTAMP()})); + auto rowOfTimestampElementsExpected = makeNullableFlatVector( + {"[null]", "[\"1970-01-01 00:00:00.000\"]"}, JSON()); + testCast(rowOfTimestampElements, rowOfTimestampElementsExpected); + // Tests row whose children are of unknown type. auto rowOfUnknownChildren = makeRowWithDictionaryElements( {{std::nullopt, std::nullopt}, {std::nullopt, std::nullopt}}, diff --git a/velox/functions/prestosql/types/JsonType.cpp b/velox/functions/prestosql/types/JsonType.cpp index 1a9a2bb957eb..7875606f7870 100644 --- a/velox/functions/prestosql/types/JsonType.cpp +++ b/velox/functions/prestosql/types/JsonType.cpp @@ -45,7 +45,8 @@ void generateJsonTyped( const SimpleVector& input, int row, std::string& result, - const TypePtr& type) { + const TypePtr& type, + const std::shared_ptr& hooks) { auto value = input.valueAt(row); if constexpr (std::is_same_v) { @@ -80,9 +81,30 @@ void generateJsonTyped( folly::toAppend(value, &result); } } else if constexpr (std::is_same_v) { - result.append(std::to_string(value)); + std::string buffer; + if (hooks) { + Timestamp inputValue = value; + const auto& options = hooks->timestampToStringOptions(); + if (options.timeZone) { + inputValue.toTimezone(*(options.timeZone)); + } + buffer.resize(getMaxStringLength(options)); + const auto stringView = + Timestamp::tsToStringView(inputValue, options, buffer.data()); + buffer.resize(stringView.size()); + } else { + buffer = std::to_string(value); + } + result.reserve(buffer.size() + 2); + result.append("\""); + result.append(buffer); + result.append("\""); } else if (type->isDate()) { - result.append(DATE()->toString(value)); + std::string stringValue = DATE()->toString(value); + result.reserve(stringValue.size() + 2); + result.append("\""); + result.append(stringValue); + result.append("\""); } else if (type->isDecimal()) { result.append(DecimalUtil::toString(value, type)); } else { @@ -96,7 +118,8 @@ void generateJsonNonKeyTyped( const SimpleVector& inputVector, exec::EvalCtx& context, const SelectivityVector& rows, - FlatVector& flatResult) { + FlatVector& flatResult, + const std::shared_ptr& hooks) { std::string result; context.applyToSelectedNoThrow(rows, [&](auto row) { if (inputVector.isNullAt(row)) { @@ -104,7 +127,7 @@ void generateJsonNonKeyTyped( } else { result.clear(); generateJsonTyped( - inputVector, row, result, inputVector.type()); + inputVector, row, result, inputVector.type(), hooks); flatResult.set(row, StringView{result}); } @@ -116,7 +139,8 @@ void generateJsonKeyTyped( const SimpleVector& inputVector, exec::EvalCtx& context, const SelectivityVector& rows, - FlatVector& flatResult) { + FlatVector& flatResult, + const std::shared_ptr& hooks) { std::string result; context.applyToSelectedNoThrow(rows, [&](auto row) { if (inputVector.isNullAt(row)) { @@ -129,7 +153,7 @@ void generateJsonKeyTyped( } generateJsonTyped( - inputVector, row, result, inputVector.type()); + inputVector, row, result, inputVector.type(), hooks); if constexpr (!std::is_same_v) { result.append("\""); @@ -149,6 +173,7 @@ void castToJson( exec::EvalCtx& context, const SelectivityVector& rows, FlatVector& flatResult, + const std::shared_ptr& hooks, bool isMapKey = false) { using T = typename TypeTraits::NativeType; @@ -160,15 +185,18 @@ void castToJson( if (FOLLY_LIKELY(!legacyCast)) { if (!isMapKey) { generateJsonNonKeyTyped( - *inputVector, context, rows, flatResult); + *inputVector, context, rows, flatResult, hooks); } else { - generateJsonKeyTyped(*inputVector, context, rows, flatResult); + generateJsonKeyTyped( + *inputVector, context, rows, flatResult, hooks); } } else { if (!isMapKey) { - generateJsonNonKeyTyped(*inputVector, context, rows, flatResult); + generateJsonNonKeyTyped( + *inputVector, context, rows, flatResult, hooks); } else { - generateJsonKeyTyped(*inputVector, context, rows, flatResult); + generateJsonKeyTyped( + *inputVector, context, rows, flatResult, hooks); } } } @@ -178,19 +206,22 @@ void castToJsonFromArray( const BaseVector& input, exec::EvalCtx& context, const SelectivityVector& rows, - FlatVector& flatResult); + FlatVector& flatResult, + const std::shared_ptr& hooks); void castToJsonFromMap( const BaseVector& input, exec::EvalCtx& context, const SelectivityVector& rows, - FlatVector& flatResult); + FlatVector& flatResult, + const std::shared_ptr& hooks); void castToJsonFromRow( const BaseVector& input, exec::EvalCtx& context, const SelectivityVector& rows, - FlatVector& flatResult); + FlatVector& flatResult, + const std::shared_ptr& hooks); // Casts complex-type input vectors to Json type. template < @@ -201,16 +232,17 @@ void castToJson( exec::EvalCtx& context, const SelectivityVector& rows, FlatVector& flatResult, + const std::shared_ptr& hooks, bool isMapKey = false) { VELOX_CHECK( !isMapKey, "Casting map with complex key type to JSON is not supported"); if constexpr (kind == TypeKind::ARRAY) { - castToJsonFromArray(input, context, rows, flatResult); + castToJsonFromArray(input, context, rows, flatResult, hooks); } else if constexpr (kind == TypeKind::MAP) { - castToJsonFromMap(input, context, rows, flatResult); + castToJsonFromMap(input, context, rows, flatResult, hooks); } else if constexpr (kind == TypeKind::ROW) { - castToJsonFromRow(input, context, rows, flatResult); + castToJsonFromRow(input, context, rows, flatResult, hooks); } else { VELOX_FAIL( "Casting {} to JSON is not supported.", input.type()->toString()); @@ -224,6 +256,7 @@ struct AsJson { const VectorPtr& input, const SelectivityVector& rows, const BufferPtr& elementToTopLevelRows, + const std::shared_ptr& hooks, bool isMapKey = false) : decoded_(context) { VELOX_CHECK(rows.hasSelections()); @@ -234,7 +267,7 @@ struct AsJson { json_ = input; } else { if (!exec::PeeledEncoding::isPeelable(input->encoding())) { - doCast(context, input, rows, isMapKey, json_); + doCast(context, input, rows, isMapKey, json_, hooks); } else { exec::withContextSaver([&](exec::ContextSaver& saver) { exec::LocalSelectivityVector newRowsHolder(*context.execCtx()); @@ -250,7 +283,7 @@ struct AsJson { context.saveAndReset(saver, rows); context.setPeeledEncoding(peeledEncoding); - doCast(context, peeledVectors[0], *newRows, isMapKey, json_); + doCast(context, peeledVectors[0], *newRows, isMapKey, json_, hooks); json_ = context.getPeeledEncoding()->wrap( json_->type(), context.pool(), json_, rows); }); @@ -300,7 +333,8 @@ struct AsJson { const VectorPtr& input, const SelectivityVector& baseRows, bool isMapKey, - VectorPtr& result) { + VectorPtr& result, + const std::shared_ptr& hooks) { context.ensureWritable(baseRows, JSON(), result); auto flatJsonStrings = result->as>(); @@ -311,6 +345,7 @@ struct AsJson { context, baseRows, *flatJsonStrings, + hooks, isMapKey); } @@ -344,7 +379,8 @@ void castToJsonFromArray( const BaseVector& input, exec::EvalCtx& context, const SelectivityVector& rows, - FlatVector& flatResult) { + FlatVector& flatResult, + const std::shared_ptr& hooks) { // input is guaranteed to be in flat encoding when passed in. auto inputArray = input.as(); @@ -369,7 +405,8 @@ void castToJsonFromArray( auto elementToTopLevelRows = functions::getElementToTopLevelRows( elements->size(), rows, inputArray, context.pool()); - AsJson elementsAsJson{context, elements, elementsRows, elementToTopLevelRows}; + AsJson elementsAsJson{ + context, elements, elementsRows, elementToTopLevelRows, hooks}; // Estimates an upperbound of the total length of all Json strings for the // input according to the length of all elements Json strings and the @@ -422,7 +459,8 @@ void castToJsonFromMap( const BaseVector& input, exec::EvalCtx& context, const SelectivityVector& rows, - FlatVector& flatResult) { + FlatVector& flatResult, + const std::shared_ptr& hooks) { // input is guaranteed to be in flat encoding when passed in. auto inputMap = input.as(); @@ -450,8 +488,9 @@ void castToJsonFromMap( // Maps with unsupported key types should have already been rejected by // JsonCastOperator::isSupportedType() beforehand. AsJson keysAsJson{ - context, mapKeys, elementsRows, elementToTopLevelRows, true}; - AsJson valuesAsJson{context, mapValues, elementsRows, elementToTopLevelRows}; + context, mapKeys, elementsRows, elementToTopLevelRows, hooks, true}; + AsJson valuesAsJson{ + context, mapValues, elementsRows, elementToTopLevelRows, hooks}; // Estimates an upperbound of the total length of all Json strings for the // input according to the length of all elements Json strings and the @@ -516,7 +555,8 @@ void castToJsonFromRow( const BaseVector& input, exec::EvalCtx& context, const SelectivityVector& rows, - FlatVector& flatResult) { + FlatVector& flatResult, + const std::shared_ptr& hooks) { // input is guaranteed to be in flat encoding when passed in. VELOX_CHECK_EQ(input.encoding(), VectorEncoding::Simple::ROW); auto inputRow = input.as(); @@ -528,7 +568,8 @@ void castToJsonFromRow( size_t childrenStringSize = 0; std::vector childrenAsJson; for (int i = 0; i < childrenSize; ++i) { - childrenAsJson.emplace_back(context, inputRow->childAt(i), rows, nullptr); + childrenAsJson.emplace_back( + context, inputRow->childAt(i), rows, nullptr, hooks); context.applyToSelectedNoThrow(rows, [&](auto row) { if (inputRow->isNullAt(row)) { @@ -1073,6 +1114,14 @@ class JsonCastOperator : public exec::CastOperator { const TypePtr& resultType, VectorPtr& result) const override; + void castTo( + const BaseVector& input, + exec::EvalCtx& context, + const SelectivityVector& rows, + const TypePtr& resultType, + VectorPtr& result, + const std::shared_ptr& hooks) const override; + void castFrom( const BaseVector& input, exec::EvalCtx& context, @@ -1197,13 +1246,23 @@ void JsonCastOperator::castTo( const SelectivityVector& rows, const TypePtr& resultType, VectorPtr& result) const { + castTo(input, context, rows, resultType, result, nullptr); +} + +void JsonCastOperator::castTo( + const BaseVector& input, + exec::EvalCtx& context, + const SelectivityVector& rows, + const TypePtr& resultType, + VectorPtr& result, + const std::shared_ptr& hooks) const { context.ensureWritable(rows, resultType, result); auto* flatResult = result->as>(); // Casting from VARBINARY and OPAQUE are not supported and should have been // rejected by isSupportedType() in the caller. VELOX_DYNAMIC_TYPE_DISPATCH_ALL( - castToJson, input.typeKind(), input, context, rows, *flatResult); + castToJson, input.typeKind(), input, context, rows, *flatResult, hooks); } /// Converts an input vector from Json type to the type of result vector.