From a04d89b9ef212ec0c26338a61c1543728b39798c Mon Sep 17 00:00:00 2001 From: kinash-varvara Date: Sun, 18 Feb 2024 18:48:11 +0300 Subject: [PATCH] add WriteNanAsString option in Yson::SerializeJson (#1871) --- ydb/library/yql/minikql/dom/json.cpp | 5 +- ydb/library/yql/minikql/dom/json.h | 2 +- ydb/library/yql/minikql/dom/ut/yson_ut.cpp | 25 ++++++++ .../common/yson2/test/canondata/result.json | 5 ++ .../results.txt | 59 +++++++++++++++++++ .../yson2/test/cases/JsonWithNanAsString.sql | 8 +++ .../yql/udfs/common/yson2/yson2_udf.cpp | 5 +- 7 files changed, 105 insertions(+), 4 deletions(-) create mode 100644 ydb/library/yql/udfs/common/yson2/test/canondata/test.test_JsonWithNanAsString_/results.txt create mode 100644 ydb/library/yql/udfs/common/yson2/test/cases/JsonWithNanAsString.sql diff --git a/ydb/library/yql/minikql/dom/json.cpp b/ydb/library/yql/minikql/dom/json.cpp index 32f26be426b2..a29d044adf71 100644 --- a/ydb/library/yql/minikql/dom/json.cpp +++ b/ydb/library/yql/minikql/dom/json.cpp @@ -323,10 +323,13 @@ TUnboxedValue TryParseJsonDom(const TStringBuf json, const IValueBuilder* valueB } } -TString SerializeJsonDom(const NUdf::TUnboxedValuePod dom, bool skipMapEntity, bool encodeUtf8) { +TString SerializeJsonDom(const NUdf::TUnboxedValuePod dom, bool skipMapEntity, bool encodeUtf8, bool writeNanAsString) { TStringStream output; TJsonWriterConfig config; + config.SetFormatOutput(false); + config.WriteNanAsString = writeNanAsString; + config.FloatToStringMode = EFloatToStringMode::PREC_AUTO; TJsonWriter writer(&output, config); if (skipMapEntity) diff --git a/ydb/library/yql/minikql/dom/json.h b/ydb/library/yql/minikql/dom/json.h index b234fd856152..daa743d01a7d 100644 --- a/ydb/library/yql/minikql/dom/json.h +++ b/ydb/library/yql/minikql/dom/json.h @@ -9,6 +9,6 @@ bool IsValidJson(const TStringBuf json); NUdf::TUnboxedValue TryParseJsonDom(const TStringBuf json, const NUdf::IValueBuilder* valueBuilder, bool decodeUtf8 = false); -TString SerializeJsonDom(const NUdf::TUnboxedValuePod dom, bool skipMapEntity = false, bool encodeUtf8 = false); +TString SerializeJsonDom(const NUdf::TUnboxedValuePod dom, bool skipMapEntity = false, bool encodeUtf8 = false, bool writeNanAsString = false); } diff --git a/ydb/library/yql/minikql/dom/ut/yson_ut.cpp b/ydb/library/yql/minikql/dom/ut/yson_ut.cpp index fda2b7e487e1..c07af9756e77 100644 --- a/ydb/library/yql/minikql/dom/ut/yson_ut.cpp +++ b/ydb/library/yql/minikql/dom/ut/yson_ut.cpp @@ -1,5 +1,7 @@ #include +#include + #include #include #include @@ -2059,4 +2061,27 @@ Y_UNIT_TEST_SUITE(TYsonTests) { const auto time = TInstant::Now() - t; Cerr << "Time is " << time << Endl; } + + Y_UNIT_TEST(TestSerializeJsonNanInf) { + NMiniKQL::TScopedAlloc alloc(__LOCATION__); + NMiniKQL::TMemoryUsageInfo memInfo("Memory"); + NMiniKQL::THolderFactory holderFactory(alloc.Ref(), memInfo, nullptr); + NMiniKQL::TDefaultValueBuilder builder(holderFactory); + + constexpr char yson[] = + R"( + { + "Nan" = %nan; + "Inf" = %inf; + "NegInf" = %-inf + } + )"; + + TString expected(R"({"Inf":"inf","Nan":"nan","NegInf":"-inf"})"); + + const auto dom = TryParseYsonDom(yson, &builder); + TString res = SerializeJsonDom(dom, false, true, true); + + UNIT_ASSERT_EQUAL(expected, res); + } } diff --git a/ydb/library/yql/udfs/common/yson2/test/canondata/result.json b/ydb/library/yql/udfs/common/yson2/test/canondata/result.json index f4c95fca9ef4..e8db385e4542 100644 --- a/ydb/library/yql/udfs/common/yson2/test/canondata/result.json +++ b/ydb/library/yql/udfs/common/yson2/test/canondata/result.json @@ -114,6 +114,11 @@ "uri": "file://test.test_JsonWithUtf8_/results.txt" } ], + "test.test[JsonWithNanAsString]": [ + { + "uri": "file://test.test_JsonWithNanAsString_/results.txt" + } + ], "test.test[Lists]": [ { "uri": "file://test.test_Lists_/results.txt" diff --git a/ydb/library/yql/udfs/common/yson2/test/canondata/test.test_JsonWithNanAsString_/results.txt b/ydb/library/yql/udfs/common/yson2/test/canondata/test.test_JsonWithNanAsString_/results.txt new file mode 100644 index 000000000000..ad19dad97df6 --- /dev/null +++ b/ydb/library/yql/udfs/common/yson2/test/canondata/test.test_JsonWithNanAsString_/results.txt @@ -0,0 +1,59 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ]; + [ + "column2"; + [ + "OptionalType"; + [ + "DataType"; + "Json" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "\"nan\"" + ]; + [ + "\"inf\"" + ]; + [ + "\"-inf\"" + ] + ] + ] + } + ] + } +] \ No newline at end of file diff --git a/ydb/library/yql/udfs/common/yson2/test/cases/JsonWithNanAsString.sql b/ydb/library/yql/udfs/common/yson2/test/cases/JsonWithNanAsString.sql new file mode 100644 index 000000000000..33002ffc034c --- /dev/null +++ b/ydb/library/yql/udfs/common/yson2/test/cases/JsonWithNanAsString.sql @@ -0,0 +1,8 @@ +$src = Yson::From(0./0.); -- nan +$src1 = Yson::From(1./0.); -- inf +$src2 = Yson::From(-1./0.); -- -inf + +SELECT + Yson::SerializeJson($src, true AS WriteNanAsString), + Yson::SerializeJson($src1, true AS WriteNanAsString), + Yson::SerializeJson($src2, true AS WriteNanAsString) \ No newline at end of file diff --git a/ydb/library/yql/udfs/common/yson2/yson2_udf.cpp b/ydb/library/yql/udfs/common/yson2/yson2_udf.cpp index 846bf0abf3c2..45ff1bb8ffae 100644 --- a/ydb/library/yql/udfs/common/yson2/yson2_udf.cpp +++ b/ydb/library/yql/udfs/common/yson2/yson2_udf.cpp @@ -670,9 +670,10 @@ SIMPLE_STRICT_UDF(TSerializePretty, TYson(TAutoMap)) { constexpr char SkipMapEntity[] = "SkipMapEntity"; constexpr char EncodeUtf8[] = "EncodeUtf8"; +constexpr char WriteNanAsString[] = "WriteNanAsString"; -SIMPLE_UDF_WITH_OPTIONAL_ARGS(TSerializeJson, TOptional(TAutoMap, TOptional, TNamedArg, TNamedArg), 3) try { - return valueBuilder->NewString(SerializeJsonDom(args[0], args[2].GetOrDefault(false), args[3].GetOrDefault(false))); +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TSerializeJson, TOptional(TAutoMap, TOptional, TNamedArg, TNamedArg, TNamedArg), 4) try { + return valueBuilder->NewString(SerializeJsonDom(args[0], args[2].GetOrDefault(false), args[3].GetOrDefault(false), args[4].GetOrDefault(false))); } catch (const std::exception& e) { if (ParseOptions(args[1]).Strict) { UdfTerminate((::TStringBuilder() << valueBuilder->WithCalleePosition(GetPos()) << " " << e.what()).data());