From 8cc0e514c7942d607ea84189c7e7dbc436fb8d50 Mon Sep 17 00:00:00 2001 From: Roman Udovichenko Date: Sat, 22 Jun 2024 08:50:30 +0300 Subject: [PATCH] [yt provider] Anonymous tables use single group by default (#5849) --- .../providers/yt/provider/yql_yt_datasink.cpp | 9 ++++ .../yt/provider/yql_yt_op_settings.cpp | 5 +++ .../yt/provider/yql_yt_op_settings.h | 1 + .../provider/yql_yt_physical_finalizing.cpp | 2 +- .../tests/sql/sql2yql/canondata/result.json | 28 +++++++++++++ .../suites/column_group/hint_anon-disable.cfg | 4 ++ .../column_group/hint_anon-perusage.cfg | 4 ++ .../suites/column_group/hint_anon-single.cfg | 4 ++ .../sql/suites/column_group/hint_anon.sql | 9 ++++ .../column_group/hint_anon_groups-disable.cfg | 4 ++ .../hint_anon_groups-perusage.cfg | 4 ++ .../column_group/hint_anon_groups-single.cfg | 4 ++ .../suites/column_group/hint_anon_groups.sql | 9 ++++ .../part0/canondata/result.json | 21 ++++++++++ .../part10/canondata/result.json | 42 +++++++++++++++++++ .../part17/canondata/result.json | 21 ++++++++++ .../part19/canondata/result.json | 21 ++++++++++ .../part8/canondata/result.json | 21 ++++++++++ 18 files changed, 212 insertions(+), 1 deletion(-) create mode 100644 ydb/library/yql/tests/sql/suites/column_group/hint_anon-disable.cfg create mode 100644 ydb/library/yql/tests/sql/suites/column_group/hint_anon-perusage.cfg create mode 100644 ydb/library/yql/tests/sql/suites/column_group/hint_anon-single.cfg create mode 100644 ydb/library/yql/tests/sql/suites/column_group/hint_anon.sql create mode 100644 ydb/library/yql/tests/sql/suites/column_group/hint_anon_groups-disable.cfg create mode 100644 ydb/library/yql/tests/sql/suites/column_group/hint_anon_groups-perusage.cfg create mode 100644 ydb/library/yql/tests/sql/suites/column_group/hint_anon_groups-single.cfg create mode 100644 ydb/library/yql/tests/sql/suites/column_group/hint_anon_groups.sql diff --git a/ydb/library/yql/providers/yt/provider/yql_yt_datasink.cpp b/ydb/library/yql/providers/yt/provider/yql_yt_datasink.cpp index 879450f2464e..784b59fb50e6 100644 --- a/ydb/library/yql/providers/yt/provider/yql_yt_datasink.cpp +++ b/ydb/library/yql/providers/yt/provider/yql_yt_datasink.cpp @@ -271,6 +271,15 @@ class TYtDataSink : public TDataProviderBase { ctx.NewAtom(res->Child(TYtWriteTable::idx_Settings)->Pos(), normalized, TNodeFlags::MultilineContent), ctx) ); + } else if (NYql::HasSetting(*res->Child(TYtWriteTable::idx_Table)->Child(TYtTable::idx_Settings), EYtSettingType::Anonymous)) { + if (const auto mode = State_->Configuration->ColumnGroupMode.Get().GetOrElse(EColumnGroupMode::Disable); mode != EColumnGroupMode::Disable) { + res = ctx.ChangeChild(*res, TYtWriteTable::idx_Settings, + NYql::AddSetting(*res->Child(TYtWriteTable::idx_Settings), + EYtSettingType::ColumnGroups, + ctx.NewAtom(res->Child(TYtWriteTable::idx_Settings)->Pos(), NYql::GetSingleColumnGroupSpec(), TNodeFlags::MultilineContent), + ctx) + ); + } } auto mutationId = ++NextMutationId_; res = ctx.ChangeChild(*res, TYtWriteTable::idx_Settings, diff --git a/ydb/library/yql/providers/yt/provider/yql_yt_op_settings.cpp b/ydb/library/yql/providers/yt/provider/yql_yt_op_settings.cpp index c3d8c2d8d5c3..cccd13ec9612 100644 --- a/ydb/library/yql/providers/yt/provider/yql_yt_op_settings.cpp +++ b/ydb/library/yql/providers/yt/provider/yql_yt_op_settings.cpp @@ -955,6 +955,11 @@ TString NormalizeColumnGroupSpec(const TStringBuf spec) { } } +const TString& GetSingleColumnGroupSpec() { + static TString GROUP = NYT::NodeToCanonicalYsonString(NYT::TNode::CreateMap()("default", NYT::TNode::CreateEntity()), NYson::EYsonFormat::Text); + return GROUP; +} + TExprNode::TPtr GetSetting(const TExprNode& settings, EYtSettingType type) { for (auto& setting : settings.Children()) { if (setting->ChildrenSize() != 0 && FromString(setting->Child(0)->Content()) == type) { diff --git a/ydb/library/yql/providers/yt/provider/yql_yt_op_settings.h b/ydb/library/yql/providers/yt/provider/yql_yt_op_settings.h index a4a3474d5edd..b7063294d4e6 100644 --- a/ydb/library/yql/providers/yt/provider/yql_yt_op_settings.h +++ b/ydb/library/yql/providers/yt/provider/yql_yt_op_settings.h @@ -142,6 +142,7 @@ TExprNode::TPtr ToAtomList(const TContainer& columns, TPositionHandle pos, TExpr bool ValidateColumnGroups(const TExprNode& setting, const TStructExprType& rowType, TExprContext& ctx); TString NormalizeColumnGroupSpec(const TStringBuf spec); +const TString& GetSingleColumnGroupSpec(); TExprNode::TPtr ToColumnPairList(const TVector>& columns, TPositionHandle pos, TExprContext& ctx); diff --git a/ydb/library/yql/providers/yt/provider/yql_yt_physical_finalizing.cpp b/ydb/library/yql/providers/yt/provider/yql_yt_physical_finalizing.cpp index f5dd95e415b3..3fc607b5b133 100644 --- a/ydb/library/yql/providers/yt/provider/yql_yt_physical_finalizing.cpp +++ b/ydb/library/yql/providers/yt/provider/yql_yt_physical_finalizing.cpp @@ -2681,7 +2681,7 @@ class TYtPhysicalFinalizingTransformer : public TSyncTransformerBase { } if (EColumnGroupMode::Single == mode) { if (fullUsage[i]) { - groupSpecs[i] = NYT::NodeToCanonicalYsonString(NYT::TNode::CreateMap()("default", NYT::TNode::CreateEntity()), NYson::EYsonFormat::Text); + groupSpecs[i] = NYql::GetSingleColumnGroupSpec(); } } else { if (fullUsage[i]) { diff --git a/ydb/library/yql/tests/sql/sql2yql/canondata/result.json b/ydb/library/yql/tests/sql/sql2yql/canondata/result.json index 1db25b794884..8adf67d049d6 100644 --- a/ydb/library/yql/tests/sql/sql2yql/canondata/result.json +++ b/ydb/library/yql/tests/sql/sql2yql/canondata/result.json @@ -4325,6 +4325,20 @@ "uri": "https://{canondata_backend}/1937027/bbc35c51807ca32a384973d8a730422ad871c54c/resource.tar.gz#test_sql2yql.test_column_group-hint_/sql.yql" } ], + "test_sql2yql.test[column_group-hint_anon]": [ + { + "checksum": "dbb0cca1127bb0c111eaf0a628b2b9a1", + "size": 4108, + "uri": "https://{canondata_backend}/1942525/2065de9dd4b18a7276f6c5d85e09a5754c9a2fd8/resource.tar.gz#test_sql2yql.test_column_group-hint_anon_/sql.yql" + } + ], + "test_sql2yql.test[column_group-hint_anon_groups]": [ + { + "checksum": "1f44e91a47e42b8d5876da52098a98ce", + "size": 4144, + "uri": "https://{canondata_backend}/1942525/2065de9dd4b18a7276f6c5d85e09a5754c9a2fd8/resource.tar.gz#test_sql2yql.test_column_group-hint_anon_groups_/sql.yql" + } + ], "test_sql2yql.test[column_group-hint_append_fail]": [ { "checksum": "18aacc13c7921e65e61411ea0a31136e", @@ -23519,6 +23533,20 @@ "uri": "https://{canondata_backend}/1937027/bbc35c51807ca32a384973d8a730422ad871c54c/resource.tar.gz#test_sql_format.test_column_group-hint_/formatted.sql" } ], + "test_sql_format.test[column_group-hint_anon]": [ + { + "checksum": "0a7f14733fa0206794e5c41780561323", + "size": 230, + "uri": "https://{canondata_backend}/1942525/2065de9dd4b18a7276f6c5d85e09a5754c9a2fd8/resource.tar.gz#test_sql_format.test_column_group-hint_anon_/formatted.sql" + } + ], + "test_sql_format.test[column_group-hint_anon_groups]": [ + { + "checksum": "554c312c9220887717b03eb9a4392cb3", + "size": 273, + "uri": "https://{canondata_backend}/1942525/2065de9dd4b18a7276f6c5d85e09a5754c9a2fd8/resource.tar.gz#test_sql_format.test_column_group-hint_anon_groups_/formatted.sql" + } + ], "test_sql_format.test[column_group-hint_append_fail]": [ { "checksum": "d8756aff7c76d47d50affb769ec2cd4c", diff --git a/ydb/library/yql/tests/sql/suites/column_group/hint_anon-disable.cfg b/ydb/library/yql/tests/sql/suites/column_group/hint_anon-disable.cfg new file mode 100644 index 000000000000..648ff62ddc21 --- /dev/null +++ b/ydb/library/yql/tests/sql/suites/column_group/hint_anon-disable.cfg @@ -0,0 +1,4 @@ +in Input input.txt +providers yt +pragma yt.ColumnGroupMode="disable" +pragma yt.OptimizeFor="scan" diff --git a/ydb/library/yql/tests/sql/suites/column_group/hint_anon-perusage.cfg b/ydb/library/yql/tests/sql/suites/column_group/hint_anon-perusage.cfg new file mode 100644 index 000000000000..be63fb17e46e --- /dev/null +++ b/ydb/library/yql/tests/sql/suites/column_group/hint_anon-perusage.cfg @@ -0,0 +1,4 @@ +in Input input.txt +providers yt +pragma yt.ColumnGroupMode="perusage" +pragma yt.OptimizeFor="scan" diff --git a/ydb/library/yql/tests/sql/suites/column_group/hint_anon-single.cfg b/ydb/library/yql/tests/sql/suites/column_group/hint_anon-single.cfg new file mode 100644 index 000000000000..0ccae305fff2 --- /dev/null +++ b/ydb/library/yql/tests/sql/suites/column_group/hint_anon-single.cfg @@ -0,0 +1,4 @@ +in Input input.txt +providers yt +pragma yt.ColumnGroupMode="single" +pragma yt.OptimizeFor="scan" diff --git a/ydb/library/yql/tests/sql/suites/column_group/hint_anon.sql b/ydb/library/yql/tests/sql/suites/column_group/hint_anon.sql new file mode 100644 index 000000000000..b1d85f2d1573 --- /dev/null +++ b/ydb/library/yql/tests/sql/suites/column_group/hint_anon.sql @@ -0,0 +1,9 @@ +USE plato; + +$i = select * from Input where a > "a"; + +select a,b,c,d from $i; +select c,d,e,f from $i; + +-- Forces single group for $i +insert into @tmp select * from $i; diff --git a/ydb/library/yql/tests/sql/suites/column_group/hint_anon_groups-disable.cfg b/ydb/library/yql/tests/sql/suites/column_group/hint_anon_groups-disable.cfg new file mode 100644 index 000000000000..648ff62ddc21 --- /dev/null +++ b/ydb/library/yql/tests/sql/suites/column_group/hint_anon_groups-disable.cfg @@ -0,0 +1,4 @@ +in Input input.txt +providers yt +pragma yt.ColumnGroupMode="disable" +pragma yt.OptimizeFor="scan" diff --git a/ydb/library/yql/tests/sql/suites/column_group/hint_anon_groups-perusage.cfg b/ydb/library/yql/tests/sql/suites/column_group/hint_anon_groups-perusage.cfg new file mode 100644 index 000000000000..be63fb17e46e --- /dev/null +++ b/ydb/library/yql/tests/sql/suites/column_group/hint_anon_groups-perusage.cfg @@ -0,0 +1,4 @@ +in Input input.txt +providers yt +pragma yt.ColumnGroupMode="perusage" +pragma yt.OptimizeFor="scan" diff --git a/ydb/library/yql/tests/sql/suites/column_group/hint_anon_groups-single.cfg b/ydb/library/yql/tests/sql/suites/column_group/hint_anon_groups-single.cfg new file mode 100644 index 000000000000..0ccae305fff2 --- /dev/null +++ b/ydb/library/yql/tests/sql/suites/column_group/hint_anon_groups-single.cfg @@ -0,0 +1,4 @@ +in Input input.txt +providers yt +pragma yt.ColumnGroupMode="single" +pragma yt.OptimizeFor="scan" diff --git a/ydb/library/yql/tests/sql/suites/column_group/hint_anon_groups.sql b/ydb/library/yql/tests/sql/suites/column_group/hint_anon_groups.sql new file mode 100644 index 000000000000..384dbcfd3b1d --- /dev/null +++ b/ydb/library/yql/tests/sql/suites/column_group/hint_anon_groups.sql @@ -0,0 +1,9 @@ +USE plato; + +$i = select * from Input where a > "a"; + +select a,b,c,d from $i; +select c,d,e,f from $i; + +-- Forces specific group for $i +insert into @tmp with column_groups="{grp=[b;c;d]}" select * from $i; diff --git a/ydb/library/yql/tests/sql/yt_native_file/part0/canondata/result.json b/ydb/library/yql/tests/sql/yt_native_file/part0/canondata/result.json index 9d26eb76f9a5..0a10c7095d51 100644 --- a/ydb/library/yql/tests/sql/yt_native_file/part0/canondata/result.json +++ b/ydb/library/yql/tests/sql/yt_native_file/part0/canondata/result.json @@ -694,6 +694,27 @@ "uri": "https://{canondata_backend}/1775059/ac786a412ea7eeb03c2f46ced1b142e23374a289/resource.tar.gz#test.test_case-case_many_val--Results_/results.txt" } ], + "test.test[column_group-hint_anon_groups-single-Debug]": [ + { + "checksum": "5bf348df7ae8b5d8d99880c86ad09f56", + "size": 3097, + "uri": "https://{canondata_backend}/1936997/1750231bef89e714f3a763cde6bbd783904ec892/resource.tar.gz#test.test_column_group-hint_anon_groups-single-Debug_/opt.yql" + } + ], + "test.test[column_group-hint_anon_groups-single-Plan]": [ + { + "checksum": "73f0d1587e451cd10761f80c0a1d3588", + "size": 10676, + "uri": "https://{canondata_backend}/1936997/1750231bef89e714f3a763cde6bbd783904ec892/resource.tar.gz#test.test_column_group-hint_anon_groups-single-Plan_/plan.txt" + } + ], + "test.test[column_group-hint_anon_groups-single-Results]": [ + { + "checksum": "d45a995ed068b19c1a38df12d1c68865", + "size": 3442, + "uri": "https://{canondata_backend}/1936997/1750231bef89e714f3a763cde6bbd783904ec892/resource.tar.gz#test.test_column_group-hint_anon_groups-single-Results_/results.txt" + } + ], "test.test[column_order-select_where-default.txt-Debug]": [ { "checksum": "33550dd19e7f7f3984faa0659e9b0db9", diff --git a/ydb/library/yql/tests/sql/yt_native_file/part10/canondata/result.json b/ydb/library/yql/tests/sql/yt_native_file/part10/canondata/result.json index b1a9b3e40f36..640dc46e2db1 100644 --- a/ydb/library/yql/tests/sql/yt_native_file/part10/canondata/result.json +++ b/ydb/library/yql/tests/sql/yt_native_file/part10/canondata/result.json @@ -563,6 +563,48 @@ "uri": "https://{canondata_backend}/1809005/81aef895e3303f900cc0cb6245596fe9a0da6573/resource.tar.gz#test.test_coalesce-coalesce_sugar-default.txt-Results_/results.txt" } ], + "test.test[column_group-hint_anon-perusage-Debug]": [ + { + "checksum": "5f5e08c3a416f96ca6bcc239dc03e269", + "size": 3034, + "uri": "https://{canondata_backend}/1942525/38bbc64977eadbcbd7d9c11e7e100eb2855ed8a2/resource.tar.gz#test.test_column_group-hint_anon-perusage-Debug_/opt.yql" + } + ], + "test.test[column_group-hint_anon-perusage-Plan]": [ + { + "checksum": "73f0d1587e451cd10761f80c0a1d3588", + "size": 10676, + "uri": "https://{canondata_backend}/1942525/38bbc64977eadbcbd7d9c11e7e100eb2855ed8a2/resource.tar.gz#test.test_column_group-hint_anon-perusage-Plan_/plan.txt" + } + ], + "test.test[column_group-hint_anon-perusage-Results]": [ + { + "checksum": "d45a995ed068b19c1a38df12d1c68865", + "size": 3442, + "uri": "https://{canondata_backend}/1942525/38bbc64977eadbcbd7d9c11e7e100eb2855ed8a2/resource.tar.gz#test.test_column_group-hint_anon-perusage-Results_/results.txt" + } + ], + "test.test[column_group-hint_anon-single-Debug]": [ + { + "checksum": "25861ef6a274b795deea5b87d107a3b1", + "size": 3032, + "uri": "https://{canondata_backend}/1942525/38bbc64977eadbcbd7d9c11e7e100eb2855ed8a2/resource.tar.gz#test.test_column_group-hint_anon-single-Debug_/opt.yql" + } + ], + "test.test[column_group-hint_anon-single-Plan]": [ + { + "checksum": "73f0d1587e451cd10761f80c0a1d3588", + "size": 10676, + "uri": "https://{canondata_backend}/1942525/38bbc64977eadbcbd7d9c11e7e100eb2855ed8a2/resource.tar.gz#test.test_column_group-hint_anon-single-Plan_/plan.txt" + } + ], + "test.test[column_group-hint_anon-single-Results]": [ + { + "checksum": "d45a995ed068b19c1a38df12d1c68865", + "size": 3442, + "uri": "https://{canondata_backend}/1942525/38bbc64977eadbcbd7d9c11e7e100eb2855ed8a2/resource.tar.gz#test.test_column_group-hint_anon-single-Results_/results.txt" + } + ], "test.test[column_order-select_plain_nosimple-default.txt-Debug]": [ { "checksum": "90dcb5133679abb1cdc16acdcbb8754f", diff --git a/ydb/library/yql/tests/sql/yt_native_file/part17/canondata/result.json b/ydb/library/yql/tests/sql/yt_native_file/part17/canondata/result.json index c8d8d5cda3a6..fb88b6436fd8 100644 --- a/ydb/library/yql/tests/sql/yt_native_file/part17/canondata/result.json +++ b/ydb/library/yql/tests/sql/yt_native_file/part17/canondata/result.json @@ -604,6 +604,27 @@ "uri": "https://{canondata_backend}/1942415/9dc26178536314feaac77333a6a0e27c8703d1e2/resource.tar.gz#test.test_coalesce-coalesce--Results_/results.txt" } ], + "test.test[column_group-hint_anon-disable-Debug]": [ + { + "checksum": "9efc738eb05c150e97333a3999005fba", + "size": 2935, + "uri": "https://{canondata_backend}/1900335/7109f64012e245471fe7a235204134ac3e7a756c/resource.tar.gz#test.test_column_group-hint_anon-disable-Debug_/opt.yql" + } + ], + "test.test[column_group-hint_anon-disable-Plan]": [ + { + "checksum": "73f0d1587e451cd10761f80c0a1d3588", + "size": 10676, + "uri": "https://{canondata_backend}/1900335/7109f64012e245471fe7a235204134ac3e7a756c/resource.tar.gz#test.test_column_group-hint_anon-disable-Plan_/plan.txt" + } + ], + "test.test[column_group-hint_anon-disable-Results]": [ + { + "checksum": "d45a995ed068b19c1a38df12d1c68865", + "size": 3442, + "uri": "https://{canondata_backend}/1900335/7109f64012e245471fe7a235204134ac3e7a756c/resource.tar.gz#test.test_column_group-hint_anon-disable-Results_/results.txt" + } + ], "test.test[column_group-publish-perusage-Debug]": [ { "checksum": "b78a3fa386c93c906943521c253050bf", diff --git a/ydb/library/yql/tests/sql/yt_native_file/part19/canondata/result.json b/ydb/library/yql/tests/sql/yt_native_file/part19/canondata/result.json index 35ffdb5b3bc8..d24bb53253cf 100644 --- a/ydb/library/yql/tests/sql/yt_native_file/part19/canondata/result.json +++ b/ydb/library/yql/tests/sql/yt_native_file/part19/canondata/result.json @@ -743,6 +743,27 @@ "uri": "https://{canondata_backend}/1899731/35236262db500d349ca85236f500b0173ae98a5d/resource.tar.gz#test.test_blocks-tuple_nth--Results_/results.txt" } ], + "test.test[column_group-hint_anon_groups-disable-Debug]": [ + { + "checksum": "f8157255f0e6d64642bb6b729d19b34f", + "size": 3009, + "uri": "https://{canondata_backend}/1942525/bfcff962d189d4fcea4377061e05f080d425d77a/resource.tar.gz#test.test_column_group-hint_anon_groups-disable-Debug_/opt.yql" + } + ], + "test.test[column_group-hint_anon_groups-disable-Plan]": [ + { + "checksum": "73f0d1587e451cd10761f80c0a1d3588", + "size": 10676, + "uri": "https://{canondata_backend}/1942525/bfcff962d189d4fcea4377061e05f080d425d77a/resource.tar.gz#test.test_column_group-hint_anon_groups-disable-Plan_/plan.txt" + } + ], + "test.test[column_group-hint_anon_groups-disable-Results]": [ + { + "checksum": "d45a995ed068b19c1a38df12d1c68865", + "size": 3442, + "uri": "https://{canondata_backend}/1942525/bfcff962d189d4fcea4377061e05f080d425d77a/resource.tar.gz#test.test_column_group-hint_anon_groups-disable-Results_/results.txt" + } + ], "test.test[column_order-union_all_positional_unordered_fail--Debug]": [], "test.test[column_order-union_all_positional_unordered_fail--Plan]": [], "test.test[column_order-union_all_positional_unordered_fail--Results]": [ diff --git a/ydb/library/yql/tests/sql/yt_native_file/part8/canondata/result.json b/ydb/library/yql/tests/sql/yt_native_file/part8/canondata/result.json index 895448f367e7..4ca76a1c14cc 100644 --- a/ydb/library/yql/tests/sql/yt_native_file/part8/canondata/result.json +++ b/ydb/library/yql/tests/sql/yt_native_file/part8/canondata/result.json @@ -966,6 +966,27 @@ "uri": "https://{canondata_backend}/1809005/968c44f48b1a83965c869c6c3218e862c519db88/resource.tar.gz#test.test_column_group-hint-single-Results_/Output3.yqlrun.txt.attr" } ], + "test.test[column_group-hint_anon_groups-perusage-Debug]": [ + { + "checksum": "067590e2391e0f451c071056a74b0ceb", + "size": 3099, + "uri": "https://{canondata_backend}/1925842/34f87e31ea17797037a02652cf384114b03d7912/resource.tar.gz#test.test_column_group-hint_anon_groups-perusage-Debug_/opt.yql" + } + ], + "test.test[column_group-hint_anon_groups-perusage-Plan]": [ + { + "checksum": "73f0d1587e451cd10761f80c0a1d3588", + "size": 10676, + "uri": "https://{canondata_backend}/1925842/34f87e31ea17797037a02652cf384114b03d7912/resource.tar.gz#test.test_column_group-hint_anon_groups-perusage-Plan_/plan.txt" + } + ], + "test.test[column_group-hint_anon_groups-perusage-Results]": [ + { + "checksum": "d45a995ed068b19c1a38df12d1c68865", + "size": 3442, + "uri": "https://{canondata_backend}/1925842/34f87e31ea17797037a02652cf384114b03d7912/resource.tar.gz#test.test_column_group-hint_anon_groups-perusage-Results_/results.txt" + } + ], "test.test[column_group-hint_append_fail--Debug]": [], "test.test[column_group-hint_append_fail--Plan]": [], "test.test[column_group-hint_append_fail--Results]": [