From 9d92565801f6c4813d0c22a1c56761c6e66c4ab8 Mon Sep 17 00:00:00 2001 From: kyligence-git Date: Thu, 22 Aug 2024 01:23:53 +0000 Subject: [PATCH 1/4] [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240822) --- cpp-ch/clickhouse.version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp-ch/clickhouse.version b/cpp-ch/clickhouse.version index b88675c4a96d..3c9a18a1eac9 100644 --- a/cpp-ch/clickhouse.version +++ b/cpp-ch/clickhouse.version @@ -1,3 +1,3 @@ CH_ORG=Kyligence -CH_BRANCH=rebase_ch/20240820 -CH_COMMIT=b5b8245b022 +CH_BRANCH=rebase_ch/20240822 +CH_COMMIT=85d3b05e9e2 From 640172e8a63a9d24b1be6a3f7e0fb64cdb352720 Mon Sep 17 00:00:00 2001 From: Chang Chen Date: Thu, 22 Aug 2024 13:08:53 +0800 Subject: [PATCH 2/4] fix warning for GlutenParquetFilterSuite (cherry picked from commit 39cd55ceca76a9f0abf09c63a5f7abb5939552f8) --- .../parquet/GlutenParquetFilterSuite.scala | 424 +++++++++--------- 1 file changed, 212 insertions(+), 212 deletions(-) diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/parquet/GlutenParquetFilterSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/parquet/GlutenParquetFilterSuite.scala index a2f897d37811..f8b215ec6cf3 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/parquet/GlutenParquetFilterSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/parquet/GlutenParquetFilterSuite.scala @@ -45,410 +45,410 @@ class GlutenParquetFilterSuite private val result: Array[Map[String, Seq[Predicate]]] = Array( Map( // q1 "lineitem0" -> Seq( - 'l_shipdate.date.isNotNull, - 'l_shipdate.date <= LocalDate.of(1998, 9, 1) + Symbol("l_shipdate").date.isNotNull, + Symbol("l_shipdate").date <= LocalDate.of(1998, 9, 1) )), Map( // q2 "part0" -> Seq( - 'p_size.int.isNotNull, - 'p_type.string.isNotNull, - 'p_size.int === 15, - 'p_partkey.long.isNotNull + Symbol("p_size").int.isNotNull, + Symbol("p_type").string.isNotNull, + Symbol("p_size").int === 15, + Symbol("p_partkey").long.isNotNull ), "partsupp1" -> Seq( - 'ps_partkey.long.isNotNull, - 'ps_suppkey.long.isNotNull, - 'ps_supplycost.decimal(10, 0).isNotNull + Symbol("ps_partkey").long.isNotNull, + Symbol("ps_suppkey").long.isNotNull, + Symbol("ps_supplycost").decimal(10, 0).isNotNull ), "partsupp2" -> Seq( - 'ps_partkey.long.isNotNull, - 'ps_suppkey.long.isNotNull + Symbol("ps_partkey").long.isNotNull, + Symbol("ps_suppkey").long.isNotNull ), "supplier3" -> Seq( - 's_suppkey.long.isNotNull, - 's_nationkey.long.isNotNull + Symbol("s_suppkey").long.isNotNull, + Symbol("s_nationkey").long.isNotNull ), "nation4" -> Seq( - 'n_nationkey.long.isNotNull, - 'n_regionkey.long.isNotNull + Symbol("n_nationkey").long.isNotNull, + Symbol("n_regionkey").long.isNotNull ), "region5" -> Seq( - 'r_name.string.isNotNull, - 'r_name.string === "EUROPE", - 'r_regionkey.long.isNotNull + Symbol("r_name").string.isNotNull, + Symbol("r_name").string === "EUROPE", + Symbol("r_regionkey").long.isNotNull ), "supplier6" -> Seq( - 's_suppkey.long.isNotNull, - 's_nationkey.long.isNotNull + Symbol("s_suppkey").long.isNotNull, + Symbol("s_nationkey").long.isNotNull ), "nation7" -> Seq( - 'n_nationkey.long.isNotNull, - 'n_regionkey.long.isNotNull + Symbol("n_nationkey").long.isNotNull, + Symbol("n_regionkey").long.isNotNull ) ), Map( // q3 "customer0" -> Seq( - 'c_mktsegment.string.isNotNull, - 'c_mktsegment.string === "BUILDING", - 'c_custkey.long.isNotNull + Symbol("c_mktsegment").string.isNotNull, + Symbol("c_mktsegment").string === "BUILDING", + Symbol("c_custkey").long.isNotNull ), "orders1" -> Seq( - 'o_orderkey.long.isNotNull, - 'o_custkey.long.isNotNull, - 'o_orderdate.date.isNotNull, - 'o_orderdate.date < LocalDate.of(1995, 3, 15) + Symbol("o_orderkey").long.isNotNull, + Symbol("o_custkey").long.isNotNull, + Symbol("o_orderdate").date.isNotNull, + Symbol("o_orderdate").date < LocalDate.of(1995, 3, 15) ), "lineitem2" -> Seq( - 'l_orderkey.long.isNotNull, - 'l_shipdate.date.isNotNull, - 'l_shipdate.date > LocalDate.of(1995, 3, 15) + Symbol("l_orderkey").long.isNotNull, + Symbol("l_shipdate").date.isNotNull, + Symbol("l_shipdate").date > LocalDate.of(1995, 3, 15) ) ), Map( // q4 "orders0" -> Seq( - 'o_orderdate.date.isNotNull, - 'o_orderdate.date >= LocalDate.of(1993, 7, 1), - 'o_orderdate.date < LocalDate.of(1993, 10, 1) + Symbol("o_orderdate").date.isNotNull, + Symbol("o_orderdate").date >= LocalDate.of(1993, 7, 1), + Symbol("o_orderdate").date < LocalDate.of(1993, 10, 1) ), "lineitem1" -> Seq( - 'l_commitdate.date.isNotNull, - 'l_receiptdate.date.isNotNull + Symbol("l_commitdate").date.isNotNull, + Symbol("l_receiptdate").date.isNotNull ) ), Map( // q5 "customer0" -> Seq( - 'c_custkey.long.isNotNull, - 'c_nationkey.long.isNotNull + Symbol("c_custkey").long.isNotNull, + Symbol("c_nationkey").long.isNotNull ), "orders1" -> Seq( - 'o_orderkey.long.isNotNull, - 'o_custkey.long.isNotNull, - 'o_orderdate.date.isNotNull, - 'o_orderdate.date >= LocalDate.of(1994, 1, 1), - 'o_orderdate.date < LocalDate.of(1995, 1, 1) + Symbol("o_orderkey").long.isNotNull, + Symbol("o_custkey").long.isNotNull, + Symbol("o_orderdate").date.isNotNull, + Symbol("o_orderdate").date >= LocalDate.of(1994, 1, 1), + Symbol("o_orderdate").date < LocalDate.of(1995, 1, 1) ), "lineitem2" -> Seq( - 'l_orderkey.long.isNotNull, - 'l_suppkey.long.isNotNull + Symbol("l_orderkey").long.isNotNull, + Symbol("l_suppkey").long.isNotNull ), "supplier3" -> Seq( - 's_suppkey.long.isNotNull, - 's_nationkey.long.isNotNull + Symbol("s_suppkey").long.isNotNull, + Symbol("s_nationkey").long.isNotNull ), "nation4" -> Seq( - 'n_nationkey.long.isNotNull, - 'n_regionkey.long.isNotNull + Symbol("n_nationkey").long.isNotNull, + Symbol("n_regionkey").long.isNotNull ), "region5" -> Seq( - 'r_name.string.isNotNull, - 'r_name.string === "ASIA", - 'r_regionkey.long.isNotNull + Symbol("r_name").string.isNotNull, + Symbol("r_name").string === "ASIA", + Symbol("r_regionkey").long.isNotNull ) ), Map( // q6 "lineitem0" -> Seq( - 'l_shipdate.date.isNotNull, - 'l_discount.decimal(10, 2).isNotNull, - 'l_quantity.decimal(10, 0).isNotNull, - 'l_shipdate.date >= LocalDate.of(1994, 1, 1), - 'l_shipdate.date < LocalDate.of(1995, 1, 1), - 'l_discount.decimal(10, 2) >= Decimal(BigDecimal(0.05), 10, 2), - 'l_discount.decimal(10, 2) <= Decimal(BigDecimal(0.07), 10, 2), - 'l_quantity.decimal(10, 0) < Decimal(24) + Symbol("l_shipdate").date.isNotNull, + Symbol("l_discount").decimal(10, 2).isNotNull, + Symbol("l_quantity").decimal(10, 0).isNotNull, + Symbol("l_shipdate").date >= LocalDate.of(1994, 1, 1), + Symbol("l_shipdate").date < LocalDate.of(1995, 1, 1), + Symbol("l_discount").decimal(10, 2) >= Decimal(BigDecimal(0.05), 10, 2), + Symbol("l_discount").decimal(10, 2) <= Decimal(BigDecimal(0.07), 10, 2), + Symbol("l_quantity").decimal(10, 0) < Decimal(24) )), Map( // q7 "supplier0" -> Seq( - 's_suppkey.long.isNotNull, - 's_nationkey.long.isNotNull + Symbol("s_suppkey").long.isNotNull, + Symbol("s_nationkey").long.isNotNull ), "lineitem1" -> Seq( - 'l_shipdate.date.isNotNull, - 'l_shipdate.date >= LocalDate.of(1995, 1, 1), - 'l_shipdate.date <= LocalDate.of(1996, 12, 31), - 'l_suppkey.long.isNotNull, - 'l_orderkey.long.isNotNull + Symbol("l_shipdate").date.isNotNull, + Symbol("l_shipdate").date >= LocalDate.of(1995, 1, 1), + Symbol("l_shipdate").date <= LocalDate.of(1996, 12, 31), + Symbol("l_suppkey").long.isNotNull, + Symbol("l_orderkey").long.isNotNull ), "orders2" -> Seq( - 'o_orderkey.long.isNotNull, - 'o_custkey.long.isNotNull + Symbol("o_orderkey").long.isNotNull, + Symbol("o_custkey").long.isNotNull ), "customer3" -> Seq( - 'c_nationkey.long.isNotNull, - 'c_custkey.long.isNotNull + Symbol("c_nationkey").long.isNotNull, + Symbol("c_custkey").long.isNotNull ), "nation4" -> Seq( - 'n_nationkey.long.isNotNull, - 'n_name.string === "FRANCE" || 'n_name.string === "GERMANY" + Symbol("n_nationkey").long.isNotNull, + Symbol("n_name").string === "FRANCE" || Symbol("n_name").string === "GERMANY" ) ), Map( // q8 "part0" -> Seq( - 'p_partkey.long.isNotNull, - 'p_type.string.isNotNull, - 'p_type.string === "ECONOMY ANODIZED STEEL" + Symbol("p_partkey").long.isNotNull, + Symbol("p_type").string.isNotNull, + Symbol("p_type").string === "ECONOMY ANODIZED STEEL" ), "lineitem1" -> Seq( - 'l_partkey.long.isNotNull, - 'l_suppkey.long.isNotNull, - 'l_orderkey.long.isNotNull + Symbol("l_partkey").long.isNotNull, + Symbol("l_suppkey").long.isNotNull, + Symbol("l_orderkey").long.isNotNull ), "supplier2" -> Seq( - 's_suppkey.long.isNotNull, - 's_nationkey.long.isNotNull + Symbol("s_suppkey").long.isNotNull, + Symbol("s_nationkey").long.isNotNull ), "orders3" -> Seq( - 'o_orderkey.long.isNotNull, - 'o_custkey.long.isNotNull, - 'o_orderdate.date.isNotNull, - 'o_orderdate.date >= LocalDate.of(1995, 1, 1), - 'o_orderdate.date <= LocalDate.of(1996, 12, 31) + Symbol("o_orderkey").long.isNotNull, + Symbol("o_custkey").long.isNotNull, + Symbol("o_orderdate").date.isNotNull, + Symbol("o_orderdate").date >= LocalDate.of(1995, 1, 1), + Symbol("o_orderdate").date <= LocalDate.of(1996, 12, 31) ), "customer4" -> Seq( - 'c_custkey.long.isNotNull, - 'c_nationkey.long.isNotNull + Symbol("c_custkey").long.isNotNull, + Symbol("c_nationkey").long.isNotNull ), "nation5" -> Seq( - 'n_nationkey.long.isNotNull, - 'n_regionkey.long.isNotNull + Symbol("n_nationkey").long.isNotNull, + Symbol("n_regionkey").long.isNotNull ), "nation6" -> Seq( - 'n_nationkey.long.isNotNull + Symbol("n_nationkey").long.isNotNull ), "region7" -> Seq( - 'r_regionkey.long.isNotNull, - 'r_name.string.isNotNull, - 'r_name.string === "AMERICA" + Symbol("r_regionkey").long.isNotNull, + Symbol("r_name").string.isNotNull, + Symbol("r_name").string === "AMERICA" ) ), Map( // q9 "part0" -> Seq( - 'p_partkey.long.isNotNull, - 'p_name.string.isNotNull + Symbol("p_partkey").long.isNotNull, + Symbol("p_name").string.isNotNull ), "lineitem1" -> Seq( - 'l_partkey.long.isNotNull, - 'l_suppkey.long.isNotNull, - 'l_orderkey.long.isNotNull + Symbol("l_partkey").long.isNotNull, + Symbol("l_suppkey").long.isNotNull, + Symbol("l_orderkey").long.isNotNull ), "supplier2" -> Seq( - 's_suppkey.long.isNotNull, - 's_nationkey.long.isNotNull + Symbol("s_suppkey").long.isNotNull, + Symbol("s_nationkey").long.isNotNull ), "partsupp3" -> Seq( - 'ps_partkey.long.isNotNull, - 'ps_suppkey.long.isNotNull + Symbol("ps_partkey").long.isNotNull, + Symbol("ps_suppkey").long.isNotNull ), "orders4" -> Seq( - 'o_orderkey.long.isNotNull + Symbol("o_orderkey").long.isNotNull ), "nation5" -> Seq( - 'n_nationkey.long.isNotNull + Symbol("n_nationkey").long.isNotNull ) ), Map( // q10 "customer0" -> Seq( - 'c_custkey.long.isNotNull, - 'c_nationkey.long.isNotNull + Symbol("c_custkey").long.isNotNull, + Symbol("c_nationkey").long.isNotNull ), "orders1" -> Seq( - 'o_orderkey.long.isNotNull, - 'o_custkey.long.isNotNull, - 'o_orderdate.date.isNotNull, - 'o_orderdate.date >= LocalDate.of(1993, 10, 1), - 'o_orderdate.date < LocalDate.of(1994, 1, 1) + Symbol("o_orderkey").long.isNotNull, + Symbol("o_custkey").long.isNotNull, + Symbol("o_orderdate").date.isNotNull, + Symbol("o_orderdate").date >= LocalDate.of(1993, 10, 1), + Symbol("o_orderdate").date < LocalDate.of(1994, 1, 1) ), "lineitem2" -> Seq( - 'l_orderkey.long.isNotNull, - 'l_returnflag.string.isNotNull, - 'l_returnflag.string === "R" + Symbol("l_orderkey").long.isNotNull, + Symbol("l_returnflag").string.isNotNull, + Symbol("l_returnflag").string === "R" ), "nation3" -> Seq( - 'n_nationkey.long.isNotNull + Symbol("n_nationkey").long.isNotNull ) ), Map( // q11 "partsupp0" -> Seq( - 'ps_suppkey.long.isNotNull + Symbol("ps_suppkey").long.isNotNull ), "supplier1" -> Seq( - 's_suppkey.long.isNotNull, - 's_nationkey.long.isNotNull + Symbol("s_suppkey").long.isNotNull, + Symbol("s_nationkey").long.isNotNull ), "nation2" -> Seq( - 'n_nationkey.long.isNotNull, - 'n_name.string.isNotNull, - 'n_name.string === "GERMANY" + Symbol("n_nationkey").long.isNotNull, + Symbol("n_name").string.isNotNull, + Symbol("n_name").string === "GERMANY" ) ), Map( // q12 "orders0" -> Seq( - 'o_orderkey.long.isNotNull + Symbol("o_orderkey").long.isNotNull ), "lineitem1" -> Seq( - 'l_orderkey.long.isNotNull, - 'l_receiptdate.date.isNotNull, - 'l_receiptdate.date >= LocalDate.of(1994, 1, 1), - 'l_receiptdate.date < LocalDate.of(1995, 1, 1), - 'l_commitdate.date.isNotNull, - 'l_shipdate.date.isNotNull, - 'l_shipmode.string.in("MAIL", "SHIP").asInstanceOf[Predicate] + Symbol("l_orderkey").long.isNotNull, + Symbol("l_receiptdate").date.isNotNull, + Symbol("l_receiptdate").date >= LocalDate.of(1994, 1, 1), + Symbol("l_receiptdate").date < LocalDate.of(1995, 1, 1), + Symbol("l_commitdate").date.isNotNull, + Symbol("l_shipdate").date.isNotNull, + Symbol("l_shipmode").string.in("MAIL", "SHIP").asInstanceOf[Predicate] ) ), Map( // q13 "customer0" -> Nil, "orders1" -> Seq( - 'o_custkey.long.isNotNull, - 'o_comment.string.isNotNull + Symbol("o_custkey").long.isNotNull, + Symbol("o_comment").string.isNotNull )), Map( // q14 "lineitem0" -> Seq( - 'l_partkey.long.isNotNull, - 'l_shipdate.date.isNotNull, - 'l_shipdate.date >= LocalDate.of(1995, 9, 1), - 'l_shipdate.date < LocalDate.of(1995, 10, 1) + Symbol("l_partkey").long.isNotNull, + Symbol("l_shipdate").date.isNotNull, + Symbol("l_shipdate").date >= LocalDate.of(1995, 9, 1), + Symbol("l_shipdate").date < LocalDate.of(1995, 10, 1) ), "part1" -> Seq( - 'p_partkey.long.isNotNull + Symbol("p_partkey").long.isNotNull ) ), Map( // q15 "supplier0" -> Seq( - 's_suppkey.long.isNotNull + Symbol("s_suppkey").long.isNotNull ), "lineitem1" -> Seq( - 'l_suppkey.long.isNotNull, - 'l_shipdate.date.isNotNull, - 'l_shipdate.date >= LocalDate.of(1996, 1, 1), - 'l_shipdate.date < LocalDate.of(1996, 4, 1) + Symbol("l_suppkey").long.isNotNull, + Symbol("l_shipdate").date.isNotNull, + Symbol("l_shipdate").date >= LocalDate.of(1996, 1, 1), + Symbol("l_shipdate").date < LocalDate.of(1996, 4, 1) ) ), Map( // q16 "partsupp0" -> Seq( - 'ps_partkey.long.isNotNull + Symbol("ps_partkey").long.isNotNull ), "supplier1" -> Seq( - 's_comment.string.isNotNull + Symbol("s_comment").string.isNotNull ), "part2" -> Seq( - 'p_partkey.long.isNotNull, - 'p_brand.string.isNotNull, - 'p_brand.string =!= "Brand#45", - 'p_type.string.isNotNull, - 'p_size.int.in(49, 14, 23, 45, 19, 3, 36, 9).asInstanceOf[Predicate] + Symbol("p_partkey").long.isNotNull, + Symbol("p_brand").string.isNotNull, + Symbol("p_brand").string =!= "Brand#45", + Symbol("p_type").string.isNotNull, + Symbol("p_size").int.in(49, 14, 23, 45, 19, 3, 36, 9).asInstanceOf[Predicate] ) ), Map( // q17 "lineitem0" -> Seq( - 'l_partkey.long.isNotNull, - 'l_quantity.decimal(10, 0).isNotNull + Symbol("l_partkey").long.isNotNull, + Symbol("l_quantity").decimal(10, 0).isNotNull ), "part1" -> Seq( - 'p_partkey.long.isNotNull, - 'p_brand.string.isNotNull, - 'p_brand.string === "Brand#23", - 'p_container.string.isNotNull, - 'p_container.string === "MED BOX" + Symbol("p_partkey").long.isNotNull, + Symbol("p_brand").string.isNotNull, + Symbol("p_brand").string === "Brand#23", + Symbol("p_container").string.isNotNull, + Symbol("p_container").string === "MED BOX" ), "lineitem2" -> Seq( - 'l_partkey.long.isNotNull + Symbol("l_partkey").long.isNotNull ) ), Map( // q18 "customer0" -> Seq( - 'c_custkey.long.isNotNull + Symbol("c_custkey").long.isNotNull ), "orders1" -> Seq( - 'o_orderkey.long.isNotNull, - 'o_custkey.long.isNotNull + Symbol("o_orderkey").long.isNotNull, + Symbol("o_custkey").long.isNotNull ), "lineitem2" -> Nil, "lineitem3" -> Seq( - 'l_orderkey.long.isNotNull + Symbol("l_orderkey").long.isNotNull ) ), Map( // q19 "lineitem0" -> Seq( - 'l_shipinstruct.string.isNotNull, - 'l_shipmode.string.in("AIR", "AIR REG").asInstanceOf[Predicate], - 'l_shipinstruct.string === "DELIVER IN PERSON", - 'l_partkey.long.isNotNull, - ('l_quantity.decimal(10, 0) >= Decimal(1) && - 'l_quantity.decimal(10, 0) <= Decimal(11)) || - ('l_quantity.decimal(10, 0) >= Decimal(10) && - 'l_quantity.decimal(10, 0) <= Decimal(20)) || - ('l_quantity.decimal(10, 0) >= Decimal(20) && - 'l_quantity.decimal(10, 0) <= Decimal(30)) + Symbol("l_shipinstruct").string.isNotNull, + Symbol("l_shipmode").string.in("AIR", "AIR REG").asInstanceOf[Predicate], + Symbol("l_shipinstruct").string === "DELIVER IN PERSON", + Symbol("l_partkey").long.isNotNull, + (Symbol("l_quantity").decimal(10, 0) >= Decimal(1) && + Symbol("l_quantity").decimal(10, 0) <= Decimal(11)) || + (Symbol("l_quantity").decimal(10, 0) >= Decimal(10) && + Symbol("l_quantity").decimal(10, 0) <= Decimal(20)) || + (Symbol("l_quantity").decimal(10, 0) >= Decimal(20) && + Symbol("l_quantity").decimal(10, 0) <= Decimal(30)) ), "part1" -> Seq( - 'p_size.int.isNotNull, - 'p_size.int >= 1, - 'p_partkey.long.isNotNull, - ('p_brand.string === "Brand#12" && - 'p_container.string.in("SM CASE", "SM BOX", "SM PACK", "SM PKG") && - 'p_size.int <= 5) || - ('p_brand.string === "Brand#23" && - 'p_container.string.in("MED BAG", "MED BOX", "MED PKG", "MED PACK") && - 'p_size.int <= 10) || - ('p_brand.string === "Brand#34" && - 'p_container.string.in("LG CASE", "LG BOX", "LG PACK", "LG PKG") && - 'p_size.int <= 15) + Symbol("p_size").int.isNotNull, + Symbol("p_size").int >= 1, + Symbol("p_partkey").long.isNotNull, + (Symbol("p_brand").string === "Brand#12" && + Symbol("p_container").string.in("SM CASE", "SM BOX", "SM PACK", "SM PKG") && + Symbol("p_size").int <= 5) || + (Symbol("p_brand").string === "Brand#23" && + Symbol("p_container").string.in("MED BAG", "MED BOX", "MED PKG", "MED PACK") && + Symbol("p_size").int <= 10) || + (Symbol("p_brand").string === "Brand#34" && + Symbol("p_container").string.in("LG CASE", "LG BOX", "LG PACK", "LG PKG") && + Symbol("p_size").int <= 15) ) ), Map( // q20 "supplier0" -> Seq( - 's_nationkey.long.isNotNull + Symbol("s_nationkey").long.isNotNull ), "partsupp1" -> Seq( - 'ps_suppkey.long.isNotNull, - 'ps_partkey.long.isNotNull, - 'ps_availqty.int.isNotNull + Symbol("ps_suppkey").long.isNotNull, + Symbol("ps_partkey").long.isNotNull, + Symbol("ps_availqty").int.isNotNull ), "part2" -> Seq( - 'p_name.string.isNotNull + Symbol("p_name").string.isNotNull ), "lineitem3" -> Seq( - 'l_partkey.long.isNotNull, - 'l_suppkey.long.isNotNull, - 'l_shipdate.date.isNotNull, - 'l_shipdate.date >= LocalDate.of(1994, 1, 1), - 'l_shipdate.date < LocalDate.of(1995, 1, 1) + Symbol("l_partkey").long.isNotNull, + Symbol("l_suppkey").long.isNotNull, + Symbol("l_shipdate").date.isNotNull, + Symbol("l_shipdate").date >= LocalDate.of(1994, 1, 1), + Symbol("l_shipdate").date < LocalDate.of(1995, 1, 1) ), "nation4" -> Seq( - 'n_nationkey.long.isNotNull, - 'n_name.string.isNotNull, - 'n_name.string === "CANADA" + Symbol("n_nationkey").long.isNotNull, + Symbol("n_name").string.isNotNull, + Symbol("n_name").string === "CANADA" ) ), Map( // q21 "supplier0" -> Seq( - 's_nationkey.long.isNotNull, - 's_suppkey.long.isNotNull + Symbol("s_nationkey").long.isNotNull, + Symbol("s_suppkey").long.isNotNull ), "lineitem1" -> Seq( - 'l_orderkey.long.isNotNull, - 'l_suppkey.long.isNotNull, - 'l_commitdate.date.isNotNull, - 'l_receiptdate.date.isNotNull + Symbol("l_orderkey").long.isNotNull, + Symbol("l_suppkey").long.isNotNull, + Symbol("l_commitdate").date.isNotNull, + Symbol("l_receiptdate").date.isNotNull ), "lineitem2" -> Nil, "lineitem3" -> Seq( - 'l_receiptdate.date.isNotNull, - 'l_commitdate.date.isNotNull + Symbol("l_receiptdate").date.isNotNull, + Symbol("l_commitdate").date.isNotNull ), "orders4" -> Seq( - 'o_orderstatus.string.isNotNull, - 'o_orderstatus.string === "F", - 'o_orderkey.long.isNotNull + Symbol("o_orderstatus").string.isNotNull, + Symbol("o_orderstatus").string === "F", + Symbol("o_orderkey").long.isNotNull ), "nation5" -> Seq( - 'n_nationkey.long.isNotNull, - 'n_name.string.isNotNull, - 'n_name.string === "SAUDI ARABIA" + Symbol("n_nationkey").long.isNotNull, + Symbol("n_name").string.isNotNull, + Symbol("n_name").string === "SAUDI ARABIA" ) ), Map( // q22 "customer0" -> Seq( - 'c_acctbal.decimal(10, 0).isNotNull + Symbol("c_acctbal").decimal(10, 0).isNotNull ), "orders1" -> Nil) ) From ba54da6769f14e7f63f9aa02f756357d864205ff Mon Sep 17 00:00:00 2001 From: Chang Chen Date: Thu, 22 Aug 2024 13:17:23 +0800 Subject: [PATCH 3/4] remove std::cerr --- cpp-ch/local-engine/Storages/StorageMergeTreeFactory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp-ch/local-engine/Storages/StorageMergeTreeFactory.h b/cpp-ch/local-engine/Storages/StorageMergeTreeFactory.h index 09a2d5747b26..71e4da6bb696 100644 --- a/cpp-ch/local-engine/Storages/StorageMergeTreeFactory.h +++ b/cpp-ch/local-engine/Storages/StorageMergeTreeFactory.h @@ -49,7 +49,7 @@ class DataPartStorageHolder ~DataPartStorageHolder() { storage_->removePartFromMemory(*data_part_); - std::cerr << fmt::format("clean part {}", data_part_->name) << std::endl; + // std::cerr << fmt::format("clean part {}", data_part_->name) << std::endl; } private: From 4a613f7ff09dbf8c0cf27f67021de0dcd5910647 Mon Sep 17 00:00:00 2001 From: Chang Chen Date: Thu, 22 Aug 2024 13:34:41 +0800 Subject: [PATCH 4/4] fix race in SparkMergeTreeWriter::finalizeMerge --- .../Storages/Mergetree/SparkMergeTreeWriter.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp-ch/local-engine/Storages/Mergetree/SparkMergeTreeWriter.cpp b/cpp-ch/local-engine/Storages/Mergetree/SparkMergeTreeWriter.cpp index 93f4374d4ce1..e7d3be0db509 100644 --- a/cpp-ch/local-engine/Storages/Mergetree/SparkMergeTreeWriter.cpp +++ b/cpp-ch/local-engine/Storages/Mergetree/SparkMergeTreeWriter.cpp @@ -265,11 +265,11 @@ void SparkMergeTreeWriter::finalizeMerge() continue; GlobalThreadPool::instance().scheduleOrThrow( - [&]() -> void + [storage_ = storage, tmp = tmp_part]() -> void { - for (const auto & disk : storage->getDisks()) + for (const auto & disk : storage_->getDisks()) { - auto rel_path = storage->getRelativeDataPath() + "/" + tmp_part; + auto rel_path = storage_->getRelativeDataPath() + "/" + tmp; disk->removeRecursive(rel_path); } });