Skip to content

Commit

Permalink
[GLUTEN-1632][CH]Daily Update Clickhouse Version (20241030) (#7720)
Browse files Browse the repository at this point in the history
* [GLUTEN-1632][CH]Daily Update Clickhouse Version (20241030)

* Fix Build due to ClickHouse/ClickHouse#71053
* Add a case for #7726

* Try to fix the occasionally failing case "read data from orc file format": the test no longer generates data on the fly and uses pre-generated data instead.

---------

Co-authored-by: kyligence-git <[email protected]>
Co-authored-by: Chang Chen <[email protected]>
  • Loading branch information
3 people authored Oct 30, 2024
1 parent c35d97e commit 1a34445
Show file tree
Hide file tree
Showing 7 changed files with 156 additions and 7 deletions.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
-- Query over the TPC-H style tables LINEITEM, SUPPLIER, PART and ORDERS.
-- NOTE(review): presumably the reproduction case for issue #7726 -- confirm against the test that loads it.
--
-- Shape of the query:
--   * 24 plain columns from LINEITEM / PART / SUPPLIER, all repeated in GROUP BY.
--   * 9 aggregates (MIN / MAX / COUNT / COUNT DISTINCT) over LINEITEM and ORDERS columns.
--   * LINEITEM inner-joined to SUPPLIER, PART and ORDERS on their keys.
--   * One deeply nested NOT / AND / OR predicate that references only unqualified
--     PART (P_*) and SUPPLIER (S_*) columns.
-- NOTE(review): the predicate looks machine-generated; keep its structure intact,
-- since the exact expression tree is likely what the test exercises -- confirm.

-- Grouping columns (must stay in sync with the GROUP BY list at the end).
select LINEITEM.L_DISCOUNT,
PART.P_TYPE,
LINEITEM.L_COMMENT,
LINEITEM.L_SUPPKEY,
PART.P_PARTKEY,
PART.P_SIZE,
LINEITEM.L_RETURNFLAG,
LINEITEM.L_RECEIPTDATE,
PART.P_NAME,
SUPPLIER.S_COMMENT,
LINEITEM.L_ORDERKEY,
PART.P_MFGR,
SUPPLIER.S_ACCTBAL,
SUPPLIER.S_SUPPKEY,
LINEITEM.L_SHIPMODE,
SUPPLIER.S_NATIONKEY,
LINEITEM.L_SHIPDATE,
LINEITEM.L_COMMITDATE,
SUPPLIER.S_NAME,
PART.P_COMMENT,
LINEITEM.L_TAX,
LINEITEM.L_QUANTITY,
LINEITEM.L_PARTKEY,
PART.P_CONTAINER,
-- Aggregates; ORDERS columns are used only here.
MIN(LINEITEM.L_EXTENDEDPRICE),
COUNT(LINEITEM.L_QUANTITY),
COUNT(DISTINCT LINEITEM.L_PARTKEY),
MIN(LINEITEM.L_TAX),
MIN(ORDERS.O_TOTALPRICE),
COUNT(LINEITEM.L_EXTENDEDPRICE),
COUNT(ORDERS.O_SHIPPRIORITY),
COUNT(1),
MAX(LINEITEM.L_DISCOUNT)
from LINEITEM
INNER JOIN SUPPLIER AS SUPPLIER ON LINEITEM.L_SUPPKEY = SUPPLIER.S_SUPPKEY
INNER JOIN PART AS PART ON LINEITEM.L_PARTKEY = PART.P_PARTKEY
INNER JOIN ORDERS AS ORDERS ON LINEITEM.L_ORDERKEY = ORDERS.O_ORDERKEY
-- Top level of the predicate is: (not (<branch A> or <branch B>)) or <branch C>.
-- Several string literals in the IN lists below contain '%'. IN compares by
-- equality, so those '%' characters are literal text, not LIKE wildcards.
-- The IN lists also contain duplicate values; they are reproduced as-is.
where (not (((P_RETAILPRICE is not null or
((S_NATIONKEY is not null and P_MFGR like '%Manufacturer#1') or P_BRAND not like 'Brand#11')) or
((S_SUPPKEY not in
(1206, 1496, 1191, 2445, 491, 1407, 1969, 261, 1418, 310, 2099, 1343, 327, 261, 707, 37, 753, 696, 1363,
628, 1158, 2239, 26, 1180, 2448, 1698, 2099, 1326, 1247, 1203, 161, 1698, 310, 692, 491, 1920, 28, 370,
370, 261, 2258, 1146, 983, 683, 24, 1611, 5161, 3141, 2258, 1287, 683, 1720, 1887, 310, 707, 1836, 1287,
2065, 1859, 1203, 1611, 1835, 2099, 701, 2314, 692, 1418, 2367, 425, 1720, 8285, 1969, 1804, 310, 2258,
1418, 463, 2048, 368, 1253, 549, 2258, 327, 1973, 817) and 1300 > S_SUPPKEY) or (S_PHONE not in
('10-246-381-9259',
'10-211-466-9198',
'10-509-209-3829',
'10-741-929-4244',
'10-393-500-3856',
'10-495-104-1252',
'10-983-665-2259',
'10-295-590-8708',
'10-983-665-2259',
'10-745-572-7198',
'10-384-209-1825',
'10-734-420-5738',
'10-845-970-4551',
'10-630-928-4130',
'10-325-193-7475',
'%10-475-868-5521',
'10-903-990-3612',
'10-352-443-2162%',
'10-842-403-7954',
'10-789-325-3069',
'10-996-906-4890',
'10-404-519-2270',
'10-848-716-8078',
'10-246-381-9259',
'10-262-377-2302',
'10-361-729-1693',
'10-745-572-7198',
'10-384-209-1825',
'10-262-132-6639',
'10-361-729-1693',
'10-746-144-5600',
'10-409-763-8909',
'10-123-465-1292',
'10-745-572-7198%',
'10-599-740-9848',
'10-453-843-1585',
'10-191-563-6127',
'10-848-716-8078',
'10-763-945-1271',
'10-393-500-3856') and
(not (P_NAME not like 'light dark lemon lace medium%' and P_NAME is null))))) or
((((S_ADDRESS is null or P_CONTAINER in
('LG JAR', 'JUMBO CASE', 'JUMBO CASE', 'MED BOX', 'WRAP BAG', 'SM CASE',
'WRAP JAR', 'JUMBO PKG', 'SM CAN', 'SM BOX', 'JUMBO CASE', 'MED BOX', 'LG JAR',
'JUMBO CASE', 'MED DRUM', 'JUMBO PKG', 'SM CAN', 'WRAP JAR', 'LG CASE', 'LG BAG',
'SM PACK', 'JUMBO DRUM', 'WRAP BOX', 'JUMBO CAN', 'LG PKG', 'WRAP CAN',
'MED PACK', 'SM BOX', 'SM DRUM', 'SM PACK', 'MED DRUM', 'MED PACK', 'MED BOX',
'MED CAN%', 'SM JAR', 'SM CAN', 'JUMBO BOX', 'JUMBO BAG', 'LG BAG', 'LG PKG',
'LG PACK', 'LG BAG', 'JUMBO BOX', 'SM BOX', 'JUMBO CAN', 'JUMBO PKG', 'LG BAG',
'MED BOX', 'JUMBO CASE', 'MED BOX', 'LG BAG', 'LG PACK', 'MED BOX', 'LG PKG',
'SM BOX', 'WRAP BOX', 'LG CASE', 'MED PACK', 'LG PKG', '%LG CASE', 'LG JAR',
'LG BAG', 'LG BOX', 'SM CAN', 'WRAP CAN', 'WRAP PACK', 'JUMBO CASE', 'SM BOX',
'SM PACK', 'WRAP PKG', 'MED CAN', 'SM BOX', 'LG CASE', 'JUMBO CAN', 'LG JAR',
'SM DRUM', 'MED PKG', 'JUMBO BAG', 'SM CASE', 'MED BAG', 'SM PACK',
'SM PACK')) and S_SUPPKEY is not null) and (P_PARTKEY not in
(1358682, 1592117, 1114403, 839396,
1114617, 959268, 1114713, 1358631,
806397, 959018, 1114926, 812800,
1568237, 959088, 839340, 959419,
1115053, 1358740, 1114282) and
(S_SUPPKEY between 463 and 1887 or
S_SUPPKEY not in
(1287, 1422, 1878, 1191, 1804, 476,
1097, 1326, 1597, 1158, 261, 1689,
1493, 2314, 817, 1097, 2239, 327,
1887, 118, 1547, 476, 2131, 1247,
1496, 1698, 1717, 454, 1692, 1920,
1973, 2010, 1804, 774, 1611, 425,
28, 1611, 183, 983, 800, 5915, 1311,
24, 2298, 118, 183, 784, 1592, 1549,
983, 1283, 1418, 291, 118, 1407,
2072, 291, 1180, 1404, 1097, 1724,
1611, 692, 491, 316, 161, 2314,
1404, 696, 2072, 2072, 491, 1692,
764, 742, 118, 425)))) and
(P_CONTAINER in
('SM PKG', 'LG PKG', 'LG CASE', 'MED PKG', 'WRAP JAR', 'LG BAG', 'SM BOX', 'JUMBO BOX', 'SM PKG',
'SM PKG', 'JUMBO BOX', 'MED BOX', 'JUMBO PKG', 'WRAP CAN', 'MED DRUM', 'MED JAR', 'SM BAG', 'MED CAN',
'SM PACK', 'SM CASE', 'MED BAG', 'JUMBO PKG', 'LG CASE', 'SM PKG', 'MED BOX', 'LG CASE', 'JUMBO DRUM',
'MED BAG', 'JUMBO CASE', 'SM BOX', 'JUMBO PACK', 'WRAP BOX', '%JUMBO BOX', 'JUMBO BOX', 'JUMBO CASE',
'SM CAN', 'JUMBO BOX', 'SM CAN', 'LG CASE') and P_BRAND is null))))
-- Branch C: the alternative that is OR-ed with the negated block above.
or (((P_BRAND is not null or (P_SIZE not in
(25, 11, 48, 15, 48, 16, 3, 45, 37, 42, 47, 42, 42, 16, 97, 16, 48, 12, 87, 13, 27, 22,
42, 37, 50, 9, 34) and S_NATIONKEY >= 0)) or 955.65 = S_ACCTBAL) or
(P_TYPE not like 'MEDIUM POLISHED STEEL%' or (not S_ACCTBAL is null)))
-- Same 24 columns, in the same order, as the non-aggregated SELECT list.
group by LINEITEM.L_DISCOUNT, PART.P_TYPE, LINEITEM.L_COMMENT, LINEITEM.L_SUPPKEY, PART.P_PARTKEY, PART.P_SIZE,
LINEITEM.L_RETURNFLAG, LINEITEM.L_RECEIPTDATE, PART.P_NAME, SUPPLIER.S_COMMENT, LINEITEM.L_ORDERKEY,
PART.P_MFGR, SUPPLIER.S_ACCTBAL, SUPPLIER.S_SUPPKEY, LINEITEM.L_SHIPMODE, SUPPLIER.S_NATIONKEY,
LINEITEM.L_SHIPDATE, LINEITEM.L_COMMITDATE, SUPPLIER.S_NAME, PART.P_COMMENT, LINEITEM.L_TAX,
LINEITEM.L_QUANTITY, LINEITEM.L_PARTKEY, PART.P_CONTAINER
Original file line number Diff line number Diff line change
Expand Up @@ -1038,16 +1038,15 @@ class GlutenClickHouseFileFormatSuite
}

test("read data from orc file format") {
val filePath = basePath + "/orc_test.orc"
// val filePath = "/data2/case_insensitive_column_matching.orc"
val filePath = s"$orcDataPath/all_data_types_with_non_primitive_type.snappy.orc"
val orcFileFormat = "orc"
val sql =
s"""
| select *
| from $orcFileFormat.`$filePath`
| where long_field > 30
|""".stripMargin
testFileFormatBase(filePath, orcFileFormat, sql, df => {})
compareResultsAgainstVanillaSpark(sql, compareResult = true, df => {}, noFallBack = true)
}

// TODO: Fix: if the field names has upper case form, it will return null value
Expand Down Expand Up @@ -1085,8 +1084,9 @@ class GlutenClickHouseFileFormatSuite
customCheck: DataFrame => Unit,
noFallBack: Boolean = true
): Unit = {
val data = genTestData()
spark
.createDataFrame(genTestData())
.createDataFrame(data)
.write
.mode("overwrite")
.format(fileFormat)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ class GlutenClickHouseWholeStageTransformerSuite extends WholeStageTransformerSu
}

final protected val rootPath: String = this.getClass.getResource("/").getPath
final protected val queryPath: String = s"${rootPath}queries"
final protected val basePath: String =
if (UTSystemParameters.diskOutputDataPath.equals("/")) rootPath + "tests-working-home"
else UTSystemParameters.diskOutputDataPath + "/" + rootPath + "tests-working-home"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,25 @@ package org.apache.gluten.execution.tpch
import org.apache.gluten.GlutenConfig
import org.apache.gluten.execution._
import org.apache.gluten.extension.GlutenPlan
import org.apache.gluten.utils.Arm

import org.apache.spark.SparkConf
import org.apache.spark.sql.catalyst.optimizer.BuildLeft
import org.apache.spark.sql.execution.InputIteratorTransformer
import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, AdaptiveSparkPlanHelper}

import java.io.File

import scala.io.Source

class GlutenClickHouseTPCHColumnarShuffleParquetAQESuite
extends GlutenClickHouseTPCHAbstractSuite
with AdaptiveSparkPlanHelper {

override protected val needCopyParquetToTablePath = true

override protected val tablesPath: String = basePath + "/tpch-data"
override protected val tpchQueries: String = rootPath + "queries/tpch-queries-ch"
override protected val tpchQueries: String = s"$queryPath/tpch-queries-ch"
override protected val queriesResults: String = rootPath + "queries-output"

/** Run Gluten + ClickHouse Backend with SortShuffleManager */
Expand Down Expand Up @@ -349,4 +354,11 @@ class GlutenClickHouseTPCHColumnarShuffleParquetAQESuite
runQueryAndCompare(sql) { df => }
}
}

ignore("https://github.com/apache/incubator-gluten/issues/7726") {
runQueryAndCompare(Arm.withResource(
Source.fromFile(new File(s"$queryPath/tpch-schema-related/7726.sql"), "UTF-8"))(_.mkString)) {
df =>
}
}
}
4 changes: 2 additions & 2 deletions cpp-ch/clickhouse.version
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
CH_ORG=Kyligence
CH_BRANCH=rebase_ch/20241026
CH_COMMIT=3691d19817d
CH_BRANCH=rebase_ch/20241030
CH_COMMIT=847cfa6237c
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,7 @@ MergeTreeDataWriter::TemporaryPart SparkMergeTreeDataWriter::writeTempPart(
txn ? txn->tid : Tx::PrehistoricTID,
false,
false,
false,
context->getWriteSettings());

out->writeWithPermutation(block, perm_ptr);
Expand Down

0 comments on commit 1a34445

Please sign in to comment.