diff --git a/.github/workflows/check_license.yml b/.github/workflows/check_license.yml new file mode 100644 index 000000000000..338397dbd6cb --- /dev/null +++ b/.github/workflows/check_license.yml @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: license header check +on: + pull_request +concurrency: + group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} + cancel-in-progress: true + +jobs: + license-check: + name: License Header Check + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Check License Header + run: | + git fetch --recurse-submodules=no origin main ${{github.event.pull_request.base.sha}} + pip install regex + cd $GITHUB_WORKSPACE/ + ./.github/workflows/util/check.sh ${{github.event.pull_request.base.sha}} diff --git a/.github/workflows/code_style.yml b/.github/workflows/code_style.yml index f1c9c2548324..8417264373a0 100644 --- a/.github/workflows/code_style.yml +++ b/.github/workflows/code_style.yml @@ -14,43 +14,56 @@ # limitations under the License. name: Code style checks - on: - pull_request - + pull_request: + paths: + - '.github/workflows/code_style.yml' + - 'cpp/**' + - 'cpp-ch/**' concurrency: group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} cancel-in-progress: true jobs: - formatting-check: - name: Formatting Check + CPP-format-check: + name: CPP Format Check runs-on: ubuntu-latest strategy: matrix: path: - - check: 'cpp/core' - exclude: '' - - check: 'cpp/velox' + - check: 'cpp' exclude: '' steps: - uses: actions/checkout@v4 - - name: Run clang-format style check for C/C++ programs. 
+ - name: Run clang-format style check for C/C++ uses: jidicula/clang-format-action@v4.11.0 with: clang-format-version: '15' check-path: ${{ matrix.path['check'] }} fallback-style: 'Google' # optional - license-check: - name: License Header Check + CMake-format-check: + name: CMake Format Check runs-on: ubuntu-latest - + container: ubuntu:22.04 steps: - - uses: actions/checkout@v3 - - - name: Check License Header + - name: Install tools + run: | + apt update -y + apt install git python3-pip -y + pip3 install --user cmake-format + - uses: actions/checkout@v4 + - name: Check CMake format run: | - git fetch --recurse-submodules=no origin main ${{github.event.pull_request.base.sha}} - pip install regex - dev/check.sh ${{github.event.pull_request.base.sha}} + git config --global --add safe.directory $GITHUB_WORKSPACE + cd $GITHUB_WORKSPACE/ + fileList=$(find ./cpp ./cpp-ch -name CMakeLists.txt -o -name *.cmake) + for file in $fileList; do + /github/home/.local/bin/cmake-format --first-comment-is-literal True --in-place $file + done + if [ -n "$(git status --porcelain)" ]; then + echo "Please use cmake-format to format cmake files or apply the below patch." + git diff -- '*CMakeLists.txt' '*.cmake' + exit 1 + fi + echo "No CMake format issue." diff --git a/dev/check.py b/.github/workflows/util/check.py similarity index 100% rename from dev/check.py rename to .github/workflows/util/check.py diff --git a/dev/check.sh b/.github/workflows/util/check.sh similarity index 90% rename from dev/check.sh rename to .github/workflows/util/check.sh index 9b940845f92d..d8db8bd402c3 100755 --- a/dev/check.sh +++ b/.github/workflows/util/check.sh @@ -15,9 +15,9 @@ # limitations under the License. export BASE_COMMIT=$1 -dev/check.py header branch +./.github/workflows/util/check.py header branch if [ $? -ne 0 ]; then - dev/check.py header branch --fix + ./.github/workflows/util/check.py header branch --fix echo -e "\n==== Apply using:" echo "patch -p1 \< "83800000") { + withSQLConf("spark.databricks.delta.optimize.minFileSize" -> "838000") { // 3 from 37 parts are larger than this, so after optimize there should be 4 parts: // 3 original parts and 1 merged part spark.sql(s""" @@ -275,20 +278,20 @@ class GlutenClickHouseMergeTreeOptimizeSuite spark.sql("VACUUM lineitem_mergetree_optimize_p5 RETAIN 0 HOURS") spark.sql("VACUUM lineitem_mergetree_optimize_p5 RETAIN 0 HOURS") if (sparkVersion.equals("3.2")) { - assert(countFiles(new File(s"$basePath/lineitem_mergetree_optimize_p5")) == 75) + assertResult(99)(countFiles(new File(s"$basePath/lineitem_mergetree_optimize_p5"))) } else { // For Spark 3.3 + Delta 2.3, vacuum command will create two commit files in deltalog dir. 
// this case will create a checkpoint - assert(countFiles(new File(s"$basePath/lineitem_mergetree_optimize_p5")) == 81) + assertResult(105)(countFiles(new File(s"$basePath/lineitem_mergetree_optimize_p5"))) } val ret = spark.sql("select count(*) from lineitem_mergetree_optimize_p5").collect() - assert(ret.apply(0).get(0) == 600572) + assertResult(600572)(ret.apply(0).get(0)) } withSQLConf( - ("spark.databricks.delta.optimize.maxFileSize" -> "10000000"), - ("spark.databricks.delta.optimize.minFileSize" -> "838250")) { + "spark.databricks.delta.optimize.maxFileSize" -> "10000000", + "spark.databricks.delta.optimize.minFileSize" -> "838250") { // of the remaing 3 original parts, 2 are less than 838250, 1 is larger (size 838255) // the merged part is ~27MB, so after optimize there should be 3 parts: // 1 merged part from 2 original parts, 1 merged part from 34 original parts @@ -299,14 +302,14 @@ class GlutenClickHouseMergeTreeOptimizeSuite spark.sql("VACUUM lineitem_mergetree_optimize_p5 RETAIN 0 HOURS") spark.sql("VACUUM lineitem_mergetree_optimize_p5 RETAIN 0 HOURS") if (sparkVersion.equals("3.2")) { - assert(countFiles(new File(s"$basePath/lineitem_mergetree_optimize_p5")) == 75) + assertResult(93)(countFiles(new File(s"$basePath/lineitem_mergetree_optimize_p5"))) } else { // For Spark 3.3 + Delta 2.3, vacuum command will create two commit files in deltalog dir. - assert(countFiles(new File(s"$basePath/lineitem_mergetree_optimize_p5")) == 85) + assertResult(104)(countFiles(new File(s"$basePath/lineitem_mergetree_optimize_p5"))) } val ret = spark.sql("select count(*) from lineitem_mergetree_optimize_p5").collect() - assert(ret.apply(0).get(0) == 600572) + assertResult(600572)(ret.apply(0).get(0)) } // now merge all parts (testing merging from merged parts) @@ -315,14 +318,14 @@ class GlutenClickHouseMergeTreeOptimizeSuite spark.sql("VACUUM lineitem_mergetree_optimize_p5 RETAIN 0 HOURS") spark.sql("VACUUM lineitem_mergetree_optimize_p5 RETAIN 0 HOURS") if (sparkVersion.equals("3.2")) { - assert(countFiles(new File(s"$basePath/lineitem_mergetree_optimize_p5")) == 75) + assertResult(77)(countFiles(new File(s"$basePath/lineitem_mergetree_optimize_p5"))) } else { // For Spark 3.3 + Delta 2.3, vacuum command will create two commit files in deltalog dir. 
- assert(countFiles(new File(s"$basePath/lineitem_mergetree_optimize_p5")) == 90) + assertResult(93)(countFiles(new File(s"$basePath/lineitem_mergetree_optimize_p5"))) } val ret = spark.sql("select count(*) from lineitem_mergetree_optimize_p5").collect() - assert(ret.apply(0).get(0) == 600572) + assertResult(600572)(ret.apply(0).get(0)) } test("test mergetree optimize table with partition and bucket") { @@ -343,24 +346,22 @@ class GlutenClickHouseMergeTreeOptimizeSuite spark.sql("optimize lineitem_mergetree_optimize_p6") val ret = spark.sql("select count(*) from lineitem_mergetree_optimize_p6").collect() - assert(ret.apply(0).get(0) == 600572) + assertResult(600572)(ret.apply(0).get(0)) - assert(countFiles(new File(s"$basePath/lineitem_mergetree_optimize_p6")) == { - if (sparkVersion.equals("3.2")) 475 else 501 - }) + assertResult(countFiles(new File(s"$basePath/lineitem_mergetree_optimize_p6")))( + if (sparkVersion.equals("3.2")) 499 else 528) spark.sql("VACUUM lineitem_mergetree_optimize_p6 RETAIN 0 HOURS") spark.sql("VACUUM lineitem_mergetree_optimize_p6 RETAIN 0 HOURS") - assert(countFiles(new File(s"$basePath/lineitem_mergetree_optimize_p6")) == { - if (sparkVersion.equals("3.2")) 315 else 327 - }) + assertResult(countFiles(new File(s"$basePath/lineitem_mergetree_optimize_p6")))( + if (sparkVersion.equals("3.2")) 315 else 327) val ret2 = spark.sql("select count(*) from lineitem_mergetree_optimize_p6").collect() - assert(ret2.apply(0).get(0) == 600572) + assertResult(600572)(ret2.apply(0).get(0)) } test("test skip index after optimize") { withSQLConf( - "spark.databricks.delta.optimize.maxFileSize" -> "100000000", + "spark.databricks.delta.optimize.maxFileSize" -> "2000000", "spark.sql.adaptive.enabled" -> "false") { spark.sql(s""" |DROP TABLE IF EXISTS lineitem_mergetree_index; @@ -385,12 +386,12 @@ class GlutenClickHouseMergeTreeOptimizeSuite val scanExec = collect(df.queryExecution.executedPlan) { case f: FileSourceScanExecTransformer => f } - assert(scanExec.size == 1) - val mergetreeScan = scanExec(0) + assertResult(1)(scanExec.size) + val mergetreeScan = scanExec.head val ret = df.collect() - assert(ret.apply(0).get(0) == 2) + assertResult(2)(ret.apply(0).get(0)) val marks = mergetreeScan.metrics("selectedMarks").value - assert(marks == 1) + assertResult(1)(marks) val directory = new File(s"$basePath/lineitem_mergetree_index") val partDir = directory.listFiles().filter(f => f.getName.endsWith("merged")).head @@ -403,7 +404,7 @@ class GlutenClickHouseMergeTreeOptimizeSuite test("test mergetree optimize with the path based table") { val dataPath = s"$basePath/lineitem_mergetree_optimize_path_based" clearDataPath(dataPath) - withSQLConf("spark.databricks.delta.optimize.minFileSize" -> "83800000") { + withSQLConf("spark.databricks.delta.optimize.minFileSize" -> "838000") { // 3 from 37 parts are larger than this, so after optimize there should be 4 parts: // 3 original parts and 1 merged part @@ -422,18 +423,18 @@ class GlutenClickHouseMergeTreeOptimizeSuite clickhouseTable.vacuum(0.0) clickhouseTable.vacuum(0.0) if (sparkVersion.equals("3.2")) { - assert(countFiles(new File(dataPath)) == 75) + assertResult(99)(countFiles(new File(dataPath))) } else { - assert(countFiles(new File(dataPath)) == 81) + assertResult(105)(countFiles(new File(dataPath))) } val ret = spark.sql(s"select count(*) from clickhouse.`$dataPath`").collect() - assert(ret.apply(0).get(0) == 600572) + assertResult(600572)(ret.apply(0).get(0)) } withSQLConf( - ("spark.databricks.delta.optimize.maxFileSize" -> 
"10000000"), - ("spark.databricks.delta.optimize.minFileSize" -> "838250")) { + "spark.databricks.delta.optimize.maxFileSize" -> "10000000", + "spark.databricks.delta.optimize.minFileSize" -> "838250") { // of the remaing 3 original parts, 2 are less than 838250, 1 is larger (size 838255) // the merged part is ~27MB, so after optimize there should be 3 parts: // 1 merged part from 2 original parts, 1 merged part from 34 original parts @@ -445,13 +446,13 @@ class GlutenClickHouseMergeTreeOptimizeSuite clickhouseTable.vacuum(0.0) clickhouseTable.vacuum(0.0) if (sparkVersion.equals("3.2")) { - assert(countFiles(new File(dataPath)) == 75) + assertResult(93)(countFiles(new File(dataPath))) } else { - assert(countFiles(new File(dataPath)) == 85) + assertResult(104)(countFiles(new File(dataPath))) } val ret = spark.sql(s"select count(*) from clickhouse.`$dataPath`").collect() - assert(ret.apply(0).get(0) == 600572) + assertResult(600572)(ret.apply(0).get(0)) } // now merge all parts (testing merging from merged parts) @@ -461,19 +462,19 @@ class GlutenClickHouseMergeTreeOptimizeSuite clickhouseTable.vacuum(0.0) clickhouseTable.vacuum(0.0) if (sparkVersion.equals("3.2")) { - assert(countFiles(new File(dataPath)) == 75) + assertResult(77)(countFiles(new File(dataPath))) } else { - assert(countFiles(new File(dataPath)) == 90) + assertResult(93)(countFiles(new File(dataPath))) } val ret = spark.sql(s"select count(*) from clickhouse.`$dataPath`").collect() - assert(ret.apply(0).get(0) == 600572) + assertResult(600572)(ret.apply(0).get(0)) } test("test mergetree insert with optimize basic") { withSQLConf( - ("spark.databricks.delta.optimize.minFileSize" -> "200000000"), - ("spark.gluten.sql.columnar.backend.ch.runtime_settings.mergetree.merge_after_insert" -> "true") + "spark.databricks.delta.optimize.minFileSize" -> "200000000", + "spark.gluten.sql.columnar.backend.ch.runtime_settings.mergetree.merge_after_insert" -> "true" ) { spark.sql(s""" |DROP TABLE IF EXISTS lineitem_mergetree_insert_optimize_basic; @@ -487,10 +488,10 @@ class GlutenClickHouseMergeTreeOptimizeSuite |""".stripMargin) val ret = spark.sql("select count(*) from lineitem_mergetree_insert_optimize_basic").collect() - assert(ret.apply(0).get(0) == 600572) + assertResult(600572)(ret.apply(0).get(0)) eventually(timeout(60.seconds), interval(3.seconds)) { - assert( - new File(s"$basePath/lineitem_mergetree_insert_optimize_basic").listFiles().length == 2 + assertResult(2)( + new File(s"$basePath/lineitem_mergetree_insert_optimize_basic").listFiles().length ) } } diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreePathBasedWriteSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreePathBasedWriteSuite.scala index c8c6307aba06..791239fabf48 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreePathBasedWriteSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreePathBasedWriteSuite.scala @@ -60,6 +60,9 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite .set( "spark.gluten.sql.columnar.backend.ch.runtime_settings.mergetree.merge_after_insert", "false") + .set( + "spark.gluten.sql.columnar.backend.ch.runtime_settings.input_format_parquet_max_block_size", + "8192") } override protected def createTPCHNotNullTables(): Unit = { @@ -143,7 +146,7 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite case f: FileSourceScanExecTransformer => f case w: 
WholeStageTransformer => w } - assert(plans.size == 4) + assertResult(4)(plans.size) val mergetreeScan = plans(3).asInstanceOf[FileSourceScanExecTransformer] assert(mergetreeScan.nodeName.startsWith("Scan mergetree")) @@ -154,10 +157,8 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).primaryKeyOption.nonEmpty) assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).partitionColumns.isEmpty) val addFiles = fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts]) - assert(addFiles.size == 5) - assert( - addFiles.map(_.rows).sum - == 600572) + assertResult(6)(addFiles.size) + assertResult(600572)(addFiles.map(_.rows).sum) // GLUTEN-5060: check the unnecessary FilterExec val wholeStageTransformer = plans(2).asInstanceOf[WholeStageTransformer] @@ -174,7 +175,7 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite .load(dataPath) .where("l_shipdate = date'1998-09-02'") .count() - assert(result == 183) + assertResult(183)(result) } test("test mergetree path based write with dataframe api") { @@ -236,40 +237,35 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite case f: FileSourceScanExecTransformer => f case w: WholeStageTransformer => w } - assert(plans.size == 4) + assertResult(4)(plans.size) val mergetreeScan = plans(3).asInstanceOf[FileSourceScanExecTransformer] assert(mergetreeScan.nodeName.startsWith("Scan mergetree")) val fileIndex = mergetreeScan.relation.location.asInstanceOf[TahoeFileIndex] assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).clickhouseTableConfigs.nonEmpty) - assert( + assertResult("l_shipdate,l_orderkey")( ClickHouseTableV2 .getTable(fileIndex.deltaLog) .orderByKeyOption .get - .mkString(",") - .equals("l_shipdate,l_orderkey")) - assert( + .mkString(",")) + assertResult("l_shipdate")( ClickHouseTableV2 .getTable(fileIndex.deltaLog) .primaryKeyOption .get - .mkString(",") - .equals("l_shipdate")) - assert( + .mkString(",")) + assertResult("l_returnflag,l_linestatus")( ClickHouseTableV2 .getTable(fileIndex.deltaLog) .lowCardKeyOption .get - .mkString(",") - .equals("l_returnflag,l_linestatus")) + .mkString(",")) assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).partitionColumns.isEmpty) val addFiles = fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts]) - assert(addFiles.size == 5) - assert( - addFiles.map(_.rows).sum - == 600572) + assertResult(6)(addFiles.size) + assertResult(600572)(addFiles.map(_.rows).sum) // GLUTEN-5060: check the unnecessary FilterExec val wholeStageTransformer = plans(2).asInstanceOf[WholeStageTransformer] @@ -286,7 +282,7 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite .load(dataPath) .where("l_shipdate = date'1998-09-02'") .collect() - assert(result.length == 183) + assertResult(110501)(result.apply(0).get(0)) } test("test mergetree path based insert overwrite partitioned table with small table, static") { @@ -320,7 +316,7 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite .format("clickhouse") .load(dataPath) .count() - assert(result == 2418) + assertResult(2418)(result) } test("test mergetree path based insert overwrite partitioned table with small table, dynamic") { @@ -355,7 +351,7 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite .format("clickhouse") .load(dataPath) .count() - assert(result == 600572) + assertResult(600572)(result) } } @@ -381,11 +377,11 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite .format("clickhouse") .load(dataPath) .where("l_returnflag = 'Z'") - assert(df.count() == 1) + 
assertResult(1)(df.count()) val scanExec = collect(df.queryExecution.executedPlan) { case f: FileSourceScanExecTransformer => f } - assert(scanExec.size == 1) + assertResult(1)(scanExec.size) val mergetreeScan = scanExec.head assert(mergetreeScan.nodeName.startsWith("Scan mergetree")) @@ -397,16 +393,13 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).primaryKeyOption.isEmpty) assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).partitionColumns.isEmpty) val addFiles = fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts]) - assert( - addFiles.map(_.rows).sum - == 600572) - + assertResult(600572)(addFiles.map(_.rows).sum) // 4 parts belong to the first batch // 2 parts belong to the second batch (1 actual updated part, 1 passively updated). - assert(addFiles.size == 6) + assertResult(6)(addFiles.size) val filePaths = addFiles.map(_.path).groupBy(name => name.substring(0, name.lastIndexOf("_"))) - assert(filePaths.size == 2) - assert(Array(3, 3).sameElements(filePaths.values.map(paths => paths.size).toArray.sorted)) + assertResult(2)(filePaths.size) + assertResult(Array(2, 4))(filePaths.values.map(paths => paths.size).toArray.sorted) } val clickhouseTable = ClickhouseTable.forPath(spark, dataPath) @@ -417,33 +410,31 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite .format("clickhouse") .load(dataPath) .where("l_returnflag = 'X'") - assert(df.count() == 1) + assertResult(1)(df.count()) val scanExec = collect(df.queryExecution.executedPlan) { case f: FileSourceScanExecTransformer => f } - assert(scanExec.size == 1) + assertResult(1)(scanExec.size) val mergetreeScan = scanExec.head assert(mergetreeScan.nodeName.startsWith("Scan mergetree")) val fileIndex = mergetreeScan.relation.location.asInstanceOf[TahoeFileIndex] val addFiles = fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts]) - assert( - addFiles.map(_.rows).sum - == 600572) + assertResult(600572)(addFiles.map(_.rows).sum) // 4 parts belong to the first batch // 2 parts belong to the second batch (1 actual updated part, 1 passively updated). - assert(addFiles.size == 6) + assertResult(6)(addFiles.size) val filePaths = addFiles.map(_.path).groupBy(name => name.substring(0, name.lastIndexOf("_"))) - assert(filePaths.size == 3) - assert(Array(1, 2, 3).sameElements(filePaths.values.map(paths => paths.size).toArray.sorted)) + assertResult(2)(filePaths.size) + assertResult(Array(2, 4))(filePaths.values.map(paths => paths.size).toArray.sorted) } val df = spark.read .format("clickhouse") .load(dataPath) - assert(df.count() == 600572) + assertResult(600572)(df.count()) } test("test mergetree path based table delete") { @@ -465,7 +456,7 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite val df = spark.read .format("clickhouse") .load(dataPath) - assert(df.count() == 600571) + assertResult(600571)(df.count()) val scanExec = collect(df.queryExecution.executedPlan) { case f: FileSourceScanExecTransformer => f } @@ -474,17 +465,17 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite val addFiles = fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts]) // 4 parts belong to the first batch // 2 parts belong to the second batch (1 actual updated part, 1 passively updated). 
- assert(addFiles.size == 6) + assertResult(6)(addFiles.size) val filePaths = addFiles.map(_.path).groupBy(name => name.substring(0, name.lastIndexOf("_"))) - assert(filePaths.size == 2) - assert(Array(3, 3).sameElements(filePaths.values.map(paths => paths.size).toArray.sorted)) + assertResult(2)(filePaths.size) + assertResult(Array(2, 4))(filePaths.values.map(paths => paths.size).toArray.sorted) val clickhouseTable = ClickhouseTable.forPath(spark, dataPath) clickhouseTable.delete("mod(l_orderkey, 3) = 2") val df1 = spark.read .format("clickhouse") .load(dataPath) - assert(df1.count() == 400089) + assertResult(400089)(df1.count()) } test("test mergetree path based table upsert") { @@ -503,8 +494,8 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite val df0 = spark.sql(s""" | select count(*) from clickhouse.`$dataPath` |""".stripMargin) - assert( - df0.collect().apply(0).get(0) == 600572 + assertResult(600572)( + df0.collect().apply(0).get(0) ) upsertSourceTableAndCheck(dataPath) } @@ -540,8 +531,8 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite val df1 = spark.sql(s""" | select count(*) from clickhouse.`$dataPath` |""".stripMargin) - assert( - df1.collect().apply(0).get(0) == 600572 + 3506 + assertResult(600572 + 3506)( + df1.collect().apply(0).get(0) ) } { @@ -549,8 +540,8 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite spark.sql(s""" | select count(*) from clickhouse.`$dataPath` where l_returnflag = 'Z' |""".stripMargin) - assert( - df2.collect().apply(0).get(0) == 3506 + assertResult(3506)( + df2.collect().apply(0).get(0) ) } @@ -559,8 +550,8 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite spark.sql(s""" | select count(*) from clickhouse.`$dataPath` where l_orderkey > 10000000 |""".stripMargin) - assert( - df3.collect().apply(0).get(0) == 3506 + assertResult(3506)( + df3.collect().apply(0).get(0) ) } } @@ -610,33 +601,31 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite val scanExec = collect(df.queryExecution.executedPlan) { case f: FileSourceScanExecTransformer => f } - assert(scanExec.size == 1) + assertResult(1)(scanExec.size) - val mergetreeScan = scanExec(0) + val mergetreeScan = scanExec.head assert(mergetreeScan.nodeName.startsWith("Scan mergetree")) val fileIndex = mergetreeScan.relation.location.asInstanceOf[TahoeFileIndex] assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).clickhouseTableConfigs.nonEmpty) assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).bucketOption.isEmpty) - assert( + assertResult("l_shipdate,l_orderkey")( ClickHouseTableV2 .getTable(fileIndex.deltaLog) .orderByKeyOption .get - .mkString(",") - .equals("l_shipdate,l_orderkey")) - assert( + .mkString(",")) + assertResult("l_shipdate")( ClickHouseTableV2 .getTable(fileIndex.deltaLog) .primaryKeyOption .get - .mkString(",") - .equals("l_shipdate")) + .mkString(",")) assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).partitionColumns.isEmpty) val addFiles = fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts]) - assert(addFiles.size == 5) - assert(addFiles.map(_.rows).sum == 600572) + assertResult(6)(addFiles.size) + assertResult(600572)(addFiles.map(_.rows).sum) } val df = spark.read @@ -650,7 +639,7 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite |""".stripMargin) .agg(sum("l_linenumber").alias("res")) val result = df.collect() - assert(result(0).getLong(0) == 34842) + assertResult(34842)(result(0).getLong(0)) } test("test mergetree path based write with partition") { @@ -707,62 +696,56 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite 
runTPCHQueryBySQL(1, sqlStr, compareResult = false) { df => val result = df.collect() - assert(result.size == 4) - assert(result(0).getString(0).equals("A")) - assert(result(0).getString(1).equals("F")) - assert(result(0).getDouble(2) == 3803858.0) + assertResult(4)(result.length) + assertResult("A")(result(0).getString(0)) + assertResult("F")(result(0).getString(1)) + assertResult(3803858.0)(result(0).getDouble(2)) - assert(result(2).getString(0).equals("N")) - assert(result(2).getString(1).equals("O")) - assert(result(2).getDouble(2) == 7454519.0) + assertResult("N")(result(2).getString(0)) + assertResult("O")(result(2).getString(1)) + assertResult(7454519.0)(result(2).getDouble(2)) val scanExec = collect(df.queryExecution.executedPlan) { case f: FileSourceScanExecTransformer => f } - assert(scanExec.size == 1) + assertResult(1)(scanExec.size) - val mergetreeScan = scanExec(0) + val mergetreeScan = scanExec.head assert(mergetreeScan.nodeName.startsWith("Scan mergetree")) - assert(mergetreeScan.metrics("numFiles").value == 3744) + assertResult(3744)(mergetreeScan.metrics("numFiles").value) val fileIndex = mergetreeScan.relation.location.asInstanceOf[TahoeFileIndex] assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).clickhouseTableConfigs.nonEmpty) assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).bucketOption.isEmpty) - assert( + assertResult("l_orderkey")( ClickHouseTableV2 .getTable(fileIndex.deltaLog) .orderByKeyOption .get - .mkString(",") - .equals("l_orderkey")) - assert( + .mkString(",")) + assertResult("l_orderkey")( ClickHouseTableV2 .getTable(fileIndex.deltaLog) .primaryKeyOption .get - .mkString(",") - .equals("l_orderkey")) - assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).partitionColumns.size == 2) - assert( + .mkString(",")) + assertResult(2)(ClickHouseTableV2.getTable(fileIndex.deltaLog).partitionColumns.size) + assertResult("l_shipdate")( ClickHouseTableV2 .getTable(fileIndex.deltaLog) - .partitionColumns(0) - .equals("l_shipdate")) - assert( + .partitionColumns + .head) + assertResult("l_returnflag")( ClickHouseTableV2 .getTable(fileIndex.deltaLog) - .partitionColumns(1) - .equals("l_returnflag")) + .partitionColumns(1)) val addFiles = fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts]) - assert(addFiles.size == 3835) - assert(addFiles.map(_.rows).sum == 602945) - assert( - addFiles.filter(_.partitionValues.get("l_shipdate").get.equals("1992-06-01")).size == 2) - assert( - addFiles.filter(_.partitionValues.get("l_shipdate").get.equals("1993-01-01")).size == 4) - assert( - addFiles.filter(_.partitionValues.get("l_shipdate").get.equals("1995-01-21")).size == 2) + assertResult(3835)(addFiles.size) + assertResult(602945)(addFiles.map(_.rows).sum) + assertResult(2)(addFiles.count(_.partitionValues("l_shipdate").equals("1992-06-01"))) + assertResult(4)(addFiles.count(_.partitionValues("l_shipdate").equals("1993-01-01"))) + assertResult(2)(addFiles.count(_.partitionValues("l_shipdate").equals("1995-01-21"))) } } @@ -814,61 +797,49 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite val scanExec = collect(df.queryExecution.executedPlan) { case f: FileSourceScanExecTransformer => f } - assert(scanExec.size == 1) + assertResult(1)(scanExec.size) - val mergetreeScan = scanExec(0) + val mergetreeScan = scanExec.head assert(mergetreeScan.nodeName.startsWith("Scan mergetree")) val fileIndex = mergetreeScan.relation.location.asInstanceOf[TahoeFileIndex] assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).clickhouseTableConfigs.nonEmpty) val 
buckets = ClickHouseTableV2.getTable(fileIndex.deltaLog).bucketOption - assert(!buckets.isEmpty) - assert(buckets.get.numBuckets == 4) - assert( + assert(buckets.isDefined) + assertResult(4)(buckets.get.numBuckets) + assertResult("l_partkey,l_returnflag")( buckets.get.sortColumnNames - .mkString(",") - .equals("l_partkey,l_returnflag")) - assert( + .mkString(",")) + assertResult("l_orderkey")( buckets.get.bucketColumnNames - .mkString(",") - .equals("l_orderkey")) - assert( + .mkString(",")) + assertResult("l_partkey,l_returnflag")( ClickHouseTableV2 .getTable(fileIndex.deltaLog) .orderByKeyOption .get - .mkString(",") - .equals("l_partkey,l_returnflag")) - assert( + .mkString(",")) + assertResult("l_partkey")( ClickHouseTableV2 .getTable(fileIndex.deltaLog) .primaryKeyOption .get - .mkString(",") - .equals("l_partkey")) - assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).partitionColumns.size == 1) - assert( + .mkString(",")) + assertResult(1)(ClickHouseTableV2.getTable(fileIndex.deltaLog).partitionColumns.size) + assertResult("l_shipdate")( ClickHouseTableV2 .getTable(fileIndex.deltaLog) - .partitionColumns(0) - .equals("l_shipdate")) + .partitionColumns + .head) val addFiles = fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts]) - assert(addFiles.size == 10089) - assert(addFiles.map(_.rows).sum == 600572) - assert( - addFiles.filter(_.partitionValues.get("l_shipdate").get.equals("1992-06-01")).size == 4) - assert( - addFiles.filter(_.partitionValues.get("l_shipdate").get.equals("1993-01-01")).size == 4) - assert( - addFiles.filter(_.partitionValues.get("l_shipdate").get.equals("1995-01-21")).size == 4) - assert( - addFiles - .filter( - f => - f.partitionValues.get("l_shipdate").get.equals("1995-01-21") && f.bucketNum.equals( - "00000")) - .size == 1) + assertResult(10089)(addFiles.size) + assertResult(600572)(addFiles.map(_.rows).sum) + assertResult(4)(addFiles.count(_.partitionValues("l_shipdate").equals("1992-06-01"))) + assertResult(4)(addFiles.count(_.partitionValues("l_shipdate").equals("1993-01-01"))) + assertResult(4)(addFiles.count(_.partitionValues("l_shipdate").equals("1995-01-21"))) + assertResult(1)(addFiles.count( + f => f.partitionValues("l_shipdate").equals("1995-01-21") && f.bucketNum.equals("00000"))) } // check part pruning effect of filter on bucket column val df = spark.sql(s""" @@ -883,7 +854,7 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite .flatMap(partition => partition.asInstanceOf[GlutenMergeTreePartition].partList) .map(_.name) .distinct - assert(touchedParts.size == 1) + assertResult(1)(touchedParts.size) // test upsert on partitioned & bucketed table upsertSourceTableAndCheck(dataPath) @@ -929,9 +900,9 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite val scanExec = collect(df.queryExecution.executedPlan) { case f: FileSourceScanExecTransformer => f } - assert(scanExec.size == 1) + assertResult(1)(scanExec.size) - val mergetreeScan = scanExec(0) + val mergetreeScan = scanExec.head assert(mergetreeScan.nodeName.startsWith("Scan mergetree")) val fileIndex = mergetreeScan.relation.location.asInstanceOf[TahoeFileIndex] @@ -941,10 +912,8 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).primaryKeyOption.isEmpty) assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).partitionColumns.isEmpty) val addFiles = fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts]) - assert(addFiles.size == 5) - assert( - addFiles.map(_.rows).sum - == 600572) + 
assertResult(6)(addFiles.size) + assertResult(600572)(addFiles.map(_.rows).sum) } } @@ -1052,9 +1021,9 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite | |""".stripMargin - assert( + assertResult("R")( // total rows should remain unchanged - spark.sql(sqlStr2).collect().apply(0).get(0) == "R" + spark.sql(sqlStr2).collect().apply(0).get(0) ) // test select * @@ -1101,40 +1070,38 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite val scanExec = collect(df.queryExecution.executedPlan) { case f: FileSourceScanExecTransformer => f } - assert(scanExec.size == 1) + assertResult(1)(scanExec.size) - val mergetreeScan = scanExec(0) + val mergetreeScan = scanExec.head assert(mergetreeScan.nodeName.startsWith("Scan mergetree")) val fileIndex = mergetreeScan.relation.location.asInstanceOf[TahoeFileIndex] assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).clickhouseTableConfigs.nonEmpty) assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).bucketOption.isEmpty) - assert( + assertResult("l_shipdate,l_orderkey")( ClickHouseTableV2 .getTable(fileIndex.deltaLog) .orderByKeyOption .get - .mkString(",") - .equals("l_shipdate,l_orderkey")) - assert( + .mkString(",")) + assertResult("l_shipdate")( ClickHouseTableV2 .getTable(fileIndex.deltaLog) .primaryKeyOption .get - .mkString(",") - .equals("l_shipdate")) + .mkString(",")) assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).partitionColumns.isEmpty) val addFiles = fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts]) - assert(addFiles.size == 5) - assert(addFiles.map(_.rows).sum == 600572) + assertResult(6)(addFiles.size) + assertResult(600572)(addFiles.map(_.rows).sum) val plans = collect(df.queryExecution.executedPlan) { case scanExec: BasicScanExecTransformer => scanExec } - assert(plans.size == 1) - assert(plans(0).metrics("selectedMarksPk").value === 15) - assert(plans(0).metrics("totalMarksPk").value === 74) + assertResult(1)(plans.size) + assertResult(17)(plans.head.metrics("selectedMarksPk").value) + assertResult(74)(plans.head.metrics("totalMarksPk").value) } } @@ -1161,12 +1128,12 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite val scanExec = collect(df.queryExecution.executedPlan) { case f: FileSourceScanExecTransformer => f } - assert(scanExec.size == 1) - val mergetreeScan = scanExec(0) + assertResult(1)(scanExec.size) + val mergetreeScan = scanExec.head - assert(ret.apply(0).get(0) == 1) + assertResult(1)(ret.apply(0).get(0)) val marks = mergetreeScan.metrics("selectedMarks").value - assert(marks == 1) + assertResult(1)(marks) val directory = new File(dataPath) // find a folder whose name is like 48b70783-b3b8-4bf8-9c52-5261aead8e3e_0_006 @@ -1197,11 +1164,11 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite val scanExec = collect(df.queryExecution.executedPlan) { case f: FileSourceScanExecTransformer => f } - assert(scanExec.size == 1) - val mergetreeScan = scanExec(0) - assert(ret.apply(0).get(0) == 2) + assertResult(1)(scanExec.size) + val mergetreeScan = scanExec.head + assertResult(2)(ret.apply(0).get(0)) val marks = mergetreeScan.metrics("selectedMarks").value - assert(marks == 2) + assertResult(1)(marks) val directory = new File(dataPath) // find a folder whose name is like 48b70783-b3b8-4bf8-9c52-5261aead8e3e_0_006 @@ -1233,11 +1200,11 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite val scanExec = collect(df.queryExecution.executedPlan) { case f: FileSourceScanExecTransformer => f } - assert(scanExec.size == 1) - val mergetreeScan = scanExec(0) - assert(ret.apply(0).get(0) == 2) + 
assertResult(1)(scanExec.size) + val mergetreeScan = scanExec.head + assertResult(2)(ret.apply(0).get(0)) val marks = mergetreeScan.metrics("selectedMarks").value - assert(marks == 1) + assertResult(1)(marks) val directory = new File(dataPath) // find a folder whose name is like 48b70783-b3b8-4bf8-9c52-5261aead8e3e_0_006 @@ -1277,18 +1244,16 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite val scanExec = collect(df.queryExecution.executedPlan) { case f: FileSourceScanExecTransformer => f } - assert(scanExec.size == 1) + assertResult(1)(scanExec.size) - val mergetreeScan = scanExec(0) + val mergetreeScan = scanExec.head assert(mergetreeScan.nodeName.startsWith("Scan mergetree")) val fileIndex = mergetreeScan.relation.location.asInstanceOf[TahoeFileIndex] val addFiles = fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts]) - assert(addFiles.size == 5) - assert( - addFiles.map(_.rows).sum - == 600572) + assertResult(6)(addFiles.size) + assertResult(600572)(addFiles.map(_.rows).sum) } } @@ -1320,7 +1285,7 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite val fileFilter = new WildcardFileFilter("*_0_*") var dataFileList = dataPathFile.list(fileFilter) - assert(dataFileList.size == 5) + assertResult(6)(dataFileList.length) // re-create the same table val dataPath2 = s"$basePath/lineitem_mergetree_5219_s" @@ -1339,7 +1304,7 @@ class GlutenClickHouseMergeTreePathBasedWriteSuite assert(dataPathFile.isDirectory && dataPathFile.isDirectory) dataFileList = dataPathFile.list(fileFilter) - assert(dataFileList.size == 5) + assertResult(6)(dataFileList.length) } } // scalastyle:off line.size.limit diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeWriteSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeWriteSuite.scala index 679cea37ba67..70c6553416e2 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeWriteSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseMergeTreeWriteSuite.scala @@ -57,6 +57,9 @@ class GlutenClickHouseMergeTreeWriteSuite .set( "spark.gluten.sql.columnar.backend.ch.runtime_settings.mergetree.merge_after_insert", "false") + .set( + "spark.gluten.sql.columnar.backend.ch.runtime_settings.input_format_parquet_max_block_size", + "8192") } override protected def createTPCHNotNullTables(): Unit = { @@ -128,7 +131,7 @@ class GlutenClickHouseMergeTreeWriteSuite case f: FileSourceScanExecTransformer => f case w: WholeStageTransformer => w } - assert(plans.size == 4) + assertResult(4)(plans.size) val mergetreeScan = plans(3).asInstanceOf[FileSourceScanExecTransformer] assert(mergetreeScan.nodeName.startsWith("Scan mergetree")) @@ -140,10 +143,8 @@ class GlutenClickHouseMergeTreeWriteSuite assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).primaryKeyOption.isEmpty) assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).partitionColumns.isEmpty) val addFiles = fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts]) - assert(addFiles.size == 5) - assert( - addFiles.map(_.rows).sum - == 600572) + assertResult(6)(addFiles.size) + assertResult(600572)(addFiles.map(_.rows).sum) // GLUTEN-5060: check the unnecessary FilterExec val wholeStageTransformer = plans(2).asInstanceOf[WholeStageTransformer] @@ -200,9 +201,9 @@ class GlutenClickHouseMergeTreeWriteSuite | select count(*) from lineitem_mergetree_insertoverwrite | |""".stripMargin - assert( + 
assertResult(300001)( // total rows should remain unchanged - spark.sql(sql2).collect().apply(0).get(0) == 300001 + spark.sql(sql2).collect().apply(0).get(0) ) } @@ -251,9 +252,9 @@ class GlutenClickHouseMergeTreeWriteSuite | select count(*) from lineitem_mergetree_insertoverwrite2 | |""".stripMargin - assert( + assertResult(2418)( // total rows should remain unchanged - spark.sql(sql2).collect().apply(0).get(0) == 2418 + spark.sql(sql2).collect().apply(0).get(0) ) } @@ -303,9 +304,9 @@ class GlutenClickHouseMergeTreeWriteSuite | select count(*) from lineitem_mergetree_insertoverwrite3 | |""".stripMargin - assert( + assertResult(600572)( // total rows should remain unchanged - spark.sql(sql2).collect().apply(0).get(0) == 600572 + spark.sql(sql2).collect().apply(0).get(0) ) } } @@ -357,14 +358,14 @@ class GlutenClickHouseMergeTreeWriteSuite val df = spark.sql(sql1) val result = df.collect() - assert( + assertResult(1)( // in test data, there are only 1 row with l_orderkey = 12647 - result.apply(0).get(0) == 1 + result.apply(0).get(0) ) val scanExec = collect(df.queryExecution.executedPlan) { case f: FileSourceScanExecTransformer => f } - assert(scanExec.size == 1) + assertResult(1)(scanExec.size) val mergetreeScan = scanExec.head assert(mergetreeScan.nodeName.startsWith("Scan mergetree")) @@ -376,16 +377,14 @@ class GlutenClickHouseMergeTreeWriteSuite assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).primaryKeyOption.isEmpty) assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).partitionColumns.isEmpty) val addFiles = fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts]) - assert( - addFiles.map(_.rows).sum - == 600572) + assertResult(600572)(addFiles.map(_.rows).sum) // 4 parts belong to the first batch // 2 parts belong to the second batch (1 actual updated part, 1 passively updated). - assert(addFiles.size == 6) + assertResult(6)(addFiles.size) val filePaths = addFiles.map(_.path).groupBy(name => name.substring(0, name.lastIndexOf("_"))) - assert(filePaths.size == 2) - assert(Array(3, 3).sameElements(filePaths.values.map(paths => paths.size).toArray.sorted)) + assertResult(2)(filePaths.size) + assertResult(Array(2, 4))(filePaths.values.map(paths => paths.size).toArray.sorted) } val sql2 = @@ -393,9 +392,9 @@ class GlutenClickHouseMergeTreeWriteSuite | select count(*) from lineitem_mergetree_update | |""".stripMargin - assert( + assertResult(600572)( // total rows should remain unchanged - spark.sql(sql2).collect().apply(0).get(0) == 600572 + spark.sql(sql2).collect().apply(0).get(0) ) } @@ -444,8 +443,8 @@ class GlutenClickHouseMergeTreeWriteSuite | select count(*) from lineitem_mergetree_delete |""".stripMargin) val result = df.collect() - assert( - result.apply(0).get(0) == 600571 + assertResult(600571)( + result.apply(0).get(0) ) val scanExec = collect(df.queryExecution.executedPlan) { case f: FileSourceScanExecTransformer => f @@ -455,10 +454,10 @@ class GlutenClickHouseMergeTreeWriteSuite val addFiles = fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts]) // 4 parts belong to the first batch // 2 parts belong to the second batch (1 actual updated part, 1 passively updated). 
- assert(addFiles.size == 6) + assertResult(6)(addFiles.size) val filePaths = addFiles.map(_.path).groupBy(name => name.substring(0, name.lastIndexOf("_"))) - assert(filePaths.size == 2) - assert(Array(3, 3).sameElements(filePaths.values.map(paths => paths.size).toArray.sorted)) + assertResult(2)(filePaths.size) + assertResult(Array(2, 4))(filePaths.values.map(paths => paths.size).toArray.sorted) } { @@ -468,9 +467,7 @@ class GlutenClickHouseMergeTreeWriteSuite val df3 = spark.sql(s""" | select count(*) from lineitem_mergetree_delete |""".stripMargin) - assert( - df3.collect().apply(0).get(0) == 400089 - ) + assertResult(400089)(df3.collect().apply(0).get(0)) } } @@ -512,9 +509,7 @@ class GlutenClickHouseMergeTreeWriteSuite val df0 = spark.sql(s""" | select count(*) from lineitem_mergetree_upsert |""".stripMargin) - assert( - df0.collect().apply(0).get(0) == 600572 - ) + assertResult(600572)(df0.collect().apply(0).get(0)) } upsertSourceTableAndCheck("lineitem_mergetree_upsert") @@ -551,18 +546,14 @@ class GlutenClickHouseMergeTreeWriteSuite val df1 = spark.sql(s""" | select count(*) from $tableName |""".stripMargin) - assert( - df1.collect().apply(0).get(0) == 600572 + 3506 - ) + assertResult(600572 + 3506)(df1.collect().apply(0).get(0)) } { val df2 = spark.sql(s""" | select count(*) from $tableName where l_returnflag = 'Z' |""".stripMargin) - assert( - df2.collect().apply(0).get(0) == 3506 - ) + assertResult(3506)(df2.collect().apply(0).get(0)) } { @@ -570,9 +561,7 @@ class GlutenClickHouseMergeTreeWriteSuite spark.sql(s""" | select count(*) from $tableName where l_orderkey > 10000000 |""".stripMargin) - assert( - df3.collect().apply(0).get(0) == 3506 - ) + assertResult(3506)(df3.collect().apply(0).get(0)) } } @@ -642,33 +631,31 @@ class GlutenClickHouseMergeTreeWriteSuite val scanExec = collect(df.queryExecution.executedPlan) { case f: FileSourceScanExecTransformer => f } - assert(scanExec.size == 1) + assertResult(1)(scanExec.size) - val mergetreeScan = scanExec(0) + val mergetreeScan = scanExec.head assert(mergetreeScan.nodeName.startsWith("Scan mergetree")) val fileIndex = mergetreeScan.relation.location.asInstanceOf[TahoeFileIndex] assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).clickhouseTableConfigs.nonEmpty) assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).bucketOption.isEmpty) - assert( + assertResult("l_shipdate,l_orderkey")( ClickHouseTableV2 .getTable(fileIndex.deltaLog) .orderByKeyOption .get - .mkString(",") - .equals("l_shipdate,l_orderkey")) - assert( + .mkString(",")) + assertResult("l_shipdate")( ClickHouseTableV2 .getTable(fileIndex.deltaLog) .primaryKeyOption .get - .mkString(",") - .equals("l_shipdate")) + .mkString(",")) assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).partitionColumns.isEmpty) val addFiles = fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts]) - assert(addFiles.size == 5) - assert(addFiles.map(_.rows).sum == 600572) + assertResult(6)(addFiles.size) + assertResult(600572)(addFiles.map(_.rows).sum) } } @@ -800,62 +787,56 @@ class GlutenClickHouseMergeTreeWriteSuite runTPCHQueryBySQL(1, sqlStr, compareResult = false) { df => val result = df.collect() - assert(result.size == 4) - assert(result(0).getString(0).equals("A")) - assert(result(0).getString(1).equals("F")) - assert(result(0).getDouble(2) == 3865234.0) + assertResult(4)(result.length) + assertResult("A")(result(0).getString(0)) + assertResult("F")(result(0).getString(1)) + assertResult(3865234.0)(result(0).getDouble(2)) - 
assert(result(2).getString(0).equals("N")) - assert(result(2).getString(1).equals("O")) - assert(result(2).getDouble(2) == 7454519.0) + assertResult("N")(result(2).getString(0)) + assertResult("O")(result(2).getString(1)) + assertResult(7454519.0)(result(2).getDouble(2)) val scanExec = collect(df.queryExecution.executedPlan) { case f: FileSourceScanExecTransformer => f } - assert(scanExec.size == 1) + assertResult(1)(scanExec.size) - val mergetreeScan = scanExec(0) + val mergetreeScan = scanExec.head assert(mergetreeScan.nodeName.startsWith("Scan mergetree")) - assert(mergetreeScan.metrics("numFiles").value == 3745) + assertResult(3745)(mergetreeScan.metrics("numFiles").value) val fileIndex = mergetreeScan.relation.location.asInstanceOf[TahoeFileIndex] assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).clickhouseTableConfigs.nonEmpty) assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).bucketOption.isEmpty) - assert( + assertResult("l_orderkey")( ClickHouseTableV2 .getTable(fileIndex.deltaLog) .orderByKeyOption .get - .mkString(",") - .equals("l_orderkey")) - assert( + .mkString(",")) + assertResult("l_orderkey")( ClickHouseTableV2 .getTable(fileIndex.deltaLog) .primaryKeyOption .get - .mkString(",") - .equals("l_orderkey")) - assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).partitionColumns.size == 2) - assert( + .mkString(",")) + assertResult(2)(ClickHouseTableV2.getTable(fileIndex.deltaLog).partitionColumns.size) + assertResult("l_shipdate")( ClickHouseTableV2 .getTable(fileIndex.deltaLog) - .partitionColumns(0) - .equals("l_shipdate")) - assert( + .partitionColumns + .head) + assertResult("l_returnflag")( ClickHouseTableV2 .getTable(fileIndex.deltaLog) - .partitionColumns(1) - .equals("l_returnflag")) + .partitionColumns(1)) val addFiles = fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts]) - assert(addFiles.size == 3836) - assert(addFiles.map(_.rows).sum == 605363) - assert( - addFiles.filter(_.partitionValues.get("l_shipdate").get.equals("1992-06-01")).size == 2) - assert( - addFiles.filter(_.partitionValues.get("l_shipdate").get.equals("1993-01-01")).size == 4) - assert( - addFiles.filter(_.partitionValues.get("l_shipdate").get.equals("1995-01-21")).size == 3) + assertResult(3836)(addFiles.size) + assertResult(605363)(addFiles.map(_.rows).sum) + assertResult(2)(addFiles.count(_.partitionValues("l_shipdate").equals("1992-06-01"))) + assertResult(4)(addFiles.count(_.partitionValues("l_shipdate").equals("1993-01-01"))) + assertResult(3)(addFiles.count(_.partitionValues("l_shipdate").equals("1995-01-21"))) } } @@ -927,49 +908,40 @@ class GlutenClickHouseMergeTreeWriteSuite val scanExec = collect(df.queryExecution.executedPlan) { case f: FileSourceScanExecTransformer => f } - assert(scanExec.size == 1) + assertResult(1)(scanExec.size) - val mergetreeScan = scanExec(0) + val mergetreeScan = scanExec.head assert(mergetreeScan.nodeName.startsWith("Scan mergetree")) val fileIndex = mergetreeScan.relation.location.asInstanceOf[TahoeFileIndex] assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).clickhouseTableConfigs.nonEmpty) - assert(!ClickHouseTableV2.getTable(fileIndex.deltaLog).bucketOption.isEmpty) + assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).bucketOption.isDefined) if (sparkVersion.equals("3.2")) { assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).orderByKeyOption.isEmpty) } else { - assert( + assertResult("l_partkey,l_returnflag")( ClickHouseTableV2 .getTable(fileIndex.deltaLog) .orderByKeyOption .get - .mkString(",") - 
.equals("l_partkey,l_returnflag")) + .mkString(",")) } assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).primaryKeyOption.isEmpty) - assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).partitionColumns.size == 1) - assert( + assertResult(1)(ClickHouseTableV2.getTable(fileIndex.deltaLog).partitionColumns.size) + assertResult("l_shipdate")( ClickHouseTableV2 .getTable(fileIndex.deltaLog) - .partitionColumns(0) - .equals("l_shipdate")) + .partitionColumns + .head) val addFiles = fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts]) - assert(addFiles.size == 10089) - assert(addFiles.map(_.rows).sum == 600572) - assert( - addFiles.filter(_.partitionValues.get("l_shipdate").get.equals("1992-06-01")).size == 4) - assert( - addFiles.filter(_.partitionValues.get("l_shipdate").get.equals("1993-01-01")).size == 4) - assert( - addFiles.filter(_.partitionValues.get("l_shipdate").get.equals("1995-01-21")).size == 4) - assert( - addFiles - .filter( - f => - f.partitionValues.get("l_shipdate").get.equals("1995-01-21") && f.bucketNum.equals( - "00000")) - .size == 1) + assertResult(10089)(addFiles.size) + assertResult(600572)(addFiles.map(_.rows).sum) + assertResult(4)(addFiles.count(_.partitionValues("l_shipdate").equals("1992-06-01"))) + assertResult(4)(addFiles.count(_.partitionValues("l_shipdate").equals("1993-01-01"))) + assertResult(4)(addFiles.count(_.partitionValues("l_shipdate").equals("1995-01-21"))) + assertResult(1)(addFiles.count( + f => f.partitionValues("l_shipdate").equals("1995-01-21") && f.bucketNum.equals("00000"))) } // check part pruning effect of filter on bucket column val df = spark.sql(s""" @@ -984,7 +956,7 @@ class GlutenClickHouseMergeTreeWriteSuite .flatMap(partition => partition.asInstanceOf[GlutenMergeTreePartition].partList) .map(_.name) .distinct - assert(touchedParts.size == 1) + assertResult(1)(touchedParts.size) // test upsert on partitioned & bucketed table upsertSourceTableAndCheck("lineitem_mergetree_bucket") @@ -996,9 +968,7 @@ class GlutenClickHouseMergeTreeWriteSuite val df0 = spark.sql(s""" | select count(*) from lineitem_mergetree_bucket |""".stripMargin) - assert( - df0.collect().apply(0).get(0) == 3 - ) + assertResult(3)(df0.collect().apply(0).get(0)) } @@ -1065,40 +1035,40 @@ class GlutenClickHouseMergeTreeWriteSuite warehouse + "/" + tableName } val deletedPath = new File(deletedPathStr) - assert(deletedPath.exists() == exceptedExists) + assertResult(exceptedExists)(deletedPath.exists()) } // test non external table var tableName = "lineitem_mergetree_drop" var tableLocation = "" createAndDropTable(tableName, tableLocation) - checkTableExists(tableName, tableLocation, false) + checkTableExists(tableName, tableLocation, exceptedExists = false) // test external table tableName = "lineitem_mergetree_external_drop" - createAndDropTable(tableName, tableLocation, true) - checkTableExists(tableName, tableLocation, false) + createAndDropTable(tableName, tableLocation, isExternal = true) + checkTableExists(tableName, tableLocation, exceptedExists = false) // test table with the specified location tableName = "lineitem_mergetree_location_drop" tableLocation = basePath + "/" + tableName createAndDropTable(tableName, tableLocation) - checkTableExists(tableName, tableLocation, true) + checkTableExists(tableName, tableLocation, exceptedExists = true) tableName = "lineitem_mergetree_external_location_drop" tableLocation = basePath + "/" + tableName - createAndDropTable(tableName, tableLocation, true) - checkTableExists(tableName, tableLocation, 
true) + createAndDropTable(tableName, tableLocation, isExternal = true) + checkTableExists(tableName, tableLocation, exceptedExists = true) tableName = "lineitem_mergetree_location_purge" tableLocation = basePath + "/" + tableName createAndDropTable(tableName, tableLocation, purgeTable = true) - checkTableExists(tableName, tableLocation, false) + checkTableExists(tableName, tableLocation, exceptedExists = false) tableName = "lineitem_mergetree_external_location_purge" tableLocation = basePath + "/" + tableName - createAndDropTable(tableName, tableLocation, true, true) - checkTableExists(tableName, tableLocation, false) + createAndDropTable(tableName, tableLocation, isExternal = true, purgeTable = true) + checkTableExists(tableName, tableLocation, exceptedExists = false) } test("test mergetree CTAS simple") { @@ -1143,9 +1113,9 @@ class GlutenClickHouseMergeTreeWriteSuite val scanExec = collect(df.queryExecution.executedPlan) { case f: FileSourceScanExecTransformer => f } - assert(scanExec.size == 1) + assertResult(1)(scanExec.size) - val mergetreeScan = scanExec(0) + val mergetreeScan = scanExec.head assert(mergetreeScan.nodeName.startsWith("Scan mergetree")) val fileIndex = mergetreeScan.relation.location.asInstanceOf[TahoeFileIndex] @@ -1155,10 +1125,8 @@ class GlutenClickHouseMergeTreeWriteSuite assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).primaryKeyOption.isEmpty) assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).partitionColumns.isEmpty) val addFiles = fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts]) - assert(addFiles.size == 5) - assert( - addFiles.map(_.rows).sum - == 600572) + assertResult(6)(addFiles.size) + assertResult(600572)(addFiles.map(_.rows).sum) } } @@ -1289,9 +1257,9 @@ class GlutenClickHouseMergeTreeWriteSuite | |""".stripMargin - assert( + assertResult("R")( // total rows should remain unchanged - spark.sql(sqlStr2).collect().apply(0).get(0) == "R" + spark.sql(sqlStr2).collect().apply(0).get(0) ) // test select * @@ -1359,40 +1327,38 @@ class GlutenClickHouseMergeTreeWriteSuite val scanExec = collect(df.queryExecution.executedPlan) { case f: FileSourceScanExecTransformer => f } - assert(scanExec.size == 1) + assertResult(1)(scanExec.size) - val mergetreeScan = scanExec(0) + val mergetreeScan = scanExec.head assert(mergetreeScan.nodeName.startsWith("Scan mergetree")) val fileIndex = mergetreeScan.relation.location.asInstanceOf[TahoeFileIndex] assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).clickhouseTableConfigs.nonEmpty) assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).bucketOption.isEmpty) - assert( + assertResult("l_shipdate,l_orderkey")( ClickHouseTableV2 .getTable(fileIndex.deltaLog) .orderByKeyOption .get - .mkString(",") - .equals("l_shipdate,l_orderkey")) - assert( + .mkString(",")) + assertResult("l_shipdate")( ClickHouseTableV2 .getTable(fileIndex.deltaLog) .primaryKeyOption .get - .mkString(",") - .equals("l_shipdate")) + .mkString(",")) assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).partitionColumns.isEmpty) val addFiles = fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts]) - assert(addFiles.size == 5) - assert(addFiles.map(_.rows).sum == 600572) + assertResult(6)(addFiles.size) + assertResult(600572)(addFiles.map(_.rows).sum) val plans = collect(df.queryExecution.executedPlan) { case scanExec: BasicScanExecTransformer => scanExec } - assert(plans.size == 1) - assert(plans(0).metrics("selectedMarksPk").value === 15) - assert(plans(0).metrics("totalMarksPk").value === 74) + 
assertResult(1)(plans.size) + assertResult(17)(plans.head.metrics("selectedMarksPk").value) + assertResult(74)(plans.head.metrics("totalMarksPk").value) } } @@ -1447,21 +1413,20 @@ class GlutenClickHouseMergeTreeWriteSuite val scanExec = collect(df.queryExecution.executedPlan) { case f: FileSourceScanExecTransformer => f } - assert(scanExec.size == 1) + assertResult(1)(scanExec.size) - val mergetreeScan = scanExec(0) + val mergetreeScan = scanExec.head assert(mergetreeScan.nodeName.startsWith("Scan mergetree")) val fileIndex = mergetreeScan.relation.location.asInstanceOf[TahoeFileIndex] assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).clickhouseTableConfigs.nonEmpty) assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).bucketOption.isEmpty) - assert( + assertResult("l_shipdate")( ClickHouseTableV2 .getTable(fileIndex.deltaLog) .orderByKeyOption .get - .mkString(",") - .equals("l_shipdate")) + .mkString(",")) assert( ClickHouseTableV2 .getTable(fileIndex.deltaLog) @@ -1470,15 +1435,15 @@ class GlutenClickHouseMergeTreeWriteSuite assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).partitionColumns.isEmpty) val addFiles = fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts]) - assert(addFiles.size == 5) - assert(addFiles.map(_.rows).sum == 600572) + assertResult(6)(addFiles.size) + assertResult(600572)(addFiles.map(_.rows).sum) val plans = collect(df.queryExecution.executedPlan) { case scanExec: BasicScanExecTransformer => scanExec } - assert(plans.size == 1) - assert(plans(0).metrics("selectedMarksPk").value === 15) - assert(plans(0).metrics("totalMarksPk").value === 74) + assertResult(1)(plans.size) + assertResult(17)(plans.head.metrics("selectedMarksPk").value) + assertResult(74)(plans.head.metrics("totalMarksPk").value) } } @@ -1527,21 +1492,21 @@ class GlutenClickHouseMergeTreeWriteSuite runSql(sqlStr)( df => { val result = df.collect() - assert(result.size == 1) - assert(result(0).getLong(0) == 10) + assertResult(1)(result.length) + assertResult(10)(result(0).getLong(0)) val scanExec = collect(df.queryExecution.executedPlan) { case f: FileSourceScanExecTransformer => f } - assert(scanExec.size == 1) + assertResult(1)(scanExec.size) - val mergetreeScan = scanExec(0) + val mergetreeScan = scanExec.head assert(mergetreeScan.nodeName.startsWith("Scan mergetree")) val fileIndex = mergetreeScan.relation.location.asInstanceOf[TahoeFileIndex] val addFiles = fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts]) - assert(addFiles.size == 1) - assert(addFiles(0).rows == 10) + assertResult(1)(addFiles.size) + assertResult(10)(addFiles.head.rows) }) } @@ -1585,16 +1550,16 @@ class GlutenClickHouseMergeTreeWriteSuite val scanExec = collect(df.queryExecution.executedPlan) { case f: FileSourceScanExecTransformer => f } - assert(scanExec.size == 1) + assertResult(1)(scanExec.size) - val mergetreeScan = scanExec(0) + val mergetreeScan = scanExec.head assert(mergetreeScan.nodeName.startsWith("Scan mergetree")) val fileIndex = mergetreeScan.relation.location.asInstanceOf[TahoeFileIndex] val addFiles = fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts]) - assert( - (addFiles.map(_.marks).sum - addFiles.size) == mergetreeScan.metrics("totalMarksPk").value) - assert(mergetreeScan.metrics("selectedMarksPk").value == exceptedCnt) + assertResult(mergetreeScan.metrics("totalMarksPk").value)( + addFiles.map(_.marks).sum - addFiles.size) + assertResult(exceptedCnt)(mergetreeScan.metrics("selectedMarksPk").value) } val sqlStr1 = @@ 
-1609,8 +1574,8 @@ class GlutenClickHouseMergeTreeWriteSuite runSql(sqlStr1)( df => { val result = df.collect() - assert(result.size == 1) - assert(result(0).getDouble(0).toString.substring(0, 6).equals("2.6480")) + assertResult(1)(result.length) + assertResult("2.6480")(result(0).getDouble(0).toString.substring(0, 6)) checkSelectedMarksCnt(df, 34) }) @@ -1627,10 +1592,10 @@ class GlutenClickHouseMergeTreeWriteSuite runSql(sqlStr2)( df => { val result = df.collect() - assert(result.size == 1) - assert(result(0).getDouble(0).toString.substring(0, 6).equals("5.3379")) + assertResult(1)(result.length) + assertResult("5.3379")(result(0).getDouble(0).toString.substring(0, 6)) - checkSelectedMarksCnt(df, 24) + checkSelectedMarksCnt(df, 29) }) } @@ -1666,18 +1631,16 @@ class GlutenClickHouseMergeTreeWriteSuite val scanExec = collect(df.queryExecution.executedPlan) { case f: FileSourceScanExecTransformer => f } - assert(scanExec.size == 1) + assertResult(1)(scanExec.size) - val mergetreeScan = scanExec(0) + val mergetreeScan = scanExec.head assert(mergetreeScan.nodeName.startsWith("Scan mergetree")) val fileIndex = mergetreeScan.relation.location.asInstanceOf[TahoeFileIndex] val addFiles = fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts]) - assert(addFiles.size == 5) - assert( - addFiles.map(_.rows).sum - == 600572) + assertResult(6)(addFiles.size) + assertResult(600572)(addFiles.map(_.rows).sum) } } @@ -1715,7 +1678,7 @@ class GlutenClickHouseMergeTreeWriteSuite val fileFilter = new WildcardFileFilter("*_0_*") var dataFileList = dataPath.list(fileFilter) - assert(dataFileList.size == 5) + assertResult(6)(dataFileList.length) // test with the normal table spark.sql(s""" @@ -1796,7 +1759,7 @@ class GlutenClickHouseMergeTreeWriteSuite assert(dataPath.isDirectory && dataPath.isDirectory) dataFileList = dataPath.list(fileFilter) - assert(dataFileList.size == 5) + assertResult(6)(dataFileList.length) // re-create the same table for (i <- 0 until 10) { @@ -1818,7 +1781,7 @@ class GlutenClickHouseMergeTreeWriteSuite assert(dataPath.isDirectory && dataPath.isDirectory) dataFileList = dataPath.list(fileFilter) - assert(dataFileList.size == 5) + assertResult(6)(dataFileList.length) } test("test mergetree with primary keys filter pruning by driver") { @@ -1872,22 +1835,22 @@ class GlutenClickHouseMergeTreeWriteSuite Seq(("true", 2), ("false", 3)).foreach( conf => { withSQLConf( - ("spark.gluten.sql.columnar.backend.ch.runtime_settings.enabled_driver_filter_mergetree_index" -> conf._1)) { + "spark.gluten.sql.columnar.backend.ch.runtime_settings.enabled_driver_filter_mergetree_index" -> conf._1) { runTPCHQueryBySQL(6, sqlStr) { df => val scanExec = collect(df.queryExecution.executedPlan) { case f: FileSourceScanExecTransformer => f } - assert(scanExec.size == 1) + assertResult(1)(scanExec.size) - val mergetreeScan = scanExec(0) + val mergetreeScan = scanExec.head assert(mergetreeScan.nodeName.startsWith("Scan mergetree")) val plans = collect(df.queryExecution.executedPlan) { case scanExec: BasicScanExecTransformer => scanExec } - assert(plans.size == 1) - assert(plans(0).getSplitInfos.size == conf._2) + assertResult(1)(plans.size) + assertResult(conf._2)(plans.head.getSplitInfos.size) } } }) @@ -1990,14 +1953,14 @@ class GlutenClickHouseMergeTreeWriteSuite Seq(("true", 2), ("false", 2)).foreach( conf => { withSQLConf( - ("spark.gluten.sql.columnar.backend.ch.runtime_settings.enabled_driver_filter_mergetree_index" -> conf._1)) { + 
"spark.gluten.sql.columnar.backend.ch.runtime_settings.enabled_driver_filter_mergetree_index" -> conf._1) { runTPCHQueryBySQL(12, sqlStr) { df => val scanExec = collect(df.queryExecution.executedPlan) { case f: BasicScanExecTransformer => f } - assert(scanExec.size == 2) - assert(scanExec(1).getSplitInfos.size == conf._2) + assertResult(2)(scanExec.size) + assertResult(conf._2)(scanExec(1).getSplitInfos.size) } } }) diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTableAfterRestart.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTableAfterRestart.scala index ab11c1e0c201..f9e831cb4aa7 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTableAfterRestart.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTableAfterRestart.scala @@ -61,6 +61,9 @@ class GlutenClickHouseTableAfterRestart .set( "spark.gluten.sql.columnar.backend.ch.runtime_settings.mergetree.merge_after_insert", "false") + .set( + "spark.gluten.sql.columnar.backend.ch.runtime_settings.input_format_parquet_max_block_size", + "8192") } override protected def createTPCHNotNullTables(): Unit = { @@ -180,9 +183,9 @@ class GlutenClickHouseTableAfterRestart // for this run, missing count should not increase runTPCHQueryBySQL(1, sqlStr)(_ => {}) val stats1 = ClickhouseSnapshot.deltaScanCache.stats() - assert(stats1.missCount() - oldMissingCount1 == 0) + assertResult(oldMissingCount1)(stats1.missCount()) val stats2 = ClickhouseSnapshot.addFileToAddMTPCache.stats() - assert(stats2.missCount() - oldMissingCount2 == 0) + assertResult(oldMissingCount2)(stats2.missCount()) } val oldMissingCount1 = ClickhouseSnapshot.deltaScanCache.stats().missCount() @@ -194,10 +197,9 @@ class GlutenClickHouseTableAfterRestart // after restart, additionally check stats of delta scan cache val stats1 = ClickhouseSnapshot.deltaScanCache.stats() - assert(stats1.missCount() - oldMissingCount1 == 1) + assertResult(oldMissingCount1 + 1)(stats1.missCount()) val stats2 = ClickhouseSnapshot.addFileToAddMTPCache.stats() - assert(stats2.missCount() - oldMissingCount2 == 5) - + assertResult(oldMissingCount2 + 6)(stats2.missCount()) } test("test optimize after restart") { @@ -222,7 +224,8 @@ class GlutenClickHouseTableAfterRestart restartSpark() spark.sql("optimize table_restart_optimize") - assert(spark.sql("select count(*) from table_restart_optimize").collect().apply(0).get(0) == 4) + assertResult(4)( + spark.sql("select count(*) from table_restart_optimize").collect().apply(0).get(0)) } test("test vacuum after restart") { @@ -250,7 +253,8 @@ class GlutenClickHouseTableAfterRestart spark.sql("vacuum table_restart_vacuum") - assert(spark.sql("select count(*) from table_restart_vacuum").collect().apply(0).get(0) == 4) + assertResult(4)( + spark.sql("select count(*) from table_restart_vacuum").collect().apply(0).get(0)) } test("test update after restart") { @@ -276,7 +280,8 @@ class GlutenClickHouseTableAfterRestart spark.sql("update table_restart_update set name = 'tom' where id = 1") - assert(spark.sql("select count(*) from table_restart_update").collect().apply(0).get(0) == 4) + assertResult(4)( + spark.sql("select count(*) from table_restart_update").collect().apply(0).get(0)) } test("test delete after restart") { @@ -302,7 +307,8 @@ class GlutenClickHouseTableAfterRestart spark.sql("delete from table_restart_delete where where id = 1") - assert(spark.sql("select count(*) from 
table_restart_delete").collect().apply(0).get(0) == 2) + assertResult(2)( + spark.sql("select count(*) from table_restart_delete").collect().apply(0).get(0)) } test("test drop after restart") { diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala index 3cf485aac06b..a892b6f313a4 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala @@ -157,6 +157,71 @@ class TestOperator extends VeloxWholeStageTransformerSuite with AdaptiveSparkPla checkLengthAndPlan(df, 60141) } + test("not in") { + // integral type + val df = runQueryAndCompare( + "select l_orderkey from lineitem " + + "where l_partkey not in (1552, 674, 1062)") { + checkGlutenOperatorMatch[FileSourceScanExecTransformer] + } + checkLengthAndPlan(df, 60053) + + val df2 = runQueryAndCompare( + "select l_orderkey from lineitem " + + "where l_partkey not in (1552, 674) and l_partkey not in (1062)") { + checkGlutenOperatorMatch[FileSourceScanExecTransformer] + } + checkLengthAndPlan(df2, 60053) + + val df3 = runQueryAndCompare( + "select l_orderkey from lineitem " + + "where l_partkey not in (1552, 674) and l_partkey != 1062") { + checkGlutenOperatorMatch[FileSourceScanExecTransformer] + } + checkLengthAndPlan(df3, 60053) + + // string type + val df4 = + runQueryAndCompare("select o_orderstatus from orders where o_orderstatus not in ('O', 'F')") { + checkGlutenOperatorMatch[FileSourceScanExecTransformer] + } + checkLengthAndPlan(df4, 363) + + // bool type + withTable("t") { + sql("create table t (id int, b boolean) using parquet") + sql("insert into t values (1, true), (2, false), (3, null)") + runQueryAndCompare("select * from t where b not in (true)") { + checkGlutenOperatorMatch[FileSourceScanExecTransformer] + } + + runQueryAndCompare("select * from t where b not in (true, false)") { + checkGlutenOperatorMatch[FileSourceScanExecTransformer] + } + } + + // mix not-in with range + runQueryAndCompare( + "select l_orderkey from lineitem " + + "where l_partkey not in (1552, 674) and l_partkey >= 1552") { + checkGlutenOperatorMatch[FileSourceScanExecTransformer] + } + + // mix not-in with in + runQueryAndCompare( + "select l_orderkey from lineitem " + + "where l_partkey not in (1552, 674) and l_partkey in (1552)") { + checkGlutenOperatorMatch[FileSourceScanExecTransformer] + } + + // not-in with or relation + runQueryAndCompare( + "select l_orderkey from lineitem " + + "where l_partkey not in (1552, 674) or l_partkey in (1552)") { + checkGlutenOperatorMatch[FileSourceScanExecTransformer] + } + } + test("coalesce") { var df = runQueryAndCompare( "select l_orderkey, coalesce(l_comment, 'default_val') " + diff --git a/cpp-ch/CMakeLists.txt b/cpp-ch/CMakeLists.txt index 0c41f494d2a1..1ef19b3b5651 100644 --- a/cpp-ch/CMakeLists.txt +++ b/cpp-ch/CMakeLists.txt @@ -16,7 +16,9 @@ cmake_minimum_required(VERSION 3.20) file(READ ${CMAKE_CURRENT_SOURCE_DIR}/clickhouse.version CH_VERSION) -set(CH_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/ClickHouse CACHE STRING "ClickHouse source dir") +set(CH_SOURCE_DIR + ${CMAKE_CURRENT_SOURCE_DIR}/ClickHouse + CACHE STRING "ClickHouse source dir") string(REGEX REPLACE ".*CH_ORG=([^\n]+).*" "\\1" CH_ORG "${CH_VERSION}") string(REGEX REPLACE ".*CH_BRANCH=([^\n]+).*" "\\1" CH_BRANCH "${CH_VERSION}") string(REGEX REPLACE ".*CH_COMMIT=([^\n]+).*" "\\1" CH_COMMIT "${CH_VERSION}") @@ -27,93 +29,93 @@ 
message("CH_COMMIT=${CH_COMMIT}") project(libch LANGUAGES C CXX ASM) file(GLOB clickhouse_files "${CH_SOURCE_DIR}/*") -if ("${CH_SOURCE_DIR}" STREQUAL "${CMAKE_SOURCE_DIR}/ClickHouse") - if (NOT clickhouse_files) - execute_process(COMMAND git clone -b ${CH_BRANCH} --depth 3 https://github.com/${CH_ORG}/ClickHouse.git ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY) - execute_process(COMMAND git reset --hard ${CH_COMMIT} WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY) - execute_process(COMMAND git submodule update --init --force --depth 1 WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY) - else() - execute_process(COMMAND git fetch origin ${CH_BRANCH} --depth 3 WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY) - execute_process(COMMAND git checkout ${CH_BRANCH} WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY) - execute_process(COMMAND git reset --hard ${CH_COMMIT} WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY) - execute_process(COMMAND git submodule update --init --recursive --force --depth 1 WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY) - endif() +if("${CH_SOURCE_DIR}" STREQUAL "${CMAKE_SOURCE_DIR}/ClickHouse") + if(NOT clickhouse_files) + execute_process( + COMMAND + git clone -b ${CH_BRANCH} --depth 3 + https://github.com/${CH_ORG}/ClickHouse.git ${CH_SOURCE_DIR} + COMMAND_ERROR_IS_FATAL ANY) + execute_process( + COMMAND git reset --hard ${CH_COMMIT} + WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY) + execute_process( + COMMAND git submodule update --init --force --depth 1 + WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY) + else() + execute_process( + COMMAND git fetch origin ${CH_BRANCH} --depth 3 + WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY) + execute_process( + COMMAND git checkout ${CH_BRANCH} + WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY) + execute_process( + COMMAND git reset --hard ${CH_COMMIT} + WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY) + execute_process( + COMMAND git submodule update --init --recursive --force --depth 1 + WORKING_DIRECTORY ${CH_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY) + endif() else() - if (NOT clickhouse_files) - # Checking out *all* submodules takes > 5 min. Therefore, the smoke build ("FastTest") in CI initializes only the set of - # submodules minimally needed for a build and we cannot assume here that all submodules are populated. - message(ERROR "clickhouse ${CH_SOURCE_DIR} is missing or empty. to fix try run:") - message(STATUS " git clone --recursive --depth 1 https://github.com/Kyligence/ClickHouse.git ${CMAKE_SOURCE_DIR}") - endif() + if(NOT clickhouse_files) + # Checking out *all* submodules takes > 5 min. Therefore, the smoke build + # ("FastTest") in CI initializes only the set of submodules minimally needed + # for a build and we cannot assume here that all submodules are populated. + message(ERROR + "clickhouse ${CH_SOURCE_DIR} is missing or empty. 
to fix try run:") + message( + STATUS + " git clone --recursive --depth 1 https://github.com/Kyligence/ClickHouse.git ${CMAKE_SOURCE_DIR}" + ) + endif() endif() -if (EXISTS "${CH_SOURCE_DIR}/utils/extern-local-engine") - execute_process(COMMAND rm -rf ${CH_SOURCE_DIR}/utils/extern-local-engine) -endif () -execute_process(COMMAND ln -s ${CMAKE_CURRENT_SOURCE_DIR}/local-engine ${CH_SOURCE_DIR}/utils/extern-local-engine COMMAND_ERROR_IS_FATAL ANY) +if(EXISTS "${CH_SOURCE_DIR}/utils/extern-local-engine") + execute_process(COMMAND rm -rf ${CH_SOURCE_DIR}/utils/extern-local-engine) +endif() +execute_process( + COMMAND ln -s ${CMAKE_CURRENT_SOURCE_DIR}/local-engine + ${CH_SOURCE_DIR}/utils/extern-local-engine COMMAND_ERROR_IS_FATAL ANY) -# execute_process(COMMAND find ${CMAKE_CURRENT_SOURCE_DIR}/local-engine -regex '.*\.\(c\|cpp\|h\)' -exec clang-format-15 --verbose -i --style=file -i {} \;) +# execute_process(COMMAND find ${CMAKE_CURRENT_SOURCE_DIR}/local-engine -regex +# '.*\.\(c\|cpp\|h\)' -exec clang-format-15 --verbose -i --style=file -i {} \;) set(CH_BINARY_DIR "${CMAKE_CURRENT_SOURCE_DIR}/build") option(ENABLE_CPP_TEST "Build CPP Unit test" OFF) -if (ENABLE_CPP_TEST) -add_custom_command( - USES_TERMINAL - COMMAND - bash -c - \"cmake -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - -DENABLE_PROTOBUF=ON - -DENABLE_TESTS=ON - -DENABLE_JEMALLOC=ON - -DENABLE_MULTITARGET_CODE=ON - -DENABLE_EXTERN_LOCAL_ENGINE=ON - -DCOMPILER_FLAGS='-fvisibility=hidden -fvisibility-inlines-hidden' - -S ${CH_SOURCE_DIR} -G Ninja -B ${CH_BINARY_DIR} && - cmake --build ${CH_BINARY_DIR} --target ch unit_tests_local_engine\" - OUTPUT _build_ch) +if(ENABLE_CPP_TEST) + add_custom_command( + USES_TERMINAL + COMMAND + bash -c \"cmake -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DENABLE_PROTOBUF=ON + -DENABLE_TESTS=ON -DENABLE_JEMALLOC=ON -DENABLE_MULTITARGET_CODE=ON + -DENABLE_EXTERN_LOCAL_ENGINE=ON -DCOMPILER_FLAGS='-fvisibility=hidden + -fvisibility-inlines-hidden' -S ${CH_SOURCE_DIR} -G Ninja -B + ${CH_BINARY_DIR} && cmake --build ${CH_BINARY_DIR} --target ch + unit_tests_local_engine\" + OUTPUT _build_ch) else() -add_custom_command( - USES_TERMINAL - COMMAND - bash -c - \"cmake -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - -DENABLE_PROTOBUF=ON - -DENABLE_TESTS=OFF - -DENABLE_JEMALLOC=ON - -DENABLE_MULTITARGET_CODE=ON - -DENABLE_EXTERN_LOCAL_ENGINE=ON - -DENABLE_ODBC=OFF - -DENABLE_CAPNP=OFF - -DENABLE_ROCKSDB=OFF - -DENABLE_GRPC=OFF - -DENABLE_RUST=OFF - -DENABLE_H3=OFF - -DENABLE_AMQPCPP=OFF - -DENABLE_CASSANDRA=OFF - -DENABLE_KAFKA=OFF - -DENABLE_NATS=OFF - -DENABLE_LIBPQXX=OFF - -DENABLE_NURAFT=OFF - -DENABLE_DATASKETCHES=OFF - -DENABLE_SQLITE=OFF - -DENABLE_S2_GEOMETRY=OFF - -DENABLE_ANNOY=OFF - -DENABLE_ULID=OFF - -DENABLE_MYSQL=OFF - -DENABLE_BCRYPT=OFF - -DENABLE_LDAP=OFF - -DENABLE_MSGPACK=OFF - -DUSE_REPLXX=OFF - -DENABLE_CLICKHOUSE_ALL=OFF - -DCOMPILER_FLAGS='-fvisibility=hidden -fvisibility-inlines-hidden' - -S ${CH_SOURCE_DIR} -G Ninja -B ${CH_BINARY_DIR} && - cmake --build ${CH_BINARY_DIR} --target libch\" - OUTPUT _build_ch) + add_custom_command( + USES_TERMINAL + COMMAND + bash -c \"cmake -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DENABLE_PROTOBUF=ON + -DENABLE_TESTS=OFF 
-DENABLE_JEMALLOC=ON -DENABLE_MULTITARGET_CODE=ON + -DENABLE_EXTERN_LOCAL_ENGINE=ON -DENABLE_ODBC=OFF -DENABLE_CAPNP=OFF + -DENABLE_ROCKSDB=OFF -DENABLE_GRPC=OFF -DENABLE_RUST=OFF -DENABLE_H3=OFF + -DENABLE_AMQPCPP=OFF -DENABLE_CASSANDRA=OFF -DENABLE_KAFKA=OFF + -DENABLE_NATS=OFF -DENABLE_LIBPQXX=OFF -DENABLE_NURAFT=OFF + -DENABLE_DATASKETCHES=OFF -DENABLE_SQLITE=OFF -DENABLE_S2_GEOMETRY=OFF + -DENABLE_ANNOY=OFF -DENABLE_ULID=OFF -DENABLE_MYSQL=OFF + -DENABLE_BCRYPT=OFF -DENABLE_LDAP=OFF -DENABLE_MSGPACK=OFF + -DUSE_REPLXX=OFF -DENABLE_CLICKHOUSE_ALL=OFF + -DCOMPILER_FLAGS='-fvisibility=hidden -fvisibility-inlines-hidden' -S + ${CH_SOURCE_DIR} -G Ninja -B ${CH_BINARY_DIR} && cmake --build + ${CH_BINARY_DIR} --target libch\" + OUTPUT _build_ch) endif() add_custom_target(build_ch ALL DEPENDS _build_ch) diff --git a/cpp-ch/local-engine/CMakeLists.txt b/cpp-ch/local-engine/CMakeLists.txt index 8c96c5f98f71..93ee4b8218af 100644 --- a/cpp-ch/local-engine/CMakeLists.txt +++ b/cpp-ch/local-engine/CMakeLists.txt @@ -13,22 +13,24 @@ # See the License for the specific language governing permissions and # limitations under the License. - -if (ENABLE_MULTITARGET_CODE) - add_definitions(-DENABLE_MULTITARGET_CODE=1) +if(ENABLE_MULTITARGET_CODE) + add_definitions(-DENABLE_MULTITARGET_CODE=1) else() - add_definitions(-DENABLE_MULTITARGET_CODE=0) + add_definitions(-DENABLE_MULTITARGET_CODE=0) endif() -set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w -ffunction-sections -fdata-sections") -set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w -ffunction-sections -fdata-sections") -set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic -Wl,--gc-sections") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w -ffunction-sections -fdata-sections") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w -ffunction-sections -fdata-sections") +set(CMAKE_SHARED_LINKER_FLAGS + "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic -Wl,--gc-sections") -if (COMPILER_CLANG AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 16) - set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --ld-path=${LLD_WRAPPER}") +if(COMPILER_CLANG AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 16) + set(CMAKE_SHARED_LINKER_FLAGS + "${CMAKE_SHARED_LINKER_FLAGS} --ld-path=${LLD_WRAPPER}") else() - set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --ld-path=${LLD_PATH}") -endif () + set(CMAKE_SHARED_LINKER_FLAGS + "${CMAKE_SHARED_LINKER_FLAGS} --ld-path=${LLD_PATH}") +endif() set(THRIFT_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp/src") @@ -37,7 +39,7 @@ include(FindJava) include(UseJava) include(FindJNI) -#set(JNI_NATIVE_SOURCES local_engine_jni.cpp) +# set(JNI_NATIVE_SOURCES local_engine_jni.cpp) set(LOCALENGINE_SHARED_LIB _gluten_ch) add_subdirectory(proto) @@ -61,116 +63,124 @@ add_headers_and_sources(disks Disks) add_headers_and_sources(disks Disks/ObjectStorages) include_directories( - ${JNI_INCLUDE_DIRS} - ${CMAKE_CURRENT_BINARY_DIR}/proto - ${THRIFT_INCLUDE_DIR} - ${CMAKE_BINARY_DIR}/contrib/thrift-cmake - ${CMAKE_BINARY_DIR}/contrib/llvm-project/llvm/include - ${CMAKE_CURRENT_SOURCE_DIR} - ${ClickHouse_SOURCE_DIR}/src - ${ClickHouse_SOURCE_DIR}/base - ${ClickHouse_SOURCE_DIR}/contrib/orc/c++/include - ${ClickHouse_SOURCE_DIR}/contrib/arrow-cmake/cpp/src/orc/c++/include - ${CMAKE_BINARY_DIR}/contrib/orc/c++/include - ${ClickHouse_SOURCE_DIR}/contrib/azure/sdk/storage/azure-storage-blobs/inc - ${ClickHouse_SOURCE_DIR}/contrib/azure/sdk/core/azure-core/inc - ${ClickHouse_SOURCE_DIR}/contrib/azure/sdk/storage/azure-storage-common/inc - 
${ClickHouse_SOURCE_DIR}/contrib/llvm-project/llvm/include - ${ClickHouse_SOURCE_DIR}/contrib/llvm-project/utils/bazel/llvm-project-overlay/llvm/include + ${JNI_INCLUDE_DIRS} + ${CMAKE_CURRENT_BINARY_DIR}/proto + ${THRIFT_INCLUDE_DIR} + ${CMAKE_BINARY_DIR}/contrib/thrift-cmake + ${CMAKE_BINARY_DIR}/contrib/llvm-project/llvm/include + ${CMAKE_CURRENT_SOURCE_DIR} + ${ClickHouse_SOURCE_DIR}/src + ${ClickHouse_SOURCE_DIR}/base + ${ClickHouse_SOURCE_DIR}/contrib/orc/c++/include + ${ClickHouse_SOURCE_DIR}/contrib/arrow-cmake/cpp/src/orc/c++/include + ${CMAKE_BINARY_DIR}/contrib/orc/c++/include + ${ClickHouse_SOURCE_DIR}/contrib/azure/sdk/storage/azure-storage-blobs/inc + ${ClickHouse_SOURCE_DIR}/contrib/azure/sdk/core/azure-core/inc + ${ClickHouse_SOURCE_DIR}/contrib/azure/sdk/storage/azure-storage-common/inc + ${ClickHouse_SOURCE_DIR}/contrib/llvm-project/llvm/include + ${ClickHouse_SOURCE_DIR}/contrib/llvm-project/utils/bazel/llvm-project-overlay/llvm/include ) add_subdirectory(Storages/Parquet) add_subdirectory(Storages/SubstraitSource) add_subdirectory(Functions) -add_library(gluten_clickhouse_backend_libs - ${builder_sources} - ${join_sources} - ${parser_sources} - ${rewriter_sources} - ${storages_sources} - ${common_sources} - ${external_sources} - ${shuffle_sources} - ${operator_sources} - ${aggregate_functions_sources} - ${jni_sources} - ${disks_sources} -) - -target_link_libraries(gluten_clickhouse_backend_libs PUBLIC - substrait_source - clickhouse_aggregate_functions - clickhouse_functions - gluten_spark_functions - ch_contrib::xxHash -) +add_library( + gluten_clickhouse_backend_libs + ${builder_sources} + ${join_sources} + ${parser_sources} + ${rewriter_sources} + ${storages_sources} + ${common_sources} + ${external_sources} + ${shuffle_sources} + ${operator_sources} + ${aggregate_functions_sources} + ${jni_sources} + ${disks_sources}) + +target_link_libraries( + gluten_clickhouse_backend_libs + PUBLIC substrait_source clickhouse_aggregate_functions clickhouse_functions + gluten_spark_functions ch_contrib::xxHash) # Add udf sources files in sub directories to functions_sources -option(ENABLE_LOCAL_UDFS "Build UDFs in 'local-engine/Parser/*_udf' subdirectories" ON) -if (ENABLE_LOCAL_UDFS) - file(GLOB children CONFIGURE_DEPENDS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} Parser/*_udf) - foreach(child ${children}) - add_headers_and_sources(local_udfs ${child}) - endforeach() -endif () - -file(GLOB children CONFIGURE_DEPENDS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} Parser/*_function_parser) +option(ENABLE_LOCAL_UDFS + "Build UDFs in 'local-engine/Parser/*_udf' subdirectories" ON) +if(ENABLE_LOCAL_UDFS) + file( + GLOB children CONFIGURE_DEPENDS + RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} + Parser/*_udf) + foreach(child ${children}) + add_headers_and_sources(local_udfs ${child}) + endforeach() +endif() + +file( + GLOB children CONFIGURE_DEPENDS + RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} + Parser/*_function_parser) foreach(child ${children}) - add_headers_and_sources(function_parsers ${child}) + add_headers_and_sources(function_parsers ${child}) endforeach() -# Notice: soures files under Parser/*_udf subdirectories must be built into target ${LOCALENGINE_SHARED_LIB} directly -# to make sure all function parsers are registered successly. -add_library(${LOCALENGINE_SHARED_LIB} SHARED - local_engine_jni.cpp - ${local_udfs_sources} - ${function_parsers_sources} - $) # why add clickhouse_malloc? 
check clickhouse PR-8046 +# Notice: soures files under Parser/*_udf subdirectories must be built into +# target ${LOCALENGINE_SHARED_LIB} directly to make sure all function parsers +# are registered successly. +add_library( + ${LOCALENGINE_SHARED_LIB} SHARED + local_engine_jni.cpp ${local_udfs_sources} ${function_parsers_sources} + $) # why add clickhouse_malloc? check + # clickhouse PR-8046 target_compile_options(${LOCALENGINE_SHARED_LIB} PUBLIC -fPIC - -Wno-shorten-64-to-32) - -target_link_libraries(${LOCALENGINE_SHARED_LIB} -PUBLIC - clickhouse_new_delete - clickhouse_common_config - clickhouse_common_io - clickhouse_parsers - clickhouse_storages_system - loggers - gluten_clickhouse_backend_libs - ch_contrib::protobuf -PRIVATE - substrait -) + -Wno-shorten-64-to-32) + +target_link_libraries( + ${LOCALENGINE_SHARED_LIB} + PUBLIC clickhouse_new_delete + clickhouse_common_config + clickhouse_common_io + clickhouse_parsers + clickhouse_storages_system + loggers + gluten_clickhouse_backend_libs + ch_contrib::protobuf + PRIVATE substrait) target_link_libraries(${LOCALENGINE_SHARED_LIB} PUBLIC ch_parquet) -if (ENABLE_JEMALLOC) - target_link_options(${LOCALENGINE_SHARED_LIB} PRIVATE - -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/libch.map -Wl,-Bsymbolic-functions) +if(ENABLE_JEMALLOC) + target_link_options( + ${LOCALENGINE_SHARED_LIB} PRIVATE + -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/libch.map + -Wl,-Bsymbolic-functions) else() - target_link_options(${LOCALENGINE_SHARED_LIB} PRIVATE - -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/libch-hide-jemalloc.map) + target_link_options( + ${LOCALENGINE_SHARED_LIB} PRIVATE + -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/libch-hide-jemalloc.map) endif() -if ("${CMAKE_BUILD_TYPE}" MATCHES "Debug") - set(LOCALENGINE_SHARED_LIB_NAME "libchd.so") -else () - set(LOCALENGINE_SHARED_LIB_NAME "libch.so") -endif () +if("${CMAKE_BUILD_TYPE}" MATCHES "Debug") + set(LOCALENGINE_SHARED_LIB_NAME "libchd.so") +else() + set(LOCALENGINE_SHARED_LIB_NAME "libch.so") +endif() add_custom_command( - OUTPUT ${LOCALENGINE_SHARED_LIB_NAME} - COMMAND ${CMAKE_COMMAND} -E rename $ ${LOCALENGINE_SHARED_LIB_NAME} - COMMENT "Renaming $ to ${LOCALENGINE_SHARED_LIB_NAME}" - DEPENDS ${LOCALENGINE_SHARED_LIB}) + OUTPUT ${LOCALENGINE_SHARED_LIB_NAME} + COMMAND ${CMAKE_COMMAND} -E rename $ + ${LOCALENGINE_SHARED_LIB_NAME} + COMMENT + "Renaming $ to ${LOCALENGINE_SHARED_LIB_NAME}" + DEPENDS ${LOCALENGINE_SHARED_LIB}) add_custom_target(libch ALL DEPENDS ${LOCALENGINE_SHARED_LIB_NAME}) add_subdirectory(tests) -if (ENABLE_EXAMPLES) - add_subdirectory(examples) -endif() \ No newline at end of file +if(ENABLE_EXAMPLES) + add_subdirectory(examples) +endif() diff --git a/cpp-ch/local-engine/Functions/CMakeLists.txt b/cpp-ch/local-engine/Functions/CMakeLists.txt index 5968c86094f7..74697315597b 100644 --- a/cpp-ch/local-engine/Functions/CMakeLists.txt +++ b/cpp-ch/local-engine/Functions/CMakeLists.txt @@ -16,51 +16,52 @@ add_headers_and_sources(gluten_spark_functions .) 
add_library(gluten_spark_functions_obj OBJECT ${gluten_spark_functions_sources}) -list (APPEND PRIVATE_LIBS - boost::headers_only - pcg_random - Poco::Foundation - Poco::Util - Poco::Net - Poco::JSON - ch_contrib::cctz - ch_contrib::fmt - ch_contrib::pdqsort - ch_contrib::miniselect - ch_contrib::magic_enum - ch_contrib::double_conversion - ch_contrib::dragonbox_to_chars - ch_contrib::re2 - ch_contrib::abseil_swiss_tables - ch_contrib::sparsehash - ch_contrib::metrohash - ch_contrib::murmurhash - ch_contrib::wyhash - ch_contrib::cityhash - ch_contrib::farmhash - ch_contrib::xxHash - OpenSSL::SSL -) +list( + APPEND + PRIVATE_LIBS + boost::headers_only + pcg_random + Poco::Foundation + Poco::Util + Poco::Net + Poco::JSON + ch_contrib::cctz + ch_contrib::fmt + ch_contrib::pdqsort + ch_contrib::miniselect + ch_contrib::magic_enum + ch_contrib::double_conversion + ch_contrib::dragonbox_to_chars + ch_contrib::re2 + ch_contrib::abseil_swiss_tables + ch_contrib::sparsehash + ch_contrib::metrohash + ch_contrib::murmurhash + ch_contrib::wyhash + ch_contrib::cityhash + ch_contrib::farmhash + ch_contrib::xxHash + OpenSSL::SSL) -if (TARGET ch_contrib::vectorscan) - list (APPEND PRIVATE_LIBS ch_contrib::vectorscan) +if(TARGET ch_contrib::vectorscan) + list(APPEND PRIVATE_LIBS ch_contrib::vectorscan) endif() -if (TARGET ch_contrib::rapidjson) - list (APPEND PRIVATE_LIBS ch_contrib::rapidjson) +if(TARGET ch_contrib::rapidjson) + list(APPEND PRIVATE_LIBS ch_contrib::rapidjson) endif() -if (TARGET ch_contrib::simdjson) - list (APPEND PRIVATE_LIBS ch_contrib::simdjson) +if(TARGET ch_contrib::simdjson) + list(APPEND PRIVATE_LIBS ch_contrib::simdjson) endif() -if (TARGET ch_rust::blake3) - list (APPEND PRIVATE_LIBS ch_rust::blake3) +if(TARGET ch_rust::blake3) + list(APPEND PRIVATE_LIBS ch_rust::blake3) endif() -list (APPEND OBJECT_LIBS $) +list(APPEND OBJECT_LIBS $) target_link_libraries(gluten_spark_functions_obj PRIVATE ${PRIVATE_LIBS}) add_library(gluten_spark_functions INTERFACE) -target_link_libraries(gluten_spark_functions INTERFACE ${OBJECT_LIBS}) \ No newline at end of file +target_link_libraries(gluten_spark_functions INTERFACE ${OBJECT_LIBS}) diff --git a/cpp-ch/local-engine/Storages/Parquet/CMakeLists.txt b/cpp-ch/local-engine/Storages/Parquet/CMakeLists.txt index f3d9e14f4b9c..bfe538710804 100644 --- a/cpp-ch/local-engine/Storages/Parquet/CMakeLists.txt +++ b/cpp-ch/local-engine/Storages/Parquet/CMakeLists.txt @@ -18,13 +18,10 @@ set(ARROW_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src") add_headers_and_sources(Parquet .) 
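A minimal sketch of the OBJECT-library + INTERFACE-facade pattern that the Functions/CMakeLists.txt hunk above reformats: the sources are compiled once into an object library, and a thin INTERFACE target re-exports the resulting object files to consumers. The target names, source names, and the `MY_PRIVATE_LIBS` variable below are placeholders, not taken from this patch.

```cmake
# Compile the function sources once into object files.
add_library(my_funcs_obj OBJECT func_a.cpp func_b.cpp)
target_link_libraries(my_funcs_obj PRIVATE ${MY_PRIVATE_LIBS}) # compile-time deps only

# Facade target: consumers that link my_funcs receive the already-built objects,
# so the sources are not recompiled per consumer.
add_library(my_funcs INTERFACE)
target_link_libraries(my_funcs INTERFACE $<TARGET_OBJECTS:my_funcs_obj>)
```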
add_library(ch_parquet ${Parquet_sources}) -target_link_libraries(ch_parquet PUBLIC - boost::headers_only - clickhouse_common_io -) +target_link_libraries(ch_parquet PUBLIC boost::headers_only + clickhouse_common_io) -target_include_directories(ch_parquet SYSTEM BEFORE PUBLIC - ${ARROW_INCLUDE_DIR} - ${CMAKE_BINARY_DIR}/contrib/arrow-cmake/cpp/src - ${ClickHouse_SOURCE_DIR}/contrib/arrow-cmake/cpp/src -) +target_include_directories( + ch_parquet SYSTEM BEFORE + PUBLIC ${ARROW_INCLUDE_DIR} ${CMAKE_BINARY_DIR}/contrib/arrow-cmake/cpp/src + ${ClickHouse_SOURCE_DIR}/contrib/arrow-cmake/cpp/src) diff --git a/cpp-ch/local-engine/Storages/SubstraitSource/CMakeLists.txt b/cpp-ch/local-engine/Storages/SubstraitSource/CMakeLists.txt index 4e43c924f6fe..228f54255cb4 100644 --- a/cpp-ch/local-engine/Storages/SubstraitSource/CMakeLists.txt +++ b/cpp-ch/local-engine/Storages/SubstraitSource/CMakeLists.txt @@ -15,30 +15,24 @@ set(ARROW_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src") - macro(add_headers_and_sources_including_cc prefix common_path) - add_glob(${prefix}_headers ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h) - add_glob(${prefix}_sources ${common_path}/*.cpp ${common_path}/*.c ${common_path}/*.cc ${common_path}/*.h) + add_glob(${prefix}_headers ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h) + add_glob(${prefix}_sources ${common_path}/*.cpp ${common_path}/*.c + ${common_path}/*.cc ${common_path}/*.h) endmacro() add_headers_and_sources(substrait_source .) add_headers_and_sources_including_cc(ch_parquet arrow) add_library(substrait_source ${substrait_source_sources}) -target_compile_options(substrait_source PRIVATE - -Wno-suggest-destructor-override - -Wno-inconsistent-missing-destructor-override -) +target_compile_options( + substrait_source PRIVATE -Wno-suggest-destructor-override + -Wno-inconsistent-missing-destructor-override) -target_link_libraries(substrait_source PUBLIC - boost::headers_only - ch_contrib::protobuf - clickhouse_common_io - ch_contrib::hdfs - substrait -) +target_link_libraries( + substrait_source PUBLIC boost::headers_only ch_contrib::protobuf + clickhouse_common_io ch_contrib::hdfs substrait) -target_include_directories(substrait_source SYSTEM BEFORE PUBLIC - ${ARROW_INCLUDE_DIR} - ${CMAKE_BINARY_DIR}/contrib/arrow-cmake/cpp/src - ${ClickHouse_SOURCE_DIR}/contrib/arrow-cmake/cpp/src -) \ No newline at end of file +target_include_directories( + substrait_source SYSTEM BEFORE + PUBLIC ${ARROW_INCLUDE_DIR} ${CMAKE_BINARY_DIR}/contrib/arrow-cmake/cpp/src + ${ClickHouse_SOURCE_DIR}/contrib/arrow-cmake/cpp/src) diff --git a/cpp-ch/local-engine/examples/CMakeLists.txt b/cpp-ch/local-engine/examples/CMakeLists.txt index bbeeb98d2445..03cd3bfe3f19 100644 --- a/cpp-ch/local-engine/examples/CMakeLists.txt +++ b/cpp-ch/local-engine/examples/CMakeLists.txt @@ -13,5 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
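Both the Parquet and SubstraitSource hunks above register vendored third-party include paths with `target_include_directories(... SYSTEM BEFORE ...)`. As a rough sketch (the target and path below are placeholders): `SYSTEM` tells the compiler to treat those headers as system headers, silencing warnings that originate in them, and `BEFORE` prepends the directories so they take precedence over later search paths.

```cmake
# Hypothetical target consuming vendored third-party headers.
add_library(my_reader reader.cpp)
# SYSTEM: suppress warnings coming from these headers.
# BEFORE: put these directories first on the include search path.
target_include_directories(
  my_reader SYSTEM BEFORE
  PUBLIC ${CMAKE_SOURCE_DIR}/third_party/arrow/cpp/src)
```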
-clickhouse_add_executable (signal_demo signal_demo.cpp) -target_link_libraries(signal_demo PRIVATE gluten_clickhouse_backend_libs loggers) \ No newline at end of file +clickhouse_add_executable(signal_demo signal_demo.cpp) +target_link_libraries(signal_demo PRIVATE gluten_clickhouse_backend_libs + loggers) diff --git a/cpp-ch/local-engine/proto/CMakeLists.txt b/cpp-ch/local-engine/proto/CMakeLists.txt index 31583ff659ed..ffb34504af52 100644 --- a/cpp-ch/local-engine/proto/CMakeLists.txt +++ b/cpp-ch/local-engine/proto/CMakeLists.txt @@ -12,37 +12,34 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -file(GLOB protobuf_files - substrait/*.proto - substrait/extensions/*.proto - ) +file(GLOB protobuf_files substrait/*.proto substrait/extensions/*.proto) -FOREACH(FIL ${protobuf_files}) - file(RELATIVE_PATH FIL_RELATIVE ${ClickHouse_SOURCE_DIR}/utils/extern-local-engine/proto/ ${FIL}) - string(REGEX REPLACE "\\.proto" "" FILE_NAME ${FIL_RELATIVE}) - LIST(APPEND SUBSTRAIT_SRCS "${CMAKE_CURRENT_BINARY_DIR}/${FILE_NAME}.pb.cc") - LIST(APPEND SUBSTRAIT_HEADERS "${CMAKE_CURRENT_BINARY_DIR}/${FILE_NAME}.pb.h") -ENDFOREACH() +foreach(FIL ${protobuf_files}) + file(RELATIVE_PATH FIL_RELATIVE + ${ClickHouse_SOURCE_DIR}/utils/extern-local-engine/proto/ ${FIL}) + string(REGEX REPLACE "\\.proto" "" FILE_NAME ${FIL_RELATIVE}) + list(APPEND SUBSTRAIT_SRCS "${CMAKE_CURRENT_BINARY_DIR}/${FILE_NAME}.pb.cc") + list(APPEND SUBSTRAIT_HEADERS "${CMAKE_CURRENT_BINARY_DIR}/${FILE_NAME}.pb.h") +endforeach() # Generate Substrait headers add_custom_command( - OUTPUT ${SUBSTRAIT_SRCS} ${SUBSTRAIT_HEADERS} - COMMAND - $ --cpp_out ${CMAKE_CURRENT_BINARY_DIR} - --proto_path ${CMAKE_CURRENT_SOURCE_DIR} - --proto_path ${ClickHouse_SOURCE_DIR}/contrib/google-protobuf/src - ${protobuf_files} - DEPENDS ${protobuf_files} - COMMENT "Running cpp protocol buffer compiler" - VERBATIM) -add_custom_target(generate_substrait ALL DEPENDS ${SUBSTRAIT_SRCS} ${SUBSTRAIT_HEADERS}) + OUTPUT ${SUBSTRAIT_SRCS} ${SUBSTRAIT_HEADERS} + COMMAND + $ --cpp_out ${CMAKE_CURRENT_BINARY_DIR} --proto_path + ${CMAKE_CURRENT_SOURCE_DIR} --proto_path + ${ClickHouse_SOURCE_DIR}/contrib/google-protobuf/src ${protobuf_files} + DEPENDS ${protobuf_files} + COMMENT "Running cpp protocol buffer compiler" + VERBATIM) +add_custom_target(generate_substrait ALL DEPENDS ${SUBSTRAIT_SRCS} + ${SUBSTRAIT_HEADERS}) set_source_files_properties(${SUBSTRAIT_SRCS} PROPERTIES GENERATED TRUE) add_library(substrait ${SUBSTRAIT_SRCS}) add_dependencies(substrait generate_substrait) -target_compile_options(substrait PUBLIC -fPIC - -Wno-reserved-identifier - -Wno-deprecated) -target_include_directories(substrait SYSTEM BEFORE PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) +target_compile_options(substrait PUBLIC -fPIC -Wno-reserved-identifier + -Wno-deprecated) +target_include_directories(substrait SYSTEM BEFORE + PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) target_link_libraries(substrait ch_contrib::protobuf) - diff --git a/cpp-ch/local-engine/tests/CMakeLists.txt b/cpp-ch/local-engine/tests/CMakeLists.txt index 9781a332e89c..be02bf6234d2 100644 --- a/cpp-ch/local-engine/tests/CMakeLists.txt +++ b/cpp-ch/local-engine/tests/CMakeLists.txt @@ -12,76 +12,92 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
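The proto/CMakeLists.txt hunk above follows the usual generate-then-compile protobuf pattern: an `add_custom_command` runs the protocol buffer compiler to produce `.pb.cc`/`.pb.h` files, the outputs are marked `GENERATED`, and a library is built from them. A self-contained sketch of that pattern is below; it assumes `protobuf::protoc` and `protobuf::libprotobuf` targets are available (for example via `find_package(Protobuf CONFIG)`), and `demo.proto` / `demo_proto` are hypothetical names.

```cmake
set(DEMO_PB_SRC ${CMAKE_CURRENT_BINARY_DIR}/demo.pb.cc)
set(DEMO_PB_HDR ${CMAKE_CURRENT_BINARY_DIR}/demo.pb.h)

# Run protoc at build time to produce the C++ sources.
add_custom_command(
  OUTPUT ${DEMO_PB_SRC} ${DEMO_PB_HDR}
  COMMAND
    $<TARGET_FILE:protobuf::protoc> --cpp_out ${CMAKE_CURRENT_BINARY_DIR}
    --proto_path ${CMAKE_CURRENT_SOURCE_DIR}
    ${CMAKE_CURRENT_SOURCE_DIR}/demo.proto
  DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/demo.proto
  COMMENT "Running the C++ protocol buffer compiler on demo.proto"
  VERBATIM)

# The outputs do not exist at configure time; mark them as generated.
set_source_files_properties(${DEMO_PB_SRC} PROPERTIES GENERATED TRUE)

add_library(demo_proto ${DEMO_PB_SRC})
target_include_directories(demo_proto PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
target_link_libraries(demo_proto PUBLIC protobuf::libprotobuf)
```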
-configure_file(${CMAKE_CURRENT_SOURCE_DIR}/testConfig.h.in ${CMAKE_CURRENT_SOURCE_DIR}/testConfig.h) +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/testConfig.h.in + ${CMAKE_CURRENT_SOURCE_DIR}/testConfig.h) -if (ENABLE_TESTS) - macro(add_gtest_sources prefix common_path) - add_glob(${prefix}_sources ${common_path}/gtest*.cpp) - endmacro() +if(ENABLE_TESTS) + macro(add_gtest_sources prefix common_path) + add_glob(${prefix}_sources ${common_path}/gtest*.cpp) + endmacro() - set(USE_INTERNAL_GTEST_LIBRARY 0) - set(BENCHMARK_ENABLE_TESTING OFF) + set(USE_INTERNAL_GTEST_LIBRARY 0) + set(BENCHMARK_ENABLE_TESTING OFF) - enable_testing() - include(CTest) + enable_testing() + include(CTest) - include_directories(${GTEST_INCLUDE_DIRS}) + include_directories(${GTEST_INCLUDE_DIRS}) - set(TEST_DATA_DIR "${ClickHouse_SOURCE_DIR}/utils/extern-local-engine/tests") - set(HAVE_POSIX_REGEX 1) - set(LOCAL_ENGINE_DIR "${ClickHouse_SOURCE_DIR}/utils/extern-local-engine") + set(TEST_DATA_DIR "${ClickHouse_SOURCE_DIR}/utils/extern-local-engine/tests") + set(HAVE_POSIX_REGEX 1) + set(LOCAL_ENGINE_DIR "${ClickHouse_SOURCE_DIR}/utils/extern-local-engine") - add_gtest_sources(local_engine_gtest .) - if (ENABLE_LOCAL_UDFS) - file(GLOB children CONFIGURE_DEPENDS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ../Parser/*_udf/tests) - foreach(child ${children}) - add_gtest_sources(local_engine_gtest ${child}) - endforeach() - - file(GLOB children CONFIGURE_DEPENDS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ../Parser/*_udf) - foreach(child ${children}) - add_headers_and_sources(local_engine_udf ${child}) - endforeach() - endif () + add_gtest_sources(local_engine_gtest .) + if(ENABLE_LOCAL_UDFS) + file( + GLOB children CONFIGURE_DEPENDS + RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} + ../Parser/*_udf/tests) + foreach(child ${children}) + add_gtest_sources(local_engine_gtest ${child}) + endforeach() - file(GLOB children CONFIGURE_DEPENDS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ../Parser/*_function_parser) + file( + GLOB children CONFIGURE_DEPENDS + RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} + ../Parser/*_udf) foreach(child ${children}) - add_headers_and_sources(local_engine_function_parser ${child}) + add_headers_and_sources(local_engine_udf ${child}) endforeach() + endif() + file( + GLOB children CONFIGURE_DEPENDS + RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} + ../Parser/*_function_parser) + foreach(child ${children}) + add_headers_and_sources(local_engine_function_parser ${child}) + endforeach() - message("local engine gtest sources: ${local_engine_gtest_sources}") - message("local engine udf sources: ${local_engine_udf_sources}") - message("local engine function parser sources: ${local_engine_function_parser_sources}") + message("local engine gtest sources: ${local_engine_gtest_sources}") + message("local engine udf sources: ${local_engine_udf_sources}") + message( + "local engine function parser sources: ${local_engine_function_parser_sources}" + ) - add_executable(unit_tests_local_engine - gluten_test_util.cpp - ${local_engine_gtest_sources} - ${local_engine_udf_sources} - ${local_engine_function_parser_sources}) - target_include_directories(unit_tests_local_engine PRIVATE - ${ClickHouse_SOURCE_DIR}/utils/extern-local_engine - ${CMAKE_CURRENT_SOURCE_DIR} - ) - # no-unreachable-code for GTEST_SKIP - target_compile_options(unit_tests_local_engine PRIVATE -Wno-unreachable-code) - target_link_libraries(unit_tests_local_engine PRIVATE gluten_clickhouse_backend_libs clickhouse_parsers loggers ch_contrib::gmock_all ch_contrib::gtest) - 
target_link_libraries(unit_tests_local_engine PRIVATE ch_parquet) + add_executable( + unit_tests_local_engine + gluten_test_util.cpp ${local_engine_gtest_sources} + ${local_engine_udf_sources} ${local_engine_function_parser_sources}) + target_include_directories( + unit_tests_local_engine + PRIVATE ${ClickHouse_SOURCE_DIR}/utils/extern-local_engine + ${CMAKE_CURRENT_SOURCE_DIR}) + # no-unreachable-code for GTEST_SKIP + target_compile_options(unit_tests_local_engine PRIVATE -Wno-unreachable-code) + target_link_libraries( + unit_tests_local_engine + PRIVATE gluten_clickhouse_backend_libs clickhouse_parsers loggers + ch_contrib::gmock_all ch_contrib::gtest) + target_link_libraries(unit_tests_local_engine PRIVATE ch_parquet) endif() -if (ENABLE_BENCHMARKS) - include_directories(benchmark_local_engine SYSTEM PUBLIC - ${ClickHouse_SOURCE_DIR}/utils/extern-local_engine - ) - add_executable(benchmark_local_engine - gluten_test_util.cpp - benchmark_local_engine.cpp - benchmark_parquet_read.cpp - benchmark_spark_row.cpp - benchmark_unix_timestamp_function.cpp - benchmark_spark_floor_function.cpp - benchmark_cast_float_function.cpp - benchmark_to_datetime_function.cpp - benchmark_spark_divide_function.cpp) - target_link_libraries(benchmark_local_engine PRIVATE gluten_clickhouse_backend_libs ch_contrib::gbenchmark_all loggers ch_parquet) +if(ENABLE_BENCHMARKS) + include_directories(benchmark_local_engine SYSTEM PUBLIC + ${ClickHouse_SOURCE_DIR}/utils/extern-local_engine) + add_executable( + benchmark_local_engine + gluten_test_util.cpp + benchmark_local_engine.cpp + benchmark_parquet_read.cpp + benchmark_spark_row.cpp + benchmark_unix_timestamp_function.cpp + benchmark_spark_floor_function.cpp + benchmark_cast_float_function.cpp + benchmark_to_datetime_function.cpp + benchmark_spark_divide_function.cpp) + target_link_libraries( + benchmark_local_engine + PRIVATE gluten_clickhouse_backend_libs ch_contrib::gbenchmark_all loggers + ch_parquet) endif() diff --git a/cpp/CMake/BuildGTest.cmake b/cpp/CMake/BuildGTest.cmake index fff99455c419..d85578c0593c 100644 --- a/cpp/CMake/BuildGTest.cmake +++ b/cpp/CMake/BuildGTest.cmake @@ -6,15 +6,14 @@ set(GLUTEN_GTEST_BUILD_SHA256_CHECKSUM ad7fdba11ea011c1d925b3289cf4af2c66a352e18d4c7264392fead75e919363) set(GLUTEN_GTEST_SOURCE_URL "https://github.com/google/googletest/archive/refs/tags/v${GLUTEN_GTEST_VERSION}.tar.gz" - ) +) resolve_dependency_url(GTEST) message(STATUS "Building gtest from source") FetchContent_Declare( - gtest - URL ${GLUTEN_GTEST_SOURCE_URL} - URL_HASH "${GLUTEN_GTEST_BUILD_SHA256_CHECKSUM}" -) + gtest + URL ${GLUTEN_GTEST_SOURCE_URL} + URL_HASH "${GLUTEN_GTEST_BUILD_SHA256_CHECKSUM}") FetchContent_MakeAvailable(gtest) diff --git a/cpp/CMake/BuildGflags.cmake b/cpp/CMake/BuildGflags.cmake index 4cb201115835..8e66bd6b9839 100644 --- a/cpp/CMake/BuildGflags.cmake +++ b/cpp/CMake/BuildGflags.cmake @@ -17,10 +17,10 @@ include_guard(GLOBAL) set(GLUTEN_GFLAGS_BUILD_SHA256_CHECKSUM - 34af2f15cf7367513b352bdcd2493ab14ce43692d2dcd9dfc499492966c64dcf) + 34af2f15cf7367513b352bdcd2493ab14ce43692d2dcd9dfc499492966c64dcf) string(CONCAT GLUTEN_GFLAGS_SOURCE_URL - "https://github.com/gflags/gflags/archive/refs/tags/" - "v${GLUTEN_GFLAGS_VERSION}.tar.gz") + "https://github.com/gflags/gflags/archive/refs/tags/" + "v${GLUTEN_GFLAGS_VERSION}.tar.gz") resolve_dependency_url(GFLAGS) diff --git a/cpp/CMake/BuildGlog.cmake b/cpp/CMake/BuildGlog.cmake index cf405225c313..3f0f78a16531 100644 --- a/cpp/CMake/BuildGlog.cmake +++ b/cpp/CMake/BuildGlog.cmake @@ -14,10 +14,10 
@@ include_guard(GLOBAL) set(GLUTEN_GLOG_BUILD_SHA256_CHECKSUM - 8a83bf982f37bb70825df71a9709fa90ea9f4447fb3c099e1d720a439d88bad6) + 8a83bf982f37bb70825df71a9709fa90ea9f4447fb3c099e1d720a439d88bad6) set(GLUTEN_GLOG_SOURCE_URL - "https://github.com/google/glog/archive/refs/tags/v${GLUTEN_GLOG_VERSION}.tar.gz" - ) + "https://github.com/google/glog/archive/refs/tags/v${GLUTEN_GLOG_VERSION}.tar.gz" +) resolve_dependency_url(GLOG) @@ -27,7 +27,7 @@ FetchContent_Declare( URL ${GLUTEN_GLOG_SOURCE_URL} URL_HASH SHA256=${GLUTEN_GLOG_BUILD_SHA256_CHECKSUM} PATCH_COMMAND git apply ${CMAKE_CURRENT_LIST_DIR}/glog/glog-no-export.patch - && git apply ${CMAKE_CURRENT_LIST_DIR}/glog/glog-config.patch) + && git apply ${CMAKE_CURRENT_LIST_DIR}/glog/glog-config.patch) set(BUILD_SHARED_LIBS OFF) set(WITH_UNWIND OFF) @@ -48,6 +48,6 @@ endif() # These headers are missing from the include dir but adding the src dir causes # issues with folly so we just copy it to the include dir file(COPY ${glog_SOURCE_DIR}/src/glog/platform.h - DESTINATION ${glog_BINARY_DIR}/glog) + DESTINATION ${glog_BINARY_DIR}/glog) file(COPY ${glog_SOURCE_DIR}/src/glog/log_severity.h - DESTINATION ${glog_BINARY_DIR}/glog) + DESTINATION ${glog_BINARY_DIR}/glog) diff --git a/cpp/CMake/BuildGoogleBenchmark.cmake b/cpp/CMake/BuildGoogleBenchmark.cmake index 8efbb58eab74..a71d73432f62 100644 --- a/cpp/CMake/BuildGoogleBenchmark.cmake +++ b/cpp/CMake/BuildGoogleBenchmark.cmake @@ -21,8 +21,10 @@ include(FetchContent) set(GLUTEN_GBENCHMARK_BUILD_VERSION "v1.6.0") set(GLUTEN_GBENCHMARK_SOURCE_URL "https://github.com/google/benchmark/archive/refs/tags/${GLUTEN_GBENCHMARK_BUILD_VERSION}.tar.gz" - "https://github.com/ursa-labs/thirdparty/releases/download/latest/gbenchmark-${GLUTEN_GBENCHMARK_BUILD_VERSION}.tar.gz") -set(GLUTEN_GBENCHMARK_BUILD_SHA256_CHECKSUM "1f71c72ce08d2c1310011ea6436b31e39ccab8c2db94186d26657d41747c85d6") + "https://github.com/ursa-labs/thirdparty/releases/download/latest/gbenchmark-${GLUTEN_GBENCHMARK_BUILD_VERSION}.tar.gz" +) +set(GLUTEN_GBENCHMARK_BUILD_SHA256_CHECKSUM + "1f71c72ce08d2c1310011ea6436b31e39ccab8c2db94186d26657d41747c85d6") resolve_dependency_url(GBENCHMARK) @@ -30,12 +32,11 @@ set(GBENCHMARK_CMAKE_ARGS "-fPIC -w") message(STATUS "Building google benchmark from source") FetchContent_Declare( - gbenchmark - URL ${GLUTEN_GBENCHMARK_SOURCE_URL} - URL_HASH "${GLUTEN_GBENCHMARK_BUILD_SHA256_CHECKSUM}" -) + gbenchmark + URL ${GLUTEN_GBENCHMARK_SOURCE_URL} + URL_HASH "${GLUTEN_GBENCHMARK_BUILD_SHA256_CHECKSUM}") -if (NOT gbenchmark_POPULATED) +if(NOT gbenchmark_POPULATED) # We don't want to build tests. 
set(BENCHMARK_ENABLE_TESTING OFF diff --git a/cpp/CMake/BuildMemkind.cmake b/cpp/CMake/BuildMemkind.cmake index 039db0cc0d81..2f2248de6fc4 100644 --- a/cpp/CMake/BuildMemkind.cmake +++ b/cpp/CMake/BuildMemkind.cmake @@ -26,48 +26,50 @@ endif() macro(build_hwloc) message(STATUS "Building hwloc from source") set(HWLOC_BUILD_VERSION "2.8.0") - set(HWLOC_BUILD_SHA256_CHECKSUM "311d44e99bbf6d269c2cbc569d073978d88352bc31d51e31457d4df94783172d") + set(HWLOC_BUILD_SHA256_CHECKSUM + "311d44e99bbf6d269c2cbc569d073978d88352bc31d51e31457d4df94783172d") set(HWLOC_SOURCE_URL - "https://github.com/open-mpi/hwloc/archive/refs/tags/hwloc-${HWLOC_BUILD_VERSION}.tar.gz") + "https://github.com/open-mpi/hwloc/archive/refs/tags/hwloc-${HWLOC_BUILD_VERSION}.tar.gz" + ) set(HWLOC_LIB_NAME "hwloc") set(HWLOC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/hwloc_ep-install") set(HWLOC_SOURCE_DIR "${HWLOC_PREFIX}/src/hwloc_ep") set(HWLOC_INCLUDE_DIR "${HWLOC_SOURCE_DIR}/include") set(HWLOC_LIB_DIR "${HWLOC_SOURCE_DIR}/hwloc/.libs") - set(HWLOC_STATIC_LIB_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}${HWLOC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(HWLOC_STATIC_LIB_NAME + "${CMAKE_STATIC_LIBRARY_PREFIX}${HWLOC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) set(HWLOC_STATIC_LIB_TARGETS - "${HWLOC_SOURCE_DIR}/src/.libs/${HWLOC_STATIC_LIB_NAME}") + "${HWLOC_SOURCE_DIR}/src/.libs/${HWLOC_STATIC_LIB_NAME}") set(HWLOC_CONFIGURE_ARGS - "--prefix=${HWLOC_PREFIX}" - "--with-pic" - "--enable-static" - "--disable-shared" - "--enable-plugins") - ExternalProject_Add(hwloc_ep - PREFIX ${HWLOC_PREFIX} - URL ${HWLOC_SOURCE_URL} - URL_HASH "SHA256=${HWLOC_BUILD_SHA256_CHECKSUM}" - SOURCE_DIR ${HWLOC_SOURCE_DIR} - CONFIGURE_COMMAND ./configure ${HWLOC_CONFIGURE_ARGS} - BUILD_COMMAND ${MAKE} - BUILD_BYPRODUCTS ${HWLOC_STATIC_LIB_TARGETS} - BUILD_IN_SOURCE 1) + "--prefix=${HWLOC_PREFIX}" "--with-pic" "--enable-static" + "--disable-shared" "--enable-plugins") + ExternalProject_Add( + hwloc_ep + PREFIX ${HWLOC_PREFIX} + URL ${HWLOC_SOURCE_URL} + URL_HASH "SHA256=${HWLOC_BUILD_SHA256_CHECKSUM}" + SOURCE_DIR ${HWLOC_SOURCE_DIR} + CONFIGURE_COMMAND ./configure ${HWLOC_CONFIGURE_ARGS} + BUILD_COMMAND ${MAKE} + BUILD_BYPRODUCTS ${HWLOC_STATIC_LIB_TARGETS} + BUILD_IN_SOURCE 1) - ExternalProject_Add_Step(hwloc_ep pre-configure - COMMAND ./autogen.sh - DEPENDEES download - DEPENDERS configure - WORKING_DIRECTORY ${HWLOC_SOURCE_DIR}) + ExternalProject_Add_Step( + hwloc_ep pre-configure + COMMAND ./autogen.sh + DEPENDEES download + DEPENDERS configure + WORKING_DIRECTORY ${HWLOC_SOURCE_DIR}) # The include directory must exist before it is referenced by a target. 
file(MAKE_DIRECTORY "${HWLOC_INCLUDE_DIR}") add_library(hwloc::hwloc STATIC IMPORTED) - set_target_properties(hwloc::hwloc - PROPERTIES IMPORTED_LOCATION - "${HWLOC_LIB_DIR}/${HWLOC_STATIC_LIB_NAME}" - INTERFACE_INCLUDE_DIRECTORIES - "${HWLOC_INCLUDE_DIR}") + set_target_properties( + hwloc::hwloc + PROPERTIES IMPORTED_LOCATION "${HWLOC_LIB_DIR}/${HWLOC_STATIC_LIB_NAME}" + INTERFACE_INCLUDE_DIRECTORIES "${HWLOC_INCLUDE_DIR}") add_dependencies(hwloc::hwloc hwloc_ep) endmacro() @@ -75,36 +77,43 @@ endmacro() macro(build_memkind) message(STATUS "Building Memkind from source") set(MEMKIND_BUILD_VERSION "v1.14.0") - set(MEMKIND_BUILD_SHA256_CHECKSUM "ab366b20b5a87ea655483631fc762ba6eb59eb6c3a08652e643f1ee3f06a6a12") + set(MEMKIND_BUILD_SHA256_CHECKSUM + "ab366b20b5a87ea655483631fc762ba6eb59eb6c3a08652e643f1ee3f06a6a12") set(MEMKIND_SOURCE_URL - "https://github.com/memkind/memkind/archive/refs/tags/${MEMKIND_BUILD_VERSION}.tar.gz") + "https://github.com/memkind/memkind/archive/refs/tags/${MEMKIND_BUILD_VERSION}.tar.gz" + ) set(MEMKIND_LIB_NAME "memkind") set(MEMKIND_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/memkind_ep-install") set(MEMKIND_SOURCE_DIR "${MEMKIND_PREFIX}/src/memkind_ep") set(MEMKIND_INCLUDE_DIR "${MEMKIND_SOURCE_DIR}/include") set(MEMKIND_LIB_DIR "${MEMKIND_SOURCE_DIR}/.libs") - set(MEMKIND_STATIC_LIB_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}${MEMKIND_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(MEMKIND_STATIC_LIB_NAME + "${CMAKE_STATIC_LIBRARY_PREFIX}${MEMKIND_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) set(MEMKIND_STATIC_LIB_TARGETS - "${MEMKIND_SOURCE_DIR}/src/.libs/${MEMKIND_STATIC_LIB_NAME}") - set(MEMKIND_CONFIGURE_ARGS - "--prefix=${MEMKIND_PREFIX}" - "--with-pic" - "--enable-static") - ExternalProject_Add(memkind_ep - PREFIX ${MEMKIND_PREFIX} - URL ${MEMKIND_SOURCE_URL} - URL_HASH "SHA256=${MEMKIND_BUILD_SHA256_CHECKSUM}" - SOURCE_DIR ${MEMKIND_SOURCE_DIR} - CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env LDFLAGS=-L${HWLOC_LIB_DIR} env CFLAGS=-I${HWLOC_INCLUDE_DIR} env CXXFLAGS=-I${HWLOC_INCLUDE_DIR} ./configure ${MEMKIND_CONFIGURE_ARGS} - BUILD_COMMAND ${MAKE} - BUILD_BYPRODUCTS ${MEMKIND_STATIC_LIB_TARGETS} - BUILD_IN_SOURCE 1) + "${MEMKIND_SOURCE_DIR}/src/.libs/${MEMKIND_STATIC_LIB_NAME}") + set(MEMKIND_CONFIGURE_ARGS "--prefix=${MEMKIND_PREFIX}" "--with-pic" + "--enable-static") + ExternalProject_Add( + memkind_ep + PREFIX ${MEMKIND_PREFIX} + URL ${MEMKIND_SOURCE_URL} + URL_HASH "SHA256=${MEMKIND_BUILD_SHA256_CHECKSUM}" + SOURCE_DIR ${MEMKIND_SOURCE_DIR} + CONFIGURE_COMMAND + ${CMAKE_COMMAND} -E env LDFLAGS=-L${HWLOC_LIB_DIR} env + CFLAGS=-I${HWLOC_INCLUDE_DIR} env CXXFLAGS=-I${HWLOC_INCLUDE_DIR} + ./configure ${MEMKIND_CONFIGURE_ARGS} + BUILD_COMMAND ${MAKE} + BUILD_BYPRODUCTS ${MEMKIND_STATIC_LIB_TARGETS} + BUILD_IN_SOURCE 1) - ExternalProject_Add_Step(memkind_ep pre-configure - COMMAND ./autogen.sh - DEPENDEES download - DEPENDERS configure - WORKING_DIRECTORY ${MEMKIND_SOURCE_DIR}) + ExternalProject_Add_Step( + memkind_ep pre-configure + COMMAND ./autogen.sh + DEPENDEES download + DEPENDERS configure + WORKING_DIRECTORY ${MEMKIND_SOURCE_DIR}) add_dependencies(memkind_ep hwloc::hwloc) @@ -112,12 +121,12 @@ macro(build_memkind) file(MAKE_DIRECTORY "${MEMKIND_INCLUDE_DIR}") add_library(memkind::memkind STATIC IMPORTED) - set_target_properties(memkind::memkind - PROPERTIES IMPORTED_LOCATION - "${MEMKIND_LIB_DIR}/${MEMKIND_STATIC_LIB_NAME}" - INTERFACE_INCLUDE_DIRECTORIES - "${MEMKIND_INCLUDE_DIR}") - target_link_libraries(memkind::memkind INTERFACE hwloc::hwloc dl numa pthread 
daxctl) + set_target_properties( + memkind::memkind + PROPERTIES IMPORTED_LOCATION "${MEMKIND_LIB_DIR}/${MEMKIND_STATIC_LIB_NAME}" + INTERFACE_INCLUDE_DIRECTORIES "${MEMKIND_INCLUDE_DIR}") + target_link_libraries(memkind::memkind INTERFACE hwloc::hwloc dl numa pthread + daxctl) add_dependencies(memkind::memkind memkind_ep) endmacro() diff --git a/cpp/CMake/BuildQATZstd.cmake b/cpp/CMake/BuildQATZstd.cmake index f6c152db1460..f79e9ea58fcc 100644 --- a/cpp/CMake/BuildQATZstd.cmake +++ b/cpp/CMake/BuildQATZstd.cmake @@ -28,61 +28,72 @@ macro(build_qatzstd) include(FindZstd) message(STATUS "Building QAT-ZSTD from source") - set(QATZSTD_SOURCE_URL - "https://github.com/marin-ma/QAT-ZSTD-Plugin.git") + set(QATZSTD_SOURCE_URL "https://github.com/marin-ma/QAT-ZSTD-Plugin.git") set(QATZSTD_SOURCE_BRANCH "fix-duplicate-symbol") set(QATZSTD_LIB_NAME "qatseqprod") - set(QATZSTD_PREFIX - "${CMAKE_CURRENT_BINARY_DIR}/qatzstd_ep-install") + set(QATZSTD_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/qatzstd_ep-install") set(QATZSTD_SOURCE_DIR "${QATZSTD_PREFIX}/src/qatzstd_ep") set(QATZSTD_INCLUDE_DIR "${QATZSTD_SOURCE_DIR}/src") - set(QATZSTD_STATIC_LIB_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}${QATZSTD_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}") - set(QATZSTD_STATIC_LIB_TARGETS "${QATZSTD_SOURCE_DIR}/src/${QATZSTD_STATIC_LIB_NAME}") + set(QATZSTD_STATIC_LIB_NAME + "${CMAKE_STATIC_LIBRARY_PREFIX}${QATZSTD_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + set(QATZSTD_STATIC_LIB_TARGETS + "${QATZSTD_SOURCE_DIR}/src/${QATZSTD_STATIC_LIB_NAME}") set(QATZSTD_MAKE_ARGS "ENABLE_USDM_DRV=1" "ZSTDLIB=${ZSTD_INCLUDE_DIR}") - ExternalProject_Add(qatzstd_ep - PREFIX ${QATZSTD_PREFIX} - GIT_REPOSITORY ${QATZSTD_SOURCE_URL} - GIT_TAG ${QATZSTD_SOURCE_BRANCH} - SOURCE_DIR ${QATZSTD_SOURCE_DIR} - CONFIGURE_COMMAND "" - BUILD_COMMAND ${MAKE} ${QATZSTD_MAKE_ARGS} - INSTALL_COMMAND "" - BUILD_BYPRODUCTS ${QATZSTD_STATIC_LIB_TARGETS} - BUILD_IN_SOURCE 1) + ExternalProject_Add( + qatzstd_ep + PREFIX ${QATZSTD_PREFIX} + GIT_REPOSITORY ${QATZSTD_SOURCE_URL} + GIT_TAG ${QATZSTD_SOURCE_BRANCH} + SOURCE_DIR ${QATZSTD_SOURCE_DIR} + CONFIGURE_COMMAND "" + BUILD_COMMAND ${MAKE} ${QATZSTD_MAKE_ARGS} + INSTALL_COMMAND "" + BUILD_BYPRODUCTS ${QATZSTD_STATIC_LIB_TARGETS} + BUILD_IN_SOURCE 1) add_library(qatzstd::qatzstd STATIC IMPORTED) # The include directory must exist before it is referenced by a target. 
file(MAKE_DIRECTORY "${QATZSTD_INCLUDE_DIR}") - set(QATZSTD_INCLUDE_DIRS - "${QATZSTD_INCLUDE_DIR}" - "${ZSTD_INCLUDE_DIR}") + set(QATZSTD_INCLUDE_DIRS "${QATZSTD_INCLUDE_DIR}" "${ZSTD_INCLUDE_DIR}") set(QATZSTD_LINK_LIBRARIES - "${ZSTD_LIBRARY}" - "${QAT_LIBRARY}" - "${USDM_DRV_LIBRARY}" - "${ADF_LIBRARY}" + "${ZSTD_LIBRARY}" "${QAT_LIBRARY}" "${USDM_DRV_LIBRARY}" "${ADF_LIBRARY}" "${OSAL_LIBRARY}") - set_target_properties(qatzstd::qatzstd - PROPERTIES IMPORTED_LOCATION - "${QATZSTD_STATIC_LIB_TARGETS}" - INTERFACE_INCLUDE_DIRECTORIES - "${QATZSTD_INCLUDE_DIRS}" - INTERFACE_LINK_LIBRARIES - "${QATZSTD_LINK_LIBRARIES}") + set_target_properties( + qatzstd::qatzstd + PROPERTIES IMPORTED_LOCATION "${QATZSTD_STATIC_LIB_TARGETS}" + INTERFACE_INCLUDE_DIRECTORIES "${QATZSTD_INCLUDE_DIRS}" + INTERFACE_LINK_LIBRARIES "${QATZSTD_LINK_LIBRARIES}") add_dependencies(qatzstd::qatzstd qatzstd_ep) endmacro() -find_library(QAT_LIBRARY REQUIRED NAMES qat PATHS "$ENV{ICP_ROOT}/build" NO_DEFAULT_PATH) -find_library(USDM_DRV_LIBRARY REQUIRED NAMES usdm_drv PATHS "$ENV{ICP_ROOT}/build" NO_DEFAULT_PATH) -find_library(ADF_LIBRARY REQUIRED NAMES adf PATHS "$ENV{ICP_ROOT}/build" NO_DEFAULT_PATH) -find_library(OSAL_LIBRARY REQUIRED NAMES osal PATHS "$ENV{ICP_ROOT}/build" NO_DEFAULT_PATH) +find_library( + QAT_LIBRARY REQUIRED + NAMES qat + PATHS "$ENV{ICP_ROOT}/build" + NO_DEFAULT_PATH) +find_library( + USDM_DRV_LIBRARY REQUIRED + NAMES usdm_drv + PATHS "$ENV{ICP_ROOT}/build" + NO_DEFAULT_PATH) +find_library( + ADF_LIBRARY REQUIRED + NAMES adf + PATHS "$ENV{ICP_ROOT}/build" + NO_DEFAULT_PATH) +find_library( + OSAL_LIBRARY REQUIRED + NAMES osal + PATHS "$ENV{ICP_ROOT}/build" + NO_DEFAULT_PATH) message(STATUS "Found qat: ${QAT_LIBRARY}") message(STATUS "Found usdm_drv: ${USDM_DRV_LIBRARY}") @@ -90,4 +101,3 @@ message(STATUS "Found adf: ${ADF_LIBRARY}") message(STATUS "Found osal: ${OSAL_LIBRARY}") build_qatzstd() - diff --git a/cpp/CMake/BuildQATzip.cmake b/cpp/CMake/BuildQATzip.cmake index 376f1645509a..c68ef25ada2e 100644 --- a/cpp/CMake/BuildQATzip.cmake +++ b/cpp/CMake/BuildQATzip.cmake @@ -26,38 +26,42 @@ endif() macro(build_qatzip) message(STATUS "Building QATzip from source") set(QATZIP_BUILD_VERSION "v1.1.1") - set(QATZIP_BUILD_SHA256_CHECKSUM "679f5522deb35e7ffa36f227ae49d07ef2d69a83e56bfda849303829b274e79b") + set(QATZIP_BUILD_SHA256_CHECKSUM + "679f5522deb35e7ffa36f227ae49d07ef2d69a83e56bfda849303829b274e79b") set(QATZIP_SOURCE_URL - "https://github.com/intel/QATzip/archive/refs/tags/${QATZIP_BUILD_VERSION}.tar.gz") + "https://github.com/intel/QATzip/archive/refs/tags/${QATZIP_BUILD_VERSION}.tar.gz" + ) set(QATZIP_LIB_NAME "qatzip") - set(QATZIP_PREFIX - "${CMAKE_CURRENT_BINARY_DIR}/qatzip_ep-install") + set(QATZIP_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/qatzip_ep-install") set(QATZIP_SOURCE_DIR "${QATZIP_PREFIX}/src/qatzip_ep") set(QATZIP_INCLUDE_DIR "${QATZIP_SOURCE_DIR}/include") - set(QATZIP_STATIC_LIB_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}${QATZIP_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(QATZIP_STATIC_LIB_NAME + "${CMAKE_STATIC_LIBRARY_PREFIX}${QATZIP_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) set(QATZIP_STATIC_LIB_TARGETS "${QATZIP_SOURCE_DIR}/src/.libs/${QATZIP_STATIC_LIB_NAME}") - set(QATZIP_CONFIGURE_ARGS - "--prefix=${QATZIP_PREFIX}" - "--with-pic" - "--with-ICP_ROOT=$ENV{ICP_ROOT}") + set(QATZIP_CONFIGURE_ARGS "--prefix=${QATZIP_PREFIX}" "--with-pic" + "--with-ICP_ROOT=$ENV{ICP_ROOT}") - ExternalProject_Add(qatzip_ep - PREFIX ${QATZIP_PREFIX} - URL ${QATZIP_SOURCE_URL} - URL_HASH 
"SHA256=${QATZIP_BUILD_SHA256_CHECKSUM}" - SOURCE_DIR ${QATZIP_SOURCE_DIR} - CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env QZ_ROOT=${QATZIP_SOURCE_DIR} ./configure ${QATZIP_CONFIGURE_ARGS} - BUILD_COMMAND ${MAKE} all - BUILD_BYPRODUCTS ${QATZIP_STATIC_LIB_TARGETS} - BUILD_IN_SOURCE 1) + ExternalProject_Add( + qatzip_ep + PREFIX ${QATZIP_PREFIX} + URL ${QATZIP_SOURCE_URL} + URL_HASH "SHA256=${QATZIP_BUILD_SHA256_CHECKSUM}" + SOURCE_DIR ${QATZIP_SOURCE_DIR} + CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env QZ_ROOT=${QATZIP_SOURCE_DIR} + ./configure ${QATZIP_CONFIGURE_ARGS} + BUILD_COMMAND ${MAKE} all + BUILD_BYPRODUCTS ${QATZIP_STATIC_LIB_TARGETS} + BUILD_IN_SOURCE 1) - ExternalProject_Add_Step(qatzip_ep pre-configure - COMMAND ./autogen.sh - DEPENDEES download - DEPENDERS configure - WORKING_DIRECTORY ${QATZIP_SOURCE_DIR}) + ExternalProject_Add_Step( + qatzip_ep pre-configure + COMMAND ./autogen.sh + DEPENDEES download + DEPENDERS configure + WORKING_DIRECTORY ${QATZIP_SOURCE_DIR}) # The include directory must exist before it is referenced by a target. file(MAKE_DIRECTORY "${QATZIP_INCLUDE_DIR}") @@ -73,13 +77,11 @@ macro(build_qatzip) Threads::Threads) add_library(qatzip::qatzip STATIC IMPORTED) - set_target_properties(qatzip::qatzip - PROPERTIES IMPORTED_LOCATION - "${QATZIP_STATIC_LIB_TARGETS}" - INTERFACE_INCLUDE_DIRECTORIES - "${QATZIP_INCLUDE_DIR}" - INTERFACE_LINK_LIBRARIES - "${QATZIP_LINK_LIBRARIES}") + set_target_properties( + qatzip::qatzip + PROPERTIES IMPORTED_LOCATION "${QATZIP_STATIC_LIB_TARGETS}" + INTERFACE_INCLUDE_DIRECTORIES "${QATZIP_INCLUDE_DIR}" + INTERFACE_LINK_LIBRARIES "${QATZIP_LINK_LIBRARIES}") add_dependencies(qatzip::qatzip qatzip_ep) endmacro() @@ -90,10 +92,26 @@ find_package(Threads REQUIRED) find_library(ZLIB_LIBRARY REQUIRED NAMES z) find_library(LZ4_LIBRARY REQUIRED NAMES lz4) find_library(UDEV_LIBRARY REQUIRED NAMES udev) -find_library(USDM_DRV_LIBRARY REQUIRED NAMES usdm_drv PATHS "$ENV{ICP_ROOT}/build" NO_DEFAULT_PATH) -find_library(QAT_LIBRARY REQUIRED NAMES qat PATHS "$ENV{ICP_ROOT}/build" NO_DEFAULT_PATH) -find_library(OSAL_LIBRARY REQUIRED NAMES osal PATHS "$ENV{ICP_ROOT}/build" NO_DEFAULT_PATH) -find_library(ADF_LIBRARY REQUIRED NAMES adf PATHS "$ENV{ICP_ROOT}/build" NO_DEFAULT_PATH) +find_library( + USDM_DRV_LIBRARY REQUIRED + NAMES usdm_drv + PATHS "$ENV{ICP_ROOT}/build" + NO_DEFAULT_PATH) +find_library( + QAT_LIBRARY REQUIRED + NAMES qat + PATHS "$ENV{ICP_ROOT}/build" + NO_DEFAULT_PATH) +find_library( + OSAL_LIBRARY REQUIRED + NAMES osal + PATHS "$ENV{ICP_ROOT}/build" + NO_DEFAULT_PATH) +find_library( + ADF_LIBRARY REQUIRED + NAMES adf + PATHS "$ENV{ICP_ROOT}/build" + NO_DEFAULT_PATH) message(STATUS "Found zlib: ${ZLIB_LIBRARY}") message(STATUS "Found lz4: ${LZ4_LIBRARY}") @@ -102,4 +120,3 @@ message(STATUS "Found usdm_drv: ${USDM_DRV_LIBRARY}") message(STATUS "Found qat: ${QAT_LIBRARY}") build_qatzip() - diff --git a/cpp/CMake/BuildQpl.cmake b/cpp/CMake/BuildQpl.cmake index dbfd16645342..7715bb8e767f 100644 --- a/cpp/CMake/BuildQpl.cmake +++ b/cpp/CMake/BuildQpl.cmake @@ -18,46 +18,43 @@ include(ExternalProject) macro(build_qpl) - message(STATUS "Building QPL from source") - set(QPL_BUILD_VERSION "v1.1.0") - set(QPL_BUILD_SHA256_CHECKSUM "00306000035621dfbc21007481395c46ba9723fc8add8ca5142847b94dc564c5") - set(QPL_SOURCE_URL - "https://github.com/intel/qpl/archive/refs/tags/v1.1.0.tar.gz") - set(QPL_LIB_NAME "qpl") - - set(QPL_PREFIX - "${CMAKE_CURRENT_BINARY_DIR}/qpl_ep-install") - set(QPL_SOURCE_DIR "${QPL_PREFIX}/src/qpl_ep") - set(QPL_INCLUDE_DIR 
"${QPL_PREFIX}/include") - set(QPL_LIB_DIR "${QPL_PREFIX}/lib") - set(QPL_STATIC_LIB_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}${QPL_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}${QPL_STATIC_LIB_MAJOR_VERSION}") - set(QPL_STATIC_LIB_TARGETS - "${QPL_LIB_DIR}/${QPL_STATIC_LIB_NAME}" - ) - ExternalProject_Add(qpl_ep - PREFIX ${QPL_PREFIX} - URL ${QPL_SOURCE_URL} - URL_HASH "SHA256=${QPL_BUILD_SHA256_CHECKSUM}" - SOURCE_DIR ${QPL_SOURCE_DIR} - CMAKE_ARGS - -DCMAKE_INSTALL_PREFIX=${QPL_PREFIX} - -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - -DQPL_BUILD_TESTS=OFF - -DLOG_HW_INIT=ON - BUILD_BYPRODUCTS ${QPL_STATIC_LIB_TARGETS}) - - # The include directory must exist before it is referenced by a target. - file(MAKE_DIRECTORY "${QPL_INCLUDE_DIR}") - - add_library(qpl::qpl STATIC IMPORTED) - set_target_properties(qpl::qpl - PROPERTIES IMPORTED_LOCATION - "${QPL_LIB_DIR}/${QPL_STATIC_LIB_NAME}" - INTERFACE_INCLUDE_DIRECTORIES - "${QPL_INCLUDE_DIR}") - - add_dependencies(qpl::qpl qpl_ep) + message(STATUS "Building QPL from source") + set(QPL_BUILD_VERSION "v1.1.0") + set(QPL_BUILD_SHA256_CHECKSUM + "00306000035621dfbc21007481395c46ba9723fc8add8ca5142847b94dc564c5") + set(QPL_SOURCE_URL + "https://github.com/intel/qpl/archive/refs/tags/v1.1.0.tar.gz") + set(QPL_LIB_NAME "qpl") + + set(QPL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/qpl_ep-install") + set(QPL_SOURCE_DIR "${QPL_PREFIX}/src/qpl_ep") + set(QPL_INCLUDE_DIR "${QPL_PREFIX}/include") + set(QPL_LIB_DIR "${QPL_PREFIX}/lib") + set(QPL_STATIC_LIB_NAME + "${CMAKE_STATIC_LIBRARY_PREFIX}${QPL_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}${QPL_STATIC_LIB_MAJOR_VERSION}" + ) + set(QPL_STATIC_LIB_TARGETS "${QPL_LIB_DIR}/${QPL_STATIC_LIB_NAME}") + ExternalProject_Add( + qpl_ep + PREFIX ${QPL_PREFIX} + URL ${QPL_SOURCE_URL} + URL_HASH "SHA256=${QPL_BUILD_SHA256_CHECKSUM}" + SOURCE_DIR ${QPL_SOURCE_DIR} + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${QPL_PREFIX} + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DQPL_BUILD_TESTS=OFF + -DLOG_HW_INIT=ON + BUILD_BYPRODUCTS ${QPL_STATIC_LIB_TARGETS}) + + # The include directory must exist before it is referenced by a target. + file(MAKE_DIRECTORY "${QPL_INCLUDE_DIR}") + + add_library(qpl::qpl STATIC IMPORTED) + set_target_properties( + qpl::qpl + PROPERTIES IMPORTED_LOCATION "${QPL_LIB_DIR}/${QPL_STATIC_LIB_NAME}" + INTERFACE_INCLUDE_DIRECTORIES "${QPL_INCLUDE_DIR}") + + add_dependencies(qpl::qpl qpl_ep) endmacro() build_qpl() - diff --git a/cpp/CMake/ConfigArrow.cmake b/cpp/CMake/ConfigArrow.cmake index 1ae4ece1b8ef..8f036be53411 100644 --- a/cpp/CMake/ConfigArrow.cmake +++ b/cpp/CMake/ConfigArrow.cmake @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
-if (${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") +if(${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") set(ARROW_SHARED_LIBRARY_SUFFIX ".1500.dylib") set(ARROW_SHARED_LIBRARY_PARENT_SUFFIX ".1500.1.0.dylib") else() @@ -30,22 +30,28 @@ set(ARROW_SUBSTRAIT_LIB_NAME "arrow_substrait") function(FIND_ARROW_LIB LIB_NAME) if(NOT TARGET Arrow::${LIB_NAME}) - set(ARROW_LIB_FULL_NAME ${CMAKE_SHARED_LIBRARY_PREFIX}${LIB_NAME}${ARROW_SHARED_LIBRARY_SUFFIX}) + set(ARROW_LIB_FULL_NAME + ${CMAKE_SHARED_LIBRARY_PREFIX}${LIB_NAME}${ARROW_SHARED_LIBRARY_SUFFIX}) add_library(Arrow::${LIB_NAME} SHARED IMPORTED) - find_library(ARROW_LIB_${LIB_NAME} - NAMES ${ARROW_LIB_FULL_NAME} - PATHS ${ARROW_LIB_DIR} ${ARROW_LIB64_DIR} - NO_DEFAULT_PATH) + find_library( + ARROW_LIB_${LIB_NAME} + NAMES ${ARROW_LIB_FULL_NAME} + PATHS ${ARROW_LIB_DIR} ${ARROW_LIB64_DIR} + NO_DEFAULT_PATH) if(NOT ARROW_LIB_${LIB_NAME}) message(FATAL_ERROR "Arrow library Not Found: ${ARROW_LIB_FULL_NAME}") else() message(STATUS "Found Arrow library: ${ARROW_LIB_${LIB_NAME}}") - set_target_properties(Arrow::${LIB_NAME} + set_target_properties( + Arrow::${LIB_NAME} PROPERTIES IMPORTED_LOCATION "${ARROW_LIB_${LIB_NAME}}" - INTERFACE_INCLUDE_DIRECTORIES - "${ARROW_HOME}/install/include") + INTERFACE_INCLUDE_DIRECTORIES + "${ARROW_HOME}/install/include") endif() - file(COPY ${ARROW_LIB_${LIB_NAME}} DESTINATION ${root_directory}/releases/ FOLLOW_SYMLINK_CHAIN) + file( + COPY ${ARROW_LIB_${LIB_NAME}} + DESTINATION ${root_directory}/releases/ + FOLLOW_SYMLINK_CHAIN) endif() endfunction() diff --git a/cpp/CMake/FindThrift.cmake b/cpp/CMake/FindThrift.cmake index 07028971d9fc..273500a6ae36 100644 --- a/cpp/CMake/FindThrift.cmake +++ b/cpp/CMake/FindThrift.cmake @@ -12,27 +12,26 @@ # See the License for the specific language governing permissions and # limitations under the License. -# - Find Thrift (a cross platform RPC lib/tool) +# * Find Thrift (a cross platform RPC lib/tool) # # Variables used by this module, they can change the default behaviour and need # to be set before calling find_package: # -# Thrift_ROOT - When set, this path is inspected instead of standard library -# locations as the root of the Thrift installation. -# The environment variable THRIFT_HOME overrides this variable. +# Thrift_ROOT - When set, this path is inspected instead of standard library +# locations as the root of the Thrift installation. The environment variable +# THRIFT_HOME overrides this variable. 
# -# This module defines -# Thrift_FOUND, whether Thrift is found or not -# Thrift_COMPILER_FOUND, whether Thrift compiler is found or not +# This module defines Thrift_FOUND, whether Thrift is found or not +# Thrift_COMPILER_FOUND, whether Thrift compiler is found or not # -# thrift::thrift, a library target to use Thrift -# thrift::compiler, a executable target to use Thrift compiler +# thrift::thrift, a library target to use Thrift thrift::compiler, a executable +# target to use Thrift compiler function(EXTRACT_THRIFT_VERSION) if(THRIFT_INCLUDE_DIR) file(READ "${THRIFT_INCLUDE_DIR}/thrift/config.h" THRIFT_CONFIG_H_CONTENT) - string(REGEX MATCH "#define PACKAGE_VERSION \"[0-9.]+\"" THRIFT_VERSION_DEFINITION - "${THRIFT_CONFIG_H_CONTENT}") + string(REGEX MATCH "#define PACKAGE_VERSION \"[0-9.]+\"" + THRIFT_VERSION_DEFINITION "${THRIFT_CONFIG_H_CONTENT}") string(REGEX MATCH "[0-9.]+" Thrift_VERSION "${THRIFT_VERSION_DEFINITION}") set(Thrift_VERSION "${Thrift_VERSION}" @@ -66,14 +65,16 @@ set(THRIFT_LIB_NAME_BASE "thrift${THRIFT_MSVC_LIB_SUFFIX}") if(ARROW_THRIFT_USE_SHARED) set(THRIFT_LIB_NAMES thrift) if(CMAKE_IMPORT_LIBRARY_SUFFIX) - list(APPEND - THRIFT_LIB_NAMES - "${CMAKE_IMPORT_LIBRARY_PREFIX}${THRIFT_LIB_NAME_BASE}${CMAKE_IMPORT_LIBRARY_SUFFIX}" + list( + APPEND + THRIFT_LIB_NAMES + "${CMAKE_IMPORT_LIBRARY_PREFIX}${THRIFT_LIB_NAME_BASE}${CMAKE_IMPORT_LIBRARY_SUFFIX}" ) endif() - list(APPEND - THRIFT_LIB_NAMES - "${CMAKE_SHARED_LIBRARY_PREFIX}${THRIFT_LIB_NAME_BASE}${CMAKE_SHARED_LIBRARY_SUFFIX}" + list( + APPEND + THRIFT_LIB_NAMES + "${CMAKE_SHARED_LIBRARY_PREFIX}${THRIFT_LIB_NAME_BASE}${CMAKE_SHARED_LIBRARY_SUFFIX}" ) else() set(THRIFT_LIB_NAMES @@ -82,20 +83,24 @@ else() endif() if(Thrift_ROOT) - find_library(THRIFT_LIB - NAMES ${THRIFT_LIB_NAMES} - PATHS ${Thrift_ROOT} - PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}" "lib") - find_path(THRIFT_INCLUDE_DIR thrift/Thrift.h - PATHS ${Thrift_ROOT} - PATH_SUFFIXES "include") - find_program(THRIFT_COMPILER thrift - PATHS ${Thrift_ROOT} - PATH_SUFFIXES "bin") + find_library( + THRIFT_LIB + NAMES ${THRIFT_LIB_NAMES} + PATHS ${Thrift_ROOT} + PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}" "lib") + find_path( + THRIFT_INCLUDE_DIR thrift/Thrift.h + PATHS ${Thrift_ROOT} + PATH_SUFFIXES "include") + find_program( + THRIFT_COMPILER thrift + PATHS ${Thrift_ROOT} + PATH_SUFFIXES "bin") extract_thrift_version() else() - # THRIFT-4760: The pkgconfig files are currently only installed when using autotools. - # Starting with 0.13, they are also installed for the CMake-based installations of Thrift. + # THRIFT-4760: The pkgconfig files are currently only installed when using + # autotools. Starting with 0.13, they are also installed for the CMake-based + # installations of Thrift. 
find_package(PkgConfig QUIET) pkg_check_modules(THRIFT_PC thrift) if(THRIFT_PC_FOUND) @@ -103,19 +108,22 @@ else() list(APPEND THRIFT_PC_LIBRARY_DIRS "${THRIFT_PC_LIBDIR}") - find_library(THRIFT_LIB - NAMES ${THRIFT_LIB_NAMES} - PATHS ${THRIFT_PC_LIBRARY_DIRS} - NO_DEFAULT_PATH) - find_program(THRIFT_COMPILER thrift - HINTS ${THRIFT_PC_PREFIX} - NO_DEFAULT_PATH - PATH_SUFFIXES "bin") + find_library( + THRIFT_LIB + NAMES ${THRIFT_LIB_NAMES} + PATHS ${THRIFT_PC_LIBRARY_DIRS} + NO_DEFAULT_PATH) + find_program( + THRIFT_COMPILER thrift + HINTS ${THRIFT_PC_PREFIX} + NO_DEFAULT_PATH + PATH_SUFFIXES "bin") set(Thrift_VERSION ${THRIFT_PC_VERSION}) else() - find_library(THRIFT_LIB - NAMES ${THRIFT_LIB_NAMES} - PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}" "lib") + find_library( + THRIFT_LIB + NAMES ${THRIFT_LIB_NAMES} + PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}" "lib") find_path(THRIFT_INCLUDE_DIR thrift/Thrift.h PATH_SUFFIXES "include") find_program(THRIFT_COMPILER thrift PATH_SUFFIXES "bin") extract_thrift_version() @@ -140,14 +148,15 @@ if(Thrift_FOUND) else() add_library(thrift::thrift STATIC IMPORTED) endif() - set_target_properties(thrift::thrift - PROPERTIES IMPORTED_LOCATION "${THRIFT_LIB}" - INTERFACE_INCLUDE_DIRECTORIES "${THRIFT_INCLUDE_DIR}") + set_target_properties( + thrift::thrift + PROPERTIES IMPORTED_LOCATION "${THRIFT_LIB}" INTERFACE_INCLUDE_DIRECTORIES + "${THRIFT_INCLUDE_DIR}") if(WIN32 AND NOT MSVC_TOOLCHAIN) - # We don't need this for Visual C++ because Thrift uses - # "#pragma comment(lib, "Ws2_32.lib")" in - # thrift/windows/config.h for Visual C++. - set_target_properties(thrift::thrift PROPERTIES INTERFACE_LINK_LIBRARIES "ws2_32") + # We don't need this for Visual C++ because Thrift uses "#pragma + # comment(lib, "Ws2_32.lib")" in thrift/windows/config.h for Visual C++. + set_target_properties(thrift::thrift PROPERTIES INTERFACE_LINK_LIBRARIES + "ws2_32") endif() if(Thrift_COMPILER_FOUND) diff --git a/cpp/CMake/FindZstd.cmake b/cpp/CMake/FindZstd.cmake index a7efd5adfd3f..62e8b874d735 100644 --- a/cpp/CMake/FindZstd.cmake +++ b/cpp/CMake/FindZstd.cmake @@ -15,43 +15,44 @@ # specific language governing permissions and limitations # under the License. 
-# ZSTD_HOME environmental variable is used to check for Zstd headers and static library +# ZSTD_HOME environmental variable is used to check for Zstd headers and static +# library -# ZSTD_INCLUDE_DIR: directory containing headers -# ZSTD_LIBRARY: path to libzstd.so -# ZSTD_FOUND: whether zstd has been found +# ZSTD_INCLUDE_DIR: directory containing headers ZSTD_LIBRARY: path to +# libzstd.so ZSTD_FOUND: whether zstd has been found -if (NOT "$ENV{ZSTD_HOME}" STREQUAL "") +if(NOT "$ENV{ZSTD_HOME}" STREQUAL "") file(TO_CMAKE_PATH "$ENV{ZSTD_HOME}" _zstd_path) message(STATUS "ZSTD_HOME: ${_zstd_path}") else() set(_zstd_path "/usr/local") endif() -find_path(ZSTD_INCLUDE_DIR zstd.h HINTS - ${_zstd_path} - PATH_SUFFIXES "include") +find_path( + ZSTD_INCLUDE_DIR zstd.h + HINTS ${_zstd_path} + PATH_SUFFIXES "include") -find_library (ZSTD_LIBRARY NAMES zstd HINTS - ${_zstd_path} - PATH_SUFFIXES "lib") +find_library( + ZSTD_LIBRARY + NAMES zstd + HINTS ${_zstd_path} + PATH_SUFFIXES "lib") -if (ZSTD_INCLUDE_DIR AND ZSTD_LIBRARY) +if(ZSTD_INCLUDE_DIR AND ZSTD_LIBRARY) set(ZSTD_FOUND TRUE) set(ZSTD_HEADER_NAME zstd.h) set(ZSTD_HEADER ${ZSTD_INCLUDE_DIR}/${ZSTD_HEADER_NAME}) -else () +else() set(ZSTD_FOUND FALSE) -endif () +endif() -if (ZSTD_FOUND) +if(ZSTD_FOUND) message(STATUS "Found the zstd header: ${ZSTD_HEADER}") message(STATUS "Found the zstd static library: ${ZSTD_LIBRARY}") -else () - message(FATAL_ERROR ZSTD_ERR_MSG "Could not find zstd. Looked in ${_zstd_path}.") -endif () - -mark_as_advanced( - ZSTD_INCLUDE_DIR - ZSTD_LIBRARY) +else() + message(FATAL_ERROR ZSTD_ERR_MSG + "Could not find zstd. Looked in ${_zstd_path}.") +endif() +mark_as_advanced(ZSTD_INCLUDE_DIR ZSTD_LIBRARY) diff --git a/cpp/CMake/Findglog.cmake b/cpp/CMake/Findglog.cmake index b165fd80f3d7..6d9dbdacf1b1 100644 --- a/cpp/CMake/Findglog.cmake +++ b/cpp/CMake/Findglog.cmake @@ -22,23 +22,17 @@ if(NOT BUILD_GLOG) include(FindPackageHandleStandardArgs) include(SelectLibraryConfigurations) - find_library(GLOG_LIBRARY_RELEASE glog - PATHS ${GLOG_LIBRARYDIR}) - find_library(GLOG_LIBRARY_DEBUG glogd - PATHS ${GLOG_LIBRARYDIR}) + find_library(GLOG_LIBRARY_RELEASE glog PATHS ${GLOG_LIBRARYDIR}) + find_library(GLOG_LIBRARY_DEBUG glogd PATHS ${GLOG_LIBRARYDIR}) - find_path(GLOG_INCLUDE_DIR glog/logging.h - PATHS ${GLOG_INCLUDEDIR}) + find_path(GLOG_INCLUDE_DIR glog/logging.h PATHS ${GLOG_INCLUDEDIR}) select_library_configurations(GLOG) - find_package_handle_standard_args(glog DEFAULT_MSG - GLOG_LIBRARY - GLOG_INCLUDE_DIR) + find_package_handle_standard_args(glog DEFAULT_MSG GLOG_LIBRARY + GLOG_INCLUDE_DIR) - mark_as_advanced( - GLOG_LIBRARY - GLOG_INCLUDE_DIR) + mark_as_advanced(GLOG_LIBRARY GLOG_INCLUDE_DIR) endif() if(NOT glog_FOUND) @@ -56,26 +50,40 @@ endif() # glog::glog may already exist. Use google::glog to avoid conflicts. 
add_library(google::glog ${libglog_type} IMPORTED) -set_target_properties(google::glog PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${GLOG_INCLUDE_DIR}") -set_target_properties(google::glog PROPERTIES IMPORTED_LINK_INTERFACE_LANGUAGES "C" IMPORTED_LOCATION "${GLOG_LIBRARY}") +set_target_properties(google::glog PROPERTIES INTERFACE_INCLUDE_DIRECTORIES + "${GLOG_INCLUDE_DIR}") +set_target_properties( + google::glog PROPERTIES IMPORTED_LINK_INTERFACE_LANGUAGES "C" + IMPORTED_LOCATION "${GLOG_LIBRARY}") set(GLUTEN_GFLAGS_VERSION 2.2.2) -find_package(gflags ${GLUTEN_GFLAGS_VERSION} CONFIG COMPONENTS ${libgflags_component}) +find_package(gflags ${GLUTEN_GFLAGS_VERSION} CONFIG + COMPONENTS ${libgflags_component}) if(NOT gflags_FOUND AND glog_FOUND) - message(FATAL_ERROR "Glog found but Gflags not found. Set BUILD_GLOG=ON and reload cmake.") + message( + FATAL_ERROR + "Glog found but Gflags not found. Set BUILD_GLOG=ON and reload cmake.") endif() if(gflags_FOUND) - if(NOT TARGET gflags::gflags_${libgflags_component} AND NOT TARGET gflags_${libgflags_component}) - message(FATAL_ERROR "Found Gflags but missing component gflags_${libgflags_component}") + if(NOT TARGET gflags::gflags_${libgflags_component} + AND NOT TARGET gflags_${libgflags_component}) + message( + FATAL_ERROR + "Found Gflags but missing component gflags_${libgflags_component}") endif() if(TARGET gflags::gflags_${libgflags_component}) - set_target_properties(google::glog PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES gflags::gflags_${libgflags_component}) + set_target_properties( + google::glog PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES + gflags::gflags_${libgflags_component}) else() - set_target_properties(google::glog PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES gflags_${libgflags_component}) + set_target_properties( + google::glog PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES + gflags_${libgflags_component}) endif() else() include(BuildGflags) - set_target_properties(google::glog PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES gflags_static) + set_target_properties( + google::glog PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES gflags_static) endif() diff --git a/cpp/CMake/Findjemalloc_pic.cmake b/cpp/CMake/Findjemalloc_pic.cmake index 9511dcd33663..fae9f0d7ad80 100644 --- a/cpp/CMake/Findjemalloc_pic.cmake +++ b/cpp/CMake/Findjemalloc_pic.cmake @@ -20,7 +20,7 @@ macro(find_jemalloc) # Find the existing Protobuf set(CMAKE_FIND_LIBRARY_SUFFIXES ".a") find_package(jemalloc_pic) - if ("${Jemalloc_LIBRARY}" STREQUAL "Jemalloc_LIBRARY-NOTFOUND") + if("${Jemalloc_LIBRARY}" STREQUAL "Jemalloc_LIBRARY-NOTFOUND") message(FATAL_ERROR "Jemalloc Library Not Found") endif() set(PROTOC_BIN ${Jemalloc_PROTOC_EXECUTABLE}) @@ -35,22 +35,18 @@ macro(build_jemalloc) else() set(JEMALLOC_BUILD_VERSION "5.2.1") set(JEMALLOC_SOURCE_URL - "https://github.com/jemalloc/jemalloc/releases/download/${JEMALLOC_BUILD_VERSION}/jemalloc-${JEMALLOC_BUILD_VERSION}.tar.bz2" - "https://github.com/ursa-labs/thirdparty/releases/download/latest/jemalloc-${JEMALLOC_BUILD_VERSION}.tar.bz2" - ) + "https://github.com/jemalloc/jemalloc/releases/download/${JEMALLOC_BUILD_VERSION}/jemalloc-${JEMALLOC_BUILD_VERSION}.tar.bz2" + "https://github.com/ursa-labs/thirdparty/releases/download/latest/jemalloc-${JEMALLOC_BUILD_VERSION}.tar.bz2" + ) endif() set(JEMALLOC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/jemalloc_ep-install") set(JEMALLOC_LIB_DIR "${JEMALLOC_PREFIX}/lib") set(JEMALLOC_INCLUDE_DIR "${JEMALLOC_PREFIX}/include") - set( - JEMALLOC_STATIC_LIB - 
"${JEMALLOC_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}jemalloc_pic${CMAKE_STATIC_LIBRARY_SUFFIX}" - ) - set( - JEMALLOC_INCLUDE - "${JEMALLOC_PREFIX}/include" - ) + set(JEMALLOC_STATIC_LIB + "${JEMALLOC_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}jemalloc_pic${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + set(JEMALLOC_INCLUDE "${JEMALLOC_PREFIX}/include") set(JEMALLOC_CONFIGURE_ARGS "AR=${CMAKE_AR}" "CC=${CMAKE_C_COMPILER}" @@ -66,14 +62,15 @@ macro(build_jemalloc) "CFLAGS=-fPIC" "CXXFLAGS=-fPIC") set(JEMALLOC_BUILD_COMMAND ${MAKE} ${MAKE_BUILD_ARGS}) - ExternalProject_Add(jemalloc_ep - URL ${JEMALLOC_SOURCE_URL} - PATCH_COMMAND touch doc/jemalloc.3 doc/jemalloc.html - CONFIGURE_COMMAND "./configure" ${JEMALLOC_CONFIGURE_ARGS} - BUILD_COMMAND ${JEMALLOC_BUILD_COMMAND} - BUILD_IN_SOURCE 1 - BUILD_BYPRODUCTS "${JEMALLOC_STATIC_LIB}" - INSTALL_COMMAND make install) + ExternalProject_Add( + jemalloc_ep + URL ${JEMALLOC_SOURCE_URL} + PATCH_COMMAND touch doc/jemalloc.3 doc/jemalloc.html + CONFIGURE_COMMAND "./configure" ${JEMALLOC_CONFIGURE_ARGS} + BUILD_COMMAND ${JEMALLOC_BUILD_COMMAND} + BUILD_IN_SOURCE 1 + BUILD_BYPRODUCTS "${JEMALLOC_STATIC_LIB}" + INSTALL_COMMAND make install) file(MAKE_DIRECTORY "${JEMALLOC_INCLUDE_DIR}") add_library(jemalloc::libjemalloc STATIC IMPORTED) @@ -81,7 +78,6 @@ macro(build_jemalloc) jemalloc::libjemalloc PROPERTIES INTERFACE_LINK_LIBRARIES Threads::Threads IMPORTED_LOCATION "${JEMALLOC_STATIC_LIB}" - INTERFACE_INCLUDE_DIRECTORIES - "${JEMALLOC_INCLUDE_DIR}") + INTERFACE_INCLUDE_DIRECTORIES "${JEMALLOC_INCLUDE_DIR}") add_dependencies(jemalloc::libjemalloc protobuf_ep) endmacro() diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d6e3eeb133f7..3ee336dd6a14 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -19,13 +19,13 @@ message(STATUS "Building using CMake version: ${CMAKE_VERSION}") set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) -# The set(CACHE) command does not remove any normal variable of the same name from the current scope -# https://cmake.org/cmake/help/latest/policy/CMP0126.html +# The set(CACHE) command does not remove any normal variable of the same name +# from the current scope https://cmake.org/cmake/help/latest/policy/CMP0126.html if(POLICY CMP0126) cmake_policy(SET CMP0126 NEW) endif() -if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0") +if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0") cmake_policy(SET CMP0135 NEW) endif() @@ -36,7 +36,9 @@ if(NOT DEFINED CMAKE_BUILD_TYPE) endif() set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMake" ${CMAKE_MODULE_PATH}) -set(CMAKE_EXPORT_COMPILE_COMMANDS ON CACHE INTERNAL "") +set(CMAKE_EXPORT_COMPILE_COMMANDS + ON + CACHE INTERNAL "") project(gluten) @@ -60,12 +62,12 @@ option(ENABLE_ABFS "Enable ABFS" OFF) set(root_directory ${PROJECT_BINARY_DIR}) get_filename_component(GLUTEN_HOME ${CMAKE_SOURCE_DIR} DIRECTORY) -if (NOT DEFINED VELOX_HOME) +if(NOT DEFINED VELOX_HOME) set(VELOX_HOME ${GLUTEN_HOME}/ep/build-velox/build/velox_ep) message(STATUS "Set VELOX_HOME to ${VELOX_HOME}") endif() -if (${CMAKE_BUILD_TYPE} STREQUAL "Debug") +if(${CMAKE_BUILD_TYPE} STREQUAL "Debug") set(ARROW_HOME ${VELOX_HOME}/_build/debug/third_party/arrow_ep) else() set(ARROW_HOME ${VELOX_HOME}/_build/release/third_party/arrow_ep) @@ -77,10 +79,10 @@ include(ResolveDependency) # Compiler flags # -if (${CMAKE_BUILD_TYPE} STREQUAL "Debug") +if(${CMAKE_BUILD_TYPE} STREQUAL "Debug") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -ggdb -O0") message(STATUS "CMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}") -else () 
+else() add_definitions(-DNDEBUG) message(STATUS "Add definition NDEBUG") endif() @@ -97,22 +99,20 @@ set(KNOWN_WARNINGS -Wno-ignored-qualifiers") if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") - set(KNOWN_WARNINGS - "-Wno-error=unused-but-set-variable \ + set(KNOWN_WARNINGS "-Wno-error=unused-but-set-variable \ ${KNOWN_WARNINGS}") if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 11) - set(KNOWN_WARNINGS - "-Wno-error=maybe-uninitialized \ + set(KNOWN_WARNINGS "-Wno-error=maybe-uninitialized \ ${KNOWN_WARNINGS}") endif() -elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") +elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") # Experimental set(KNOWN_WARNINGS "-Wno-implicit-int-float-conversion \ -Wno-nullability-completeness \ -Wno-mismatched-tags \ ${KNOWN_WARNINGS}") -elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang") +elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang") # Experimental set(KNOWN_WARNINGS "-Wno-implicit-int-float-conversion \ @@ -126,15 +126,16 @@ else() endif() # see https://issues.apache.org/jira/browse/ARROW-4665 -if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") +if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") set(KNOWN_WARNINGS "-Wno-macro-redefined \ -Wno-nullability-completeness \ -Wno-pessimizing-move \ -Wno-mismatched-tags \ ${KNOWN_WARNINGS}") - # Specific definition for an issue with boost/stacktrace when building on macOS. - # See https://github.com/boostorg/stacktrace/issues/88 and comments therein. + # Specific definition for an issue with boost/stacktrace when building on + # macOS. See https://github.com/boostorg/stacktrace/issues/88 and comments + # therein. add_compile_definitions(_GNU_SOURCE) endif() @@ -166,17 +167,11 @@ endif() function(ADD_TEST_CASE TEST_NAME) set(options) set(one_value_args) - set(multi_value_args - SOURCES - EXTRA_LINK_LIBS - EXTRA_INCLUDES - EXTRA_DEPENDENCIES) - - cmake_parse_arguments(ARG - "${options}" - "${one_value_args}" - "${multi_value_args}" - ${ARGN}) + set(multi_value_args SOURCES EXTRA_LINK_LIBS EXTRA_INCLUDES + EXTRA_DEPENDENCIES) + + cmake_parse_arguments(ARG "${options}" "${one_value_args}" + "${multi_value_args}" ${ARGN}) if(ARG_SOURCES) set(SOURCES ${ARG_SOURCES}) @@ -185,7 +180,8 @@ function(ADD_TEST_CASE TEST_NAME) endif() add_executable(${TEST_NAME} ${SOURCES}) - target_link_libraries(${TEST_NAME} gluten google::glog GTest::gtest GTest::gtest_main Threads::Threads) + target_link_libraries(${TEST_NAME} gluten google::glog GTest::gtest + GTest::gtest_main Threads::Threads) target_include_directories(${TEST_NAME} PRIVATE ${CMAKE_SOURCE_DIR}/core) if(ARG_EXTRA_LINK_LIBS) diff --git a/cpp/core/CMakeLists.txt b/cpp/core/CMakeLists.txt index dc9ce3435c38..3a4d6e9e8792 100644 --- a/cpp/core/CMakeLists.txt +++ b/cpp/core/CMakeLists.txt @@ -23,25 +23,27 @@ include(GNUInstallDirs) include(CheckCXXCompilerFlag) # Only set arch=native for non-AppleClang compilers. 
-if (NOT CMAKE_CXX_COMPILER_ID MATCHES "AppleClang") +if(NOT CMAKE_CXX_COMPILER_ID MATCHES "AppleClang") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native") endif() set(BOOST_MIN_VERSION "1.42.0") find_package(Boost REQUIRED) -INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIRS}) +include_directories(${Boost_INCLUDE_DIRS}) set(source_root_directory ${CMAKE_CURRENT_SOURCE_DIR}) -if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0") +if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0") cmake_policy(SET CMP0135 NEW) endif() set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMake" ${CMAKE_MODULE_PATH}) -set(SUBSTRAIT_PROTO_SRC_DIR ${GLUTEN_HOME}/gluten-core/src/main/resources/substrait/proto) +set(SUBSTRAIT_PROTO_SRC_DIR + ${GLUTEN_HOME}/gluten-core/src/main/resources/substrait/proto) message(STATUS "Set Substrait Proto Directory in ${SUBSTRAIT_PROTO_SRC_DIR}") -set(GLUTEN_PROTO_SRC_DIR ${GLUTEN_HOME}/gluten-core/src/main/resources/org/apache/gluten/proto) +set(GLUTEN_PROTO_SRC_DIR + ${GLUTEN_HOME}/gluten-core/src/main/resources/org/apache/gluten/proto) message(STATUS "Set Gluten Proto Directory in ${GLUTEN_PROTO_SRC_DIR}") find_program(CCACHE_FOUND ccache) @@ -58,31 +60,25 @@ macro(build_protobuf) set(PROTOBUF_SOURCE_URL "$ENV{GLUTEN_PROTOBUF_URL}") else() set(PROTOBUF_BUILD_VERSION "21.4") - set (PROTOBUF_SOURCE_URL - "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOBUF_BUILD_VERSION}/protobuf-all-${PROTOBUF_BUILD_VERSION}.tar.gz") + set(PROTOBUF_SOURCE_URL + "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOBUF_BUILD_VERSION}/protobuf-all-${PROTOBUF_BUILD_VERSION}.tar.gz" + ) endif() - set(PROTOBUF_BUILD_SHA256_CHECKSUM "6c5e1b0788afba4569aeebb2cfe205cb154aa01deacaba0cd26442f3b761a836") + set(PROTOBUF_BUILD_SHA256_CHECKSUM + "6c5e1b0788afba4569aeebb2cfe205cb154aa01deacaba0cd26442f3b761a836") set(PROTOBUF_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/protobuf_ep-install") set(PROTOBUF_INCLUDE_DIR "${PROTOBUF_PREFIX}/include") - set( - PROTOBUF_STATIC_LIB - "${PROTOBUF_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}protobuf${CMAKE_STATIC_LIBRARY_SUFFIX}" - ) - set( - PROTOC_STATIC_LIB - "${PROTOBUF_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}protoc${CMAKE_STATIC_LIBRARY_SUFFIX}" - ) - set( - PROTOC_BIN - "${PROTOBUF_PREFIX}/bin/protoc" - ) - set( - PROTOBUF_INCLUDE - "${PROTOBUF_PREFIX}/include" - CACHE PATH - "Protobuf include path" - ) + set(PROTOBUF_STATIC_LIB + "${PROTOBUF_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}protobuf${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + set(PROTOC_STATIC_LIB + "${PROTOBUF_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}protoc${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + set(PROTOC_BIN "${PROTOBUF_PREFIX}/bin/protoc") + set(PROTOBUF_INCLUDE + "${PROTOBUF_PREFIX}/include" + CACHE PATH "Protobuf include path") set(PROTOBUF_COMPILER "${PROTOBUF_PREFIX}/bin/protoc") set(PROTOBUF_CONFIGURE_ARGS "AR=${CMAKE_AR}" @@ -94,22 +90,23 @@ macro(build_protobuf) "CFLAGS=-fPIC" "CXXFLAGS=-fPIC") set(PROTOBUF_BUILD_COMMAND ${MAKE} ${MAKE_BUILD_ARGS}) - ExternalProject_Add(protobuf_ep - PREFIX protobuf_ep - CONFIGURE_COMMAND ./autogen.sh COMMAND "./configure" ${PROTOBUF_CONFIGURE_ARGS} - BUILD_BYPRODUCTS "${PROTOBUF_STATIC_LIB}" "${PROTOBUF_COMPILER}" - BUILD_COMMAND ${PROTOBUF_BUILD_COMMAND} - BUILD_IN_SOURCE 1 - URL ${PROTOBUF_SOURCE_URL} - URL_HASH "SHA256=${PROTOBUF_BUILD_SHA256_CHECKSUM}" - ) + ExternalProject_Add( + protobuf_ep + PREFIX protobuf_ep + CONFIGURE_COMMAND ./autogen.sh + COMMAND "./configure" ${PROTOBUF_CONFIGURE_ARGS} + BUILD_BYPRODUCTS "${PROTOBUF_STATIC_LIB}" 
"${PROTOBUF_COMPILER}" + BUILD_COMMAND ${PROTOBUF_BUILD_COMMAND} + BUILD_IN_SOURCE 1 + URL ${PROTOBUF_SOURCE_URL} + URL_HASH "SHA256=${PROTOBUF_BUILD_SHA256_CHECKSUM}") file(MAKE_DIRECTORY "${PROTOBUF_INCLUDE_DIR}") add_library(protobuf::libprotobuf STATIC IMPORTED) set_target_properties( protobuf::libprotobuf - PROPERTIES IMPORTED_LOCATION "${PROTOBUF_STATIC_LIB}" INTERFACE_INCLUDE_DIRECTORIES - "${PROTOBUF_INCLUDE_DIR}") + PROPERTIES IMPORTED_LOCATION "${PROTOBUF_STATIC_LIB}" + INTERFACE_INCLUDE_DIRECTORIES "${PROTOBUF_INCLUDE_DIR}") add_dependencies(protobuf::libprotobuf protobuf_ep) endmacro() @@ -117,17 +114,19 @@ macro(find_protobuf) # Find the existing Protobuf set(CMAKE_FIND_LIBRARY_SUFFIXES ".a") find_package(Protobuf) - if ("${Protobuf_LIBRARY}" STREQUAL "Protobuf_LIBRARY-NOTFOUND") + if("${Protobuf_LIBRARY}" STREQUAL "Protobuf_LIBRARY-NOTFOUND") message(FATAL_ERROR "Protobuf Library Not Found") endif() set(PROTOC_BIN ${Protobuf_PROTOC_EXECUTABLE}) - set(PROTOBUF_INCLUDE "${Protobuf_INCLUDE_DIRS}" CACHE PATH "Protobuf include path") + set(PROTOBUF_INCLUDE + "${Protobuf_INCLUDE_DIRS}" + CACHE PATH "Protobuf include path") endmacro() if(USE_AVX512) # Only enable additional instruction sets if they are supported message(STATUS "System processor: ${CMAKE_SYSTEM_PROCESSOR}") - if (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)") + if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)") set(AVX512_FLAG "-march=skylake-avx512") check_cxx_compiler_flag(${AVX512_FLAG} CXX_SUPPORTS_AVX512) if(NOT CXX_SUPPORTS_AVX512) @@ -135,7 +134,7 @@ if(USE_AVX512) endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX512_FLAG}") add_definitions(-DCOLUMNAR_PLUGIN_USE_AVX512) - endif () + endif() endif() # Set up Proto @@ -144,83 +143,90 @@ file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/proto) # List Substrait Proto compiled files file(GLOB SUBSTRAIT_PROTO_FILES ${SUBSTRAIT_PROTO_SRC_DIR}/substrait/*.proto - ${SUBSTRAIT_PROTO_SRC_DIR}/substrait/extensions/*.proto) -FOREACH(PROTO ${SUBSTRAIT_PROTO_FILES}) + ${SUBSTRAIT_PROTO_SRC_DIR}/substrait/extensions/*.proto) +foreach(PROTO ${SUBSTRAIT_PROTO_FILES}) file(RELATIVE_PATH REL_PROTO ${SUBSTRAIT_PROTO_SRC_DIR} ${PROTO}) string(REGEX REPLACE "\\.proto" "" PROTO_NAME ${REL_PROTO}) - LIST(APPEND SUBSTRAIT_PROTO_SRCS "${PROTO_OUTPUT_DIR}/${PROTO_NAME}.pb.cc") - LIST(APPEND SUBSTRAIT_PROTO_HDRS "${PROTO_OUTPUT_DIR}/${PROTO_NAME}.pb.h") -ENDFOREACH() -set(SUBSTRAIT_PROTO_OUTPUT_FILES ${SUBSTRAIT_PROTO_HDRS} ${SUBSTRAIT_PROTO_SRCS}) -set_source_files_properties(${SUBSTRAIT_PROTO_OUTPUT_FILES} PROPERTIES GENERATED TRUE) -get_filename_component(SUBSTRAIT_PROTO_DIR ${SUBSTRAIT_PROTO_SRC_DIR}/ DIRECTORY) + list(APPEND SUBSTRAIT_PROTO_SRCS "${PROTO_OUTPUT_DIR}/${PROTO_NAME}.pb.cc") + list(APPEND SUBSTRAIT_PROTO_HDRS "${PROTO_OUTPUT_DIR}/${PROTO_NAME}.pb.h") +endforeach() +set(SUBSTRAIT_PROTO_OUTPUT_FILES ${SUBSTRAIT_PROTO_HDRS} + ${SUBSTRAIT_PROTO_SRCS}) +set_source_files_properties(${SUBSTRAIT_PROTO_OUTPUT_FILES} PROPERTIES GENERATED + TRUE) +get_filename_component(SUBSTRAIT_PROTO_DIR ${SUBSTRAIT_PROTO_SRC_DIR}/ + DIRECTORY) # List Gluten Proto compiled files file(GLOB GLUTEN_PROTO_FILES ${GLUTEN_PROTO_SRC_DIR}/*.proto) -FOREACH(PROTO ${GLUTEN_PROTO_FILES}) +foreach(PROTO ${GLUTEN_PROTO_FILES}) file(RELATIVE_PATH REL_PROTO ${GLUTEN_PROTO_SRC_DIR} ${PROTO}) string(REGEX REPLACE "\\.proto" "" PROTO_NAME ${REL_PROTO}) - LIST(APPEND GLUTEN_PROTO_SRCS "${PROTO_OUTPUT_DIR}/${PROTO_NAME}.pb.cc") - LIST(APPEND GLUTEN_PROTO_HDRS 
"${PROTO_OUTPUT_DIR}/${PROTO_NAME}.pb.h") -ENDFOREACH() + list(APPEND GLUTEN_PROTO_SRCS "${PROTO_OUTPUT_DIR}/${PROTO_NAME}.pb.cc") + list(APPEND GLUTEN_PROTO_HDRS "${PROTO_OUTPUT_DIR}/${PROTO_NAME}.pb.h") +endforeach() set(GLUTEN_PROTO_OUTPUT_FILES ${GLUTEN_PROTO_HDRS} ${GLUTEN_PROTO_SRCS}) -set_source_files_properties(${GLUTEN_PROTO_OUTPUT_FILES} PROPERTIES GENERATED TRUE) +set_source_files_properties(${GLUTEN_PROTO_OUTPUT_FILES} PROPERTIES GENERATED + TRUE) get_filename_component(GLUTEN_PROTO_DIR ${GLUTEN_PROTO_SRC_DIR}/ DIRECTORY) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations -Wno-attributes") +set(CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations -Wno-attributes") message("Core module final CMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}") set(SPARK_COLUMNAR_PLUGIN_SRCS - ${SUBSTRAIT_PROTO_SRCS} - ${GLUTEN_PROTO_SRCS} - compute/Runtime.cc - compute/ProtobufUtils.cc - compute/ResultIterator.cc - config/GlutenConfig.cc - jni/JniWrapper.cc - memory/AllocationListener.cc - memory/MemoryAllocator.cc - memory/ArrowMemoryPool.cc - memory/ColumnarBatch.cc - operators/writer/ArrowWriter.cc - shuffle/FallbackRangePartitioner.cc - shuffle/HashPartitioner.cc - shuffle/LocalPartitionWriter.cc - shuffle/Options.cc - shuffle/Partitioner.cc - shuffle/Partitioning.cc - shuffle/Payload.cc - shuffle/rss/RssPartitionWriter.cc - shuffle/RoundRobinPartitioner.cc - shuffle/ShuffleMemoryPool.cc - shuffle/ShuffleReader.cc - shuffle/SinglePartitioner.cc - shuffle/Spill.cc - shuffle/Utils.cc - utils/Compression.cc - utils/StringUtil.cc - utils/ObjectStore.cc - jni/JniError.cc - jni/JniCommon.cc) + ${SUBSTRAIT_PROTO_SRCS} + ${GLUTEN_PROTO_SRCS} + compute/Runtime.cc + compute/ProtobufUtils.cc + compute/ResultIterator.cc + config/GlutenConfig.cc + jni/JniWrapper.cc + memory/AllocationListener.cc + memory/MemoryAllocator.cc + memory/ArrowMemoryPool.cc + memory/ColumnarBatch.cc + operators/writer/ArrowWriter.cc + shuffle/FallbackRangePartitioner.cc + shuffle/HashPartitioner.cc + shuffle/LocalPartitionWriter.cc + shuffle/Options.cc + shuffle/Partitioner.cc + shuffle/Partitioning.cc + shuffle/Payload.cc + shuffle/rss/RssPartitionWriter.cc + shuffle/RoundRobinPartitioner.cc + shuffle/ShuffleMemoryPool.cc + shuffle/ShuffleReader.cc + shuffle/SinglePartitioner.cc + shuffle/Spill.cc + shuffle/Utils.cc + utils/Compression.cc + utils/StringUtil.cc + utils/ObjectStore.cc + jni/JniError.cc + jni/JniCommon.cc) file(MAKE_DIRECTORY ${root_directory}/releases) add_library(gluten SHARED ${SPARK_COLUMNAR_PLUGIN_SRCS}) add_dependencies(gluten jni_proto) if(ENABLE_GLUTEN_VCPKG) - # Hide symbols of some static dependencies. Otherwise, if such dependencies are already - # statically linked to libvelox.so, a runtime error will be reported: xxx is being linked - # both statically and dynamically. - target_link_options(gluten PRIVATE -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/symbols.map) + # Hide symbols of some static dependencies. Otherwise, if such dependencies + # are already statically linked to libvelox.so, a runtime error will be + # reported: xxx is being linked both statically and dynamically. 
+ target_link_options( + gluten PRIVATE -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/symbols.map) endif() if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.0) - execute_process(COMMAND ${CMAKE_C_COMPILER} -print-file-name=libstdc++fs.a + execute_process( + COMMAND ${CMAKE_C_COMPILER} -print-file-name=libstdc++fs.a RESULT_VARIABLE LIBSTDCXXFS_STATIC_RESULT OUTPUT_VARIABLE LIBSTDCXXFS_STATIC_PATH OUTPUT_STRIP_TRAILING_WHITESPACE) - if (LIBSTDCXXFS_STATIC_RESULT EQUAL 0 AND EXISTS "${LIBSTDCXXFS_STATIC_PATH}") + if(LIBSTDCXXFS_STATIC_RESULT EQUAL 0 AND EXISTS "${LIBSTDCXXFS_STATIC_PATH}") message(STATUS "libstdc++fs.a found at: ${LIBSTDCXXFS_STATIC_PATH}") target_link_libraries(gluten PRIVATE ${LIBSTDCXXFS_STATIC_PATH}) else() @@ -243,57 +249,55 @@ if(ENABLE_QAT) include(BuildQATzip) include(BuildQATZstd) target_sources(gluten PRIVATE utils/qat/QatCodec.cc) - target_include_directories(gluten PUBLIC ${QATZIP_INCLUDE_DIR} ${QATZSTD_INCLUDE_DIR}) + target_include_directories(gluten PUBLIC ${QATZIP_INCLUDE_DIR} + ${QATZSTD_INCLUDE_DIR}) target_link_libraries(gluten PUBLIC qatzip::qatzip qatzstd::qatzstd) endif() if(ENABLE_IAA) include(BuildQpl) target_include_directories(gluten PUBLIC ${QPL_INCLUDE_DIR}) - target_sources(gluten PRIVATE utils/qpl/qpl_job_pool.cc utils/qpl/qpl_codec.cc) + target_sources(gluten PRIVATE utils/qpl/qpl_job_pool.cc + utils/qpl/qpl_codec.cc) target_link_libraries(gluten PUBLIC qpl::qpl) endif() if(BUILD_PROTOBUF) build_protobuf() message(STATUS "Building ProtoBuf from Source: ${BUILD_PROTOBUF}") - target_link_libraries(gluten - LINK_PRIVATE protobuf::libprotobuf) + target_link_libraries(gluten LINK_PRIVATE protobuf::libprotobuf) else() find_protobuf() message(STATUS "Use existing ProtoBuf libraries: ${PROTOBUF_LIBRARY}") - target_link_libraries(gluten - LINK_PUBLIC ${PROTOBUF_LIBRARY}) + target_link_libraries(gluten LINK_PUBLIC ${PROTOBUF_LIBRARY}) endif() -add_custom_command(OUTPUT ${SUBSTRAIT_PROTO_OUTPUT_FILES} - COMMAND ${PROTOC_BIN} - --proto_path - ${SUBSTRAIT_PROTO_SRC_DIR}/ - --cpp_out - ${PROTO_OUTPUT_DIR} - ${SUBSTRAIT_PROTO_FILES} - DEPENDS ${SUBSTRAIT_PROTO_DIR} - COMMENT "Running Substrait PROTO compiler" - VERBATIM) - -add_custom_command(OUTPUT ${GLUTEN_PROTO_OUTPUT_FILES} - COMMAND ${PROTOC_BIN} - --proto_path - ${GLUTEN_PROTO_SRC_DIR}/ - --cpp_out - ${PROTO_OUTPUT_DIR} - ${GLUTEN_PROTO_FILES} - DEPENDS ${GLUTEN_PROTO_DIR} - COMMENT "Running Gluten PROTO compiler" - VERBATIM) - -add_custom_target(jni_proto ALL DEPENDS ${SUBSTRAIT_PROTO_OUTPUT_FILES} ${GLUTEN_PROTO_OUTPUT_FILES}) +add_custom_command( + OUTPUT ${SUBSTRAIT_PROTO_OUTPUT_FILES} + COMMAND ${PROTOC_BIN} --proto_path ${SUBSTRAIT_PROTO_SRC_DIR}/ --cpp_out + ${PROTO_OUTPUT_DIR} ${SUBSTRAIT_PROTO_FILES} + DEPENDS ${SUBSTRAIT_PROTO_DIR} + COMMENT "Running Substrait PROTO compiler" + VERBATIM) + +add_custom_command( + OUTPUT ${GLUTEN_PROTO_OUTPUT_FILES} + COMMAND ${PROTOC_BIN} --proto_path ${GLUTEN_PROTO_SRC_DIR}/ --cpp_out + ${PROTO_OUTPUT_DIR} ${GLUTEN_PROTO_FILES} + DEPENDS ${GLUTEN_PROTO_DIR} + COMMENT "Running Gluten PROTO compiler" + VERBATIM) + +add_custom_target(jni_proto ALL DEPENDS ${SUBSTRAIT_PROTO_OUTPUT_FILES} + ${GLUTEN_PROTO_OUTPUT_FILES}) add_dependencies(jni_proto protobuf::libprotobuf) -target_include_directories(gluten PUBLIC ${CMAKE_SYSTEM_INCLUDE_PATH} ${JNI_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR} ${PROTO_OUTPUT_DIR} ${PROTOBUF_INCLUDE}) -set_target_properties(gluten PROPERTIES - LIBRARY_OUTPUT_DIRECTORY ${root_directory}/releases) +target_include_directories( + gluten + PUBLIC 
${CMAKE_SYSTEM_INCLUDE_PATH} ${JNI_INCLUDE_DIRS} + ${CMAKE_CURRENT_SOURCE_DIR} ${PROTO_OUTPUT_DIR} ${PROTOBUF_INCLUDE}) +set_target_properties(gluten PROPERTIES LIBRARY_OUTPUT_DIRECTORY + ${root_directory}/releases) include(Findjemalloc_pic) # Build Jemalloc @@ -313,23 +317,26 @@ if(BUILD_BENCHMARKS) add_subdirectory(benchmarks) endif() - if(DEFINED ENV{HADOOP_HOME}) set(LIBHDFS3_DESTINATION $ENV{HADOOP_HOME}/lib/native) else() set(LIBHDFS3_DESTINATION ${CMAKE_INSTALL_LIBDIR}) endif() -target_link_libraries(gluten - PUBLIC Arrow::arrow Arrow::parquet) +target_link_libraries(gluten PUBLIC Arrow::arrow Arrow::parquet) target_link_libraries(gluten PRIVATE google::glog) -install(TARGETS gluten - DESTINATION ${CMAKE_INSTALL_LIBDIR}) +install(TARGETS gluten DESTINATION ${CMAKE_INSTALL_LIBDIR}) install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/resources/libhdfs.so DESTINATION ${LIBHDFS3_DESTINATION}) -add_custom_command(TARGET gluten POST_BUILD COMMAND ld $<TARGET_FILE:gluten> || true - COMMENT "Checking ld result of libgluten.so") -add_custom_command(TARGET gluten POST_BUILD COMMAND ldd $<TARGET_FILE:gluten> || true - COMMENT "Checking ldd result of libgluten.so") +add_custom_command( + TARGET gluten + POST_BUILD + COMMAND ld $<TARGET_FILE:gluten> || true + COMMENT "Checking ld result of libgluten.so") +add_custom_command( + TARGET gluten + POST_BUILD + COMMAND ldd $<TARGET_FILE:gluten> || true + COMMENT "Checking ldd result of libgluten.so") diff --git a/cpp/core/benchmarks/CMakeLists.txt b/cpp/core/benchmarks/CMakeLists.txt index 6d39501477df..4b4c7656639c 100644 --- a/cpp/core/benchmarks/CMakeLists.txt +++ b/cpp/core/benchmarks/CMakeLists.txt @@ -31,7 +31,8 @@ macro(package_add_gbenchmark TESTNAME) add_executable(${TESTNAME} ${ARGN}) - target_link_libraries(${TESTNAME} benchmark::benchmark gluten google::glog ${CMAKE_THREAD_LIBS_INIT}) + target_link_libraries(${TESTNAME} benchmark::benchmark gluten google::glog + ${CMAKE_THREAD_LIBS_INIT}) target_include_directories(${TESTNAME} PUBLIC ${source_root_directory}) set_target_properties(${TESTNAME} PROPERTIES FOLDER tests) endmacro() diff --git a/cpp/velox/CMakeLists.txt b/cpp/velox/CMakeLists.txt index 34cc9001cf38..6d66ea506a7e 100644 --- a/cpp/velox/CMakeLists.txt +++ b/cpp/velox/CMakeLists.txt @@ -23,39 +23,54 @@ include(GNUInstallDirs) include(CheckCXXCompilerFlag) include(FindPackageHandleStandardArgs) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations -Wno-attributes") -if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") +set(CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations -Wno-attributes") +if(NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-class-memaccess") endif() -set(SYSTEM_LIB_PATH "/usr/lib" CACHE PATH "System Lib dir") -set(SYSTEM_LIB64_PATH "/usr/lib64" CACHE PATH "System Lib64 dir") -set(SYSTEM_LOCAL_LIB_PATH "/usr/local/lib" CACHE PATH "System Local Lib dir") -set(SYSTEM_LOCAL_LIB64_PATH "/usr/local/lib64" CACHE PATH "System Local Lib64 dir") -if (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)") - set(SYSTEM_LIB_MULTIARCH_PATH "/usr/lib/x86_64-linux-gnu" CACHE PATH "System Lib MultiArch dir") +set(SYSTEM_LIB_PATH + "/usr/lib" + CACHE PATH "System Lib dir") +set(SYSTEM_LIB64_PATH + "/usr/lib64" + CACHE PATH "System Lib64 dir") +set(SYSTEM_LOCAL_LIB_PATH + "/usr/local/lib" + CACHE PATH "System Local Lib dir") +set(SYSTEM_LOCAL_LIB64_PATH + "/usr/local/lib64" + CACHE PATH "System Local Lib64 dir") +if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)") + set(SYSTEM_LIB_MULTIARCH_PATH + "/usr/lib/x86_64-linux-gnu" + CACHE
PATH "System Lib MultiArch dir") elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64) - set(SYSTEM_LIB_MULTIARCH_PATH "/usr/lib/aarch64-linux-gnu" CACHE PATH "System Lib MultiArch dir") + set(SYSTEM_LIB_MULTIARCH_PATH + "/usr/lib/aarch64-linux-gnu" + CACHE PATH "System Lib MultiArch dir") elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL arm64) - set(SYSTEM_LIB_MULTIARCH_PATH "/usr/lib" CACHE PATH "System Lib MultiArch dir") + set(SYSTEM_LIB_MULTIARCH_PATH + "/usr/lib" + CACHE PATH "System Lib MultiArch dir") else() message(FATAL_ERROR "Unsupported processor type: ${CMAKE_SYSTEM_PROCESSOR}") endif() -if (NOT DEFINED VELOX_HOME) +if(NOT DEFINED VELOX_HOME) set(VELOX_HOME ${GLUTEN_HOME}/ep/build-velox/build/velox_ep) message(STATUS "Set VELOX_HOME to ${VELOX_HOME}") endif() # Keep same compile option with Velox. execute_process( - COMMAND + COMMAND bash -c "( source ${VELOX_HOME}/scripts/setup-helper-functions.sh && echo -n $(get_cxx_flags $ENV{CPU_TARGET}))" - OUTPUT_VARIABLE SCRIPT_CXX_FLAGS - RESULT_VARIABLE COMMAND_STATUS) + OUTPUT_VARIABLE SCRIPT_CXX_FLAGS + RESULT_VARIABLE COMMAND_STATUS) if(COMMAND_STATUS EQUAL "1") - message(FATAL_ERROR "Unable to determine compiler flags!") + message(FATAL_ERROR "Unable to determine compiler flags!") endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SCRIPT_CXX_FLAGS}") @@ -63,10 +78,14 @@ message("Velox module final CMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}") # User can specify VELOX_BUILD_PATH, if Velox are built elsewhere. if(NOT DEFINED VELOX_BUILD_PATH) - if (${CMAKE_BUILD_TYPE} STREQUAL "Debug") - set(VELOX_BUILD_PATH "${VELOX_HOME}/_build/debug" CACHE PATH "Velox build directory.") + if(${CMAKE_BUILD_TYPE} STREQUAL "Debug") + set(VELOX_BUILD_PATH + "${VELOX_HOME}/_build/debug" + CACHE PATH "Velox build directory.") else() - set(VELOX_BUILD_PATH "${VELOX_HOME}/_build/release" CACHE PATH "Velox build directory.") + set(VELOX_BUILD_PATH + "${VELOX_HOME}/_build/release" + CACHE PATH "Velox build directory.") endif() endif() @@ -78,25 +97,28 @@ function(ADD_VELOX_DEPENDENCY VELOX_DEP_LIB_NAME VELOX_DEP_LIB_PATH) endif() set(VELOX_DEP_LIB facebook::velox::${VELOX_DEP_LIB_NAME}) add_library(${VELOX_DEP_LIB} STATIC IMPORTED) - set_target_properties(${VELOX_DEP_LIB} PROPERTIES - IMPORTED_LOCATION ${VELOX_DEP_LIB_PATH}) + set_target_properties(${VELOX_DEP_LIB} PROPERTIES IMPORTED_LOCATION + ${VELOX_DEP_LIB_PATH}) target_link_libraries(velox PUBLIC ${VELOX_DEP_LIB}) endfunction() macro(ADD_VELOX_OBJECTS) add_library(velox_objects OBJECT IMPORTED GLOBAL) - set_property(TARGET velox_objects PROPERTY IMPORTED_OBJECTS - "${VELOX_COMPONENTS_PATH}/connectors/hive/CMakeFiles/velox_hive_connector.dir/FileHandle.cpp.o" - "${VELOX_COMPONENTS_PATH}/connectors/hive/CMakeFiles/velox_hive_connector.dir/HiveConfig.cpp.o" - "${VELOX_COMPONENTS_PATH}/connectors/hive/CMakeFiles/velox_hive_connector.dir/HiveConnector.cpp.o" - "${VELOX_COMPONENTS_PATH}/connectors/hive/CMakeFiles/velox_hive_connector.dir/HiveDataSink.cpp.o" - "${VELOX_COMPONENTS_PATH}/connectors/hive/CMakeFiles/velox_hive_connector.dir/HiveDataSource.cpp.o" - "${VELOX_COMPONENTS_PATH}/connectors/hive/CMakeFiles/velox_hive_connector.dir/HivePartitionUtil.cpp.o" - "${VELOX_COMPONENTS_PATH}/connectors/hive/CMakeFiles/velox_hive_connector.dir/PartitionIdGenerator.cpp.o" - "${VELOX_COMPONENTS_PATH}/connectors/hive/CMakeFiles/velox_hive_connector.dir/SplitReader.cpp.o" - "${VELOX_COMPONENTS_PATH}/connectors/hive/CMakeFiles/velox_hive_connector.dir/TableHandle.cpp.o" - 
"${VELOX_COMPONENTS_PATH}/connectors/hive/CMakeFiles/velox_hive_connector.dir/HiveConnectorUtil.cpp.o" - ) + set_property( + TARGET velox_objects + PROPERTY + IMPORTED_OBJECTS + "${VELOX_COMPONENTS_PATH}/connectors/hive/CMakeFiles/velox_hive_connector.dir/FileHandle.cpp.o" + "${VELOX_COMPONENTS_PATH}/connectors/hive/CMakeFiles/velox_hive_connector.dir/HiveConfig.cpp.o" + "${VELOX_COMPONENTS_PATH}/connectors/hive/CMakeFiles/velox_hive_connector.dir/HiveConnector.cpp.o" + "${VELOX_COMPONENTS_PATH}/connectors/hive/CMakeFiles/velox_hive_connector.dir/HiveDataSink.cpp.o" + "${VELOX_COMPONENTS_PATH}/connectors/hive/CMakeFiles/velox_hive_connector.dir/HiveDataSource.cpp.o" + "${VELOX_COMPONENTS_PATH}/connectors/hive/CMakeFiles/velox_hive_connector.dir/HivePartitionUtil.cpp.o" + "${VELOX_COMPONENTS_PATH}/connectors/hive/CMakeFiles/velox_hive_connector.dir/PartitionIdGenerator.cpp.o" + "${VELOX_COMPONENTS_PATH}/connectors/hive/CMakeFiles/velox_hive_connector.dir/SplitReader.cpp.o" + "${VELOX_COMPONENTS_PATH}/connectors/hive/CMakeFiles/velox_hive_connector.dir/TableHandle.cpp.o" + "${VELOX_COMPONENTS_PATH}/connectors/hive/CMakeFiles/velox_hive_connector.dir/HiveConnectorUtil.cpp.o" + ) target_link_libraries(velox PUBLIC velox_objects) endmacro() @@ -112,125 +134,302 @@ endmacro() macro(ADD_VELOX_DEPENDENCIES) add_velox_objects() - add_velox_dependency(expression::sigparser "${VELOX_COMPONENTS_PATH}/expression/signature_parser/libvelox_signature_parser.a") - add_velox_dependency(functions::sparksql::lib "${VELOX_COMPONENTS_PATH}/functions/sparksql/libvelox_functions_spark.a") - add_velox_dependency(functions::sparksql::agg "${VELOX_COMPONENTS_PATH}/functions/sparksql/aggregates/libvelox_functions_spark_aggregates.a") - add_velox_dependency(functions::window::sparksql "${VELOX_COMPONENTS_PATH}/functions/sparksql/window/libvelox_functions_spark_window.a") - add_velox_dependency(functions::prestosql::agg "${VELOX_COMPONENTS_PATH}/functions/prestosql/aggregates/libvelox_aggregates.a") - add_velox_dependency(functions::lib::agg "${VELOX_COMPONENTS_PATH}/functions/lib/aggregates/libvelox_functions_aggregates.a") - add_velox_dependency(functions::prestosql::window "${VELOX_COMPONENTS_PATH}/functions/prestosql/window/libvelox_window.a") - add_velox_dependency(functions::lib::window "${VELOX_COMPONENTS_PATH}/functions/lib/window/libvelox_functions_window.a") - add_velox_dependency(velox::buffer "${VELOX_COMPONENTS_PATH}/buffer/libvelox_buffer.a") - - add_velox_dependency(functions::isnull "${VELOX_COMPONENTS_PATH}/functions/lib/libvelox_is_null_functions.a") - add_velox_dependency(functions::prestosql "${VELOX_COMPONENTS_PATH}/functions/prestosql/registration/libvelox_functions_prestosql.a") - add_velox_dependency(functions::prestosql::impl "${VELOX_COMPONENTS_PATH}/functions/prestosql/libvelox_functions_prestosql_impl.a") - add_velox_dependency(functions::json "${VELOX_COMPONENTS_PATH}/functions/prestosql/json/libvelox_functions_json.a") - add_velox_dependency(functions::hyperloglog "${VELOX_COMPONENTS_PATH}/common/hyperloglog/libvelox_common_hyperloglog.a") - add_velox_dependency(functions::lib "${VELOX_COMPONENTS_PATH}/functions/lib/libvelox_functions_lib.a") - add_velox_dependency(functions::lib::date_time_formatter "${VELOX_COMPONENTS_PATH}/functions/lib/libvelox_functions_lib_date_time_formatter.a") + add_velox_dependency( + expression::sigparser + "${VELOX_COMPONENTS_PATH}/expression/signature_parser/libvelox_signature_parser.a" + ) + add_velox_dependency( + functions::sparksql::lib + 
"${VELOX_COMPONENTS_PATH}/functions/sparksql/libvelox_functions_spark.a") + add_velox_dependency( + functions::sparksql::agg + "${VELOX_COMPONENTS_PATH}/functions/sparksql/aggregates/libvelox_functions_spark_aggregates.a" + ) + add_velox_dependency( + functions::window::sparksql + "${VELOX_COMPONENTS_PATH}/functions/sparksql/window/libvelox_functions_spark_window.a" + ) + add_velox_dependency( + functions::prestosql::agg + "${VELOX_COMPONENTS_PATH}/functions/prestosql/aggregates/libvelox_aggregates.a" + ) + add_velox_dependency( + functions::lib::agg + "${VELOX_COMPONENTS_PATH}/functions/lib/aggregates/libvelox_functions_aggregates.a" + ) + add_velox_dependency( + functions::prestosql::window + "${VELOX_COMPONENTS_PATH}/functions/prestosql/window/libvelox_window.a") + add_velox_dependency( + functions::lib::window + "${VELOX_COMPONENTS_PATH}/functions/lib/window/libvelox_functions_window.a") + add_velox_dependency(velox::buffer + "${VELOX_COMPONENTS_PATH}/buffer/libvelox_buffer.a") + + add_velox_dependency( + functions::isnull + "${VELOX_COMPONENTS_PATH}/functions/lib/libvelox_is_null_functions.a") + add_velox_dependency( + functions::prestosql + "${VELOX_COMPONENTS_PATH}/functions/prestosql/registration/libvelox_functions_prestosql.a" + ) + add_velox_dependency( + functions::prestosql::impl + "${VELOX_COMPONENTS_PATH}/functions/prestosql/libvelox_functions_prestosql_impl.a" + ) + add_velox_dependency( + functions::json + "${VELOX_COMPONENTS_PATH}/functions/prestosql/json/libvelox_functions_json.a" + ) + add_velox_dependency( + functions::hyperloglog + "${VELOX_COMPONENTS_PATH}/common/hyperloglog/libvelox_common_hyperloglog.a") + add_velox_dependency( + functions::lib + "${VELOX_COMPONENTS_PATH}/functions/lib/libvelox_functions_lib.a") + add_velox_dependency( + functions::lib::date_time_formatter + "${VELOX_COMPONENTS_PATH}/functions/lib/libvelox_functions_lib_date_time_formatter.a" + ) if(BUILD_TESTS) - add_velox_dependency(exec::test "${VELOX_COMPONENTS_PATH}/exec/tests/utils/libvelox_exec_test_lib.a") - add_velox_dependency(temp::path "${VELOX_COMPONENTS_PATH}/exec/tests/utils/libvelox_temp_path.a") - add_velox_dependency(dwio::common::test::utils "${VELOX_COMPONENTS_PATH}/dwio/common/tests/utils/libvelox_dwio_common_test_utils.a") + add_velox_dependency( + exec::test + "${VELOX_COMPONENTS_PATH}/exec/tests/utils/libvelox_exec_test_lib.a") + add_velox_dependency( + temp::path + "${VELOX_COMPONENTS_PATH}/exec/tests/utils/libvelox_temp_path.a") + add_velox_dependency( + dwio::common::test::utils + "${VELOX_COMPONENTS_PATH}/dwio/common/tests/utils/libvelox_dwio_common_test_utils.a" + ) endif() add_velox_dependency(exec "${VELOX_COMPONENTS_PATH}/exec/libvelox_exec.a") if(BUILD_TESTS) - add_velox_dependency(parse::parser "${VELOX_COMPONENTS_PATH}/parse/libvelox_parse_parser.a") - add_velox_dependency(duckdb::parser "${VELOX_COMPONENTS_PATH}/duckdb/conversion/libvelox_duckdb_parser.a") - add_velox_dependency(parse::expression "${VELOX_COMPONENTS_PATH}/parse/libvelox_parse_expression.a") - add_velox_dependency(parse::utils "${VELOX_COMPONENTS_PATH}/parse/libvelox_parse_utils.a") - add_velox_dependency(function::registry "${VELOX_COMPONENTS_PATH}/functions/libvelox_function_registry.a") + add_velox_dependency( + parse::parser "${VELOX_COMPONENTS_PATH}/parse/libvelox_parse_parser.a") + add_velox_dependency( + duckdb::parser + "${VELOX_COMPONENTS_PATH}/duckdb/conversion/libvelox_duckdb_parser.a") + add_velox_dependency( + parse::expression + 
"${VELOX_COMPONENTS_PATH}/parse/libvelox_parse_expression.a") + add_velox_dependency( + parse::utils "${VELOX_COMPONENTS_PATH}/parse/libvelox_parse_utils.a") + add_velox_dependency( + function::registry + "${VELOX_COMPONENTS_PATH}/functions/libvelox_function_registry.a") endif() - add_velox_dependency(vector::arrow::bridge "${VELOX_COMPONENTS_PATH}/vector/arrow/libvelox_arrow_bridge.a") + add_velox_dependency( + vector::arrow::bridge + "${VELOX_COMPONENTS_PATH}/vector/arrow/libvelox_arrow_bridge.a") add_velox_dependency(row "${VELOX_COMPONENTS_PATH}/row/libvelox_row_fast.a") - add_velox_dependency(connector "${VELOX_COMPONENTS_PATH}/connectors/libvelox_connector.a") - add_velox_dependency(connector::hive_parition "${VELOX_COMPONENTS_PATH}/connectors/hive/libvelox_hive_partition_function.a") - add_velox_dependency(connector::hive::iceberg::IcebergSplitReader "${VELOX_COMPONENTS_PATH}/connectors/hive/iceberg/libvelox_hive_iceberg_splitreader.a") + add_velox_dependency( + connector "${VELOX_COMPONENTS_PATH}/connectors/libvelox_connector.a") + add_velox_dependency( + connector::hive_parition + "${VELOX_COMPONENTS_PATH}/connectors/hive/libvelox_hive_partition_function.a" + ) + add_velox_dependency( + connector::hive::iceberg::IcebergSplitReader + "${VELOX_COMPONENTS_PATH}/connectors/hive/iceberg/libvelox_hive_iceberg_splitreader.a" + ) if(ENABLE_HDFS) - add_velox_dependency(connector::hive::hdfs "${VELOX_COMPONENTS_PATH}/connectors/hive/storage_adapters/hdfs/libvelox_hdfs.a") + add_velox_dependency( + connector::hive::hdfs + "${VELOX_COMPONENTS_PATH}/connectors/hive/storage_adapters/hdfs/libvelox_hdfs.a" + ) endif() if(ENABLE_GCS) - add_velox_dependency(connector::hive::gcs "${VELOX_COMPONENTS_PATH}/connectors/hive/storage_adapters/gcs/libvelox_gcs.a") + add_velox_dependency( + connector::hive::gcs + "${VELOX_COMPONENTS_PATH}/connectors/hive/storage_adapters/gcs/libvelox_gcs.a" + ) endif() if(ENABLE_S3) - add_velox_dependency(connector::hive::s3fs "${VELOX_COMPONENTS_PATH}/connectors/hive/storage_adapters/s3fs/libvelox_s3fs.a") + add_velox_dependency( + connector::hive::s3fs + "${VELOX_COMPONENTS_PATH}/connectors/hive/storage_adapters/s3fs/libvelox_s3fs.a" + ) endif() if(ENABLE_ABFS) - add_velox_dependency(connector::hive::abfs "${VELOX_COMPONENTS_PATH}/connectors/hive/storage_adapters/abfs/libvelox_abfs.a") + add_velox_dependency( + connector::hive::abfs + "${VELOX_COMPONENTS_PATH}/connectors/hive/storage_adapters/abfs/libvelox_abfs.a" + ) endif() - add_velox_dependency(dwio::dwrf::writer "${VELOX_COMPONENTS_PATH}/dwio/dwrf/writer/libvelox_dwio_dwrf_writer.a") - add_velox_dependency(dwio::dwrf::reader "${VELOX_COMPONENTS_PATH}/dwio/dwrf/reader/libvelox_dwio_dwrf_reader.a") - add_velox_dependency(dwio::dwrf::utils "${VELOX_COMPONENTS_PATH}/dwio/dwrf/utils/libvelox_dwio_dwrf_utils.a") - add_velox_dependency(dwio::dwrf::common "${VELOX_COMPONENTS_PATH}/dwio/dwrf/common/libvelox_dwio_dwrf_common.a") - add_velox_dependency(parquet "${VELOX_COMPONENTS_PATH}/dwio/parquet/libvelox_dwio_parquet_reader.a") - add_velox_dependency(parquet::reader::native "${VELOX_COMPONENTS_PATH}/dwio/parquet/reader/libvelox_dwio_native_parquet_reader.a") + add_velox_dependency( + dwio::dwrf::writer + "${VELOX_COMPONENTS_PATH}/dwio/dwrf/writer/libvelox_dwio_dwrf_writer.a") + add_velox_dependency( + dwio::dwrf::reader + "${VELOX_COMPONENTS_PATH}/dwio/dwrf/reader/libvelox_dwio_dwrf_reader.a") + add_velox_dependency( + dwio::dwrf::utils + "${VELOX_COMPONENTS_PATH}/dwio/dwrf/utils/libvelox_dwio_dwrf_utils.a") + 
add_velox_dependency( + dwio::dwrf::common + "${VELOX_COMPONENTS_PATH}/dwio/dwrf/common/libvelox_dwio_dwrf_common.a") + add_velox_dependency( + parquet + "${VELOX_COMPONENTS_PATH}/dwio/parquet/libvelox_dwio_parquet_reader.a") + add_velox_dependency( + parquet::reader::native + "${VELOX_COMPONENTS_PATH}/dwio/parquet/reader/libvelox_dwio_native_parquet_reader.a" + ) if(BUILD_TESTS) - add_velox_dependency(dwio::common::utils "${VELOX_COMPONENTS_PATH}/dwio/common/tests/utils/libvelox_dwio_common_test_utils.a") - add_velox_dependency(dwio::dwrf::test_utils "${VELOX_COMPONENTS_PATH}/dwio/dwrf/test/utils/libvelox_dwrf_test_utils.a") - add_velox_dependency(parquet::reader::duckdb_conversion "${VELOX_COMPONENTS_PATH}/duckdb/conversion/libvelox_duckdb_conversion.a") + add_velox_dependency( + dwio::common::utils + "${VELOX_COMPONENTS_PATH}/dwio/common/tests/utils/libvelox_dwio_common_test_utils.a" + ) + add_velox_dependency( + dwio::dwrf::test_utils + "${VELOX_COMPONENTS_PATH}/dwio/dwrf/test/utils/libvelox_dwrf_test_utils.a" + ) + add_velox_dependency( + parquet::reader::duckdb_conversion + "${VELOX_COMPONENTS_PATH}/duckdb/conversion/libvelox_duckdb_conversion.a") add_duckdb() - add_velox_dependency(tpch::gen "${VELOX_COMPONENTS_PATH}/tpch/gen/libvelox_tpch_gen.a") - add_velox_dependency(dbgen "${VELOX_COMPONENTS_PATH}/tpch/gen/dbgen/libvelox_dbgen.a") + add_velox_dependency( + tpch::gen "${VELOX_COMPONENTS_PATH}/tpch/gen/libvelox_tpch_gen.a") + add_velox_dependency( + dbgen "${VELOX_COMPONENTS_PATH}/tpch/gen/dbgen/libvelox_dbgen.a") endif() - add_velox_dependency(parquet::reader::thrift "${VELOX_COMPONENTS_PATH}/dwio/parquet/thrift/libvelox_dwio_parquet_thrift.a") - - add_velox_dependency(velox::arrow::parquet::writer "${VELOX_COMPONENTS_PATH}/dwio/parquet/writer/libvelox_dwio_arrow_parquet_writer.a") - add_velox_dependency(dwio::arrow::parquet::writer "${VELOX_COMPONENTS_PATH}/dwio/parquet/writer/arrow/libvelox_dwio_arrow_parquet_writer_lib.a") - add_velox_dependency(dwio::arrow::parquet::writer::util "${VELOX_COMPONENTS_PATH}/dwio/parquet/writer/arrow/util/libvelox_dwio_arrow_parquet_writer_util_lib.a") - add_velox_dependency(dwio::arrow::parquet::writer::thrift::lib "${VELOX_COMPONENTS_PATH}/dwio/parquet/writer/arrow/generated/libvelox_dwio_arrow_parquet_writer_thrift_lib.a") - add_velox_dependency(dwio::common::compression "${VELOX_COMPONENTS_PATH}/dwio/common/compression/libvelox_dwio_common_compression.a") - add_velox_dependency(dwio::common "${VELOX_COMPONENTS_PATH}/dwio/common/libvelox_dwio_common.a") - add_velox_dependency(functions::prestosql::types "${VELOX_COMPONENTS_PATH}/functions/prestosql/types/libvelox_presto_types.a") - add_velox_dependency(functions::spark::specialforms "${VELOX_COMPONENTS_PATH}/functions/sparksql/specialforms/libvelox_functions_spark_specialforms.a") - add_velox_dependency(expression "${VELOX_COMPONENTS_PATH}/expression/libvelox_expression.a") + add_velox_dependency( + parquet::reader::thrift + "${VELOX_COMPONENTS_PATH}/dwio/parquet/thrift/libvelox_dwio_parquet_thrift.a" + ) + + add_velox_dependency( + velox::arrow::parquet::writer + "${VELOX_COMPONENTS_PATH}/dwio/parquet/writer/libvelox_dwio_arrow_parquet_writer.a" + ) + add_velox_dependency( + dwio::arrow::parquet::writer + "${VELOX_COMPONENTS_PATH}/dwio/parquet/writer/arrow/libvelox_dwio_arrow_parquet_writer_lib.a" + ) + add_velox_dependency( + dwio::arrow::parquet::writer::util + "${VELOX_COMPONENTS_PATH}/dwio/parquet/writer/arrow/util/libvelox_dwio_arrow_parquet_writer_util_lib.a" + ) + add_velox_dependency( 
+ dwio::arrow::parquet::writer::thrift::lib + "${VELOX_COMPONENTS_PATH}/dwio/parquet/writer/arrow/generated/libvelox_dwio_arrow_parquet_writer_thrift_lib.a" + ) + add_velox_dependency( + dwio::common::compression + "${VELOX_COMPONENTS_PATH}/dwio/common/compression/libvelox_dwio_common_compression.a" + ) + add_velox_dependency( + dwio::common "${VELOX_COMPONENTS_PATH}/dwio/common/libvelox_dwio_common.a") + add_velox_dependency( + functions::prestosql::types + "${VELOX_COMPONENTS_PATH}/functions/prestosql/types/libvelox_presto_types.a" + ) + add_velox_dependency( + functions::spark::specialforms + "${VELOX_COMPONENTS_PATH}/functions/sparksql/specialforms/libvelox_functions_spark_specialforms.a" + ) + add_velox_dependency( + expression "${VELOX_COMPONENTS_PATH}/expression/libvelox_expression.a") add_velox_dependency(core "${VELOX_COMPONENTS_PATH}/core/libvelox_core.a") - add_velox_dependency(type::fbhive "${VELOX_COMPONENTS_PATH}/type/fbhive/libvelox_type_fbhive.a") + add_velox_dependency( + type::fbhive "${VELOX_COMPONENTS_PATH}/type/fbhive/libvelox_type_fbhive.a") add_velox_dependency(type "${VELOX_COMPONENTS_PATH}/type/libvelox_type.a") - add_velox_dependency(vector::serializes "${VELOX_COMPONENTS_PATH}/serializers/libvelox_presto_serializer.a") - add_velox_dependency(functions::lib::util "${VELOX_COMPONENTS_PATH}/functions/lib/libvelox_functions_util.a") - add_velox_dependency(vector "${VELOX_COMPONENTS_PATH}/vector/libvelox_vector.a") - add_velox_dependency(expression::function "${VELOX_COMPONENTS_PATH}/expression/libvelox_expression_functions.a") - add_velox_dependency(expression::type_calculation "${VELOX_COMPONENTS_PATH}/expression/type_calculation/libvelox_type_calculation.a") - - add_velox_dependency(common::caching "${VELOX_COMPONENTS_PATH}/common/caching/libvelox_caching.a") - add_velox_dependency(common::base "${VELOX_COMPONENTS_PATH}/common/base/libvelox_common_base.a") - add_velox_dependency(common::memory "${VELOX_COMPONENTS_PATH}/common/memory/libvelox_memory.a") - add_velox_dependency(common::serialization "${VELOX_COMPONENTS_PATH}/common/serialization/libvelox_serialization.a") - add_velox_dependency(common::base::exception "${VELOX_COMPONENTS_PATH}/common/base/libvelox_exception.a") - - add_velox_dependency(type::tz "${VELOX_COMPONENTS_PATH}/type/tz/libvelox_type_tz.a") - add_velox_dependency(dwio::dwrf::proto "${VELOX_COMPONENTS_PATH}/dwio/dwrf/proto/libvelox_dwio_dwrf_proto.a") - add_velox_dependency(dwio::catalog::fbhive "${VELOX_COMPONENTS_PATH}/dwio/catalog/fbhive/libvelox_dwio_catalog_fbhive.a") - add_velox_dependency(dwio::common::exception "${VELOX_COMPONENTS_PATH}/dwio/common/exception/libvelox_dwio_common_exception.a") - add_velox_dependency(dwio::common::encryption "${VELOX_COMPONENTS_PATH}/dwio/common/encryption/libvelox_dwio_common_encryption.a") - - add_velox_dependency(core::config "${VELOX_COMPONENTS_PATH}/core/libvelox_config.a") - add_velox_dependency(common::encode "${VELOX_COMPONENTS_PATH}/common/encode/libvelox_encode.a") - add_velox_dependency(common::time "${VELOX_COMPONENTS_PATH}/common/time/libvelox_time.a") + add_velox_dependency( + vector::serializes + "${VELOX_COMPONENTS_PATH}/serializers/libvelox_presto_serializer.a") + add_velox_dependency( + functions::lib::util + "${VELOX_COMPONENTS_PATH}/functions/lib/libvelox_functions_util.a") + add_velox_dependency(vector + "${VELOX_COMPONENTS_PATH}/vector/libvelox_vector.a") + add_velox_dependency( + expression::function + "${VELOX_COMPONENTS_PATH}/expression/libvelox_expression_functions.a") + 
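The long run of `add_velox_dependency(<name> <archive-path>)` calls being rewrapped above and below is purely a cmake-format reflow; the macro itself is defined elsewhere in this file and is not shown in this diff. As a rough illustration only, a helper of this shape might look like the sketch below, assuming it simply wraps a prebuilt Velox static archive in an imported target and links it into the `velox` library; the macro name and details here are hypothetical, not Gluten's actual implementation.

```
# Hypothetical sketch only -- not the macro defined in Gluten's CMakeLists.txt.
macro(add_velox_dependency_sketch DEP_NAME DEP_PATH)
  if(NOT EXISTS ${DEP_PATH})
    message(FATAL_ERROR "Velox archive not found: ${DEP_PATH}")
  endif()
  # Wrap the prebuilt static archive in an imported target and link it in.
  add_library(velox_dep::${DEP_NAME} STATIC IMPORTED)
  set_target_properties(velox_dep::${DEP_NAME} PROPERTIES IMPORTED_LOCATION
                                                          ${DEP_PATH})
  target_link_libraries(velox PUBLIC velox_dep::${DEP_NAME})
endmacro()
```

In the build the real macro is invoked once per Velox component archive, exactly as in the reformatted calls around this point.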
add_velox_dependency( + expression::type_calculation + "${VELOX_COMPONENTS_PATH}/expression/type_calculation/libvelox_type_calculation.a" + ) + + add_velox_dependency( + common::caching + "${VELOX_COMPONENTS_PATH}/common/caching/libvelox_caching.a") + add_velox_dependency( + common::base "${VELOX_COMPONENTS_PATH}/common/base/libvelox_common_base.a") + add_velox_dependency( + common::memory "${VELOX_COMPONENTS_PATH}/common/memory/libvelox_memory.a") + add_velox_dependency( + common::serialization + "${VELOX_COMPONENTS_PATH}/common/serialization/libvelox_serialization.a") + add_velox_dependency( + common::base::exception + "${VELOX_COMPONENTS_PATH}/common/base/libvelox_exception.a") + + add_velox_dependency(type::tz + "${VELOX_COMPONENTS_PATH}/type/tz/libvelox_type_tz.a") + add_velox_dependency( + dwio::dwrf::proto + "${VELOX_COMPONENTS_PATH}/dwio/dwrf/proto/libvelox_dwio_dwrf_proto.a") + add_velox_dependency( + dwio::catalog::fbhive + "${VELOX_COMPONENTS_PATH}/dwio/catalog/fbhive/libvelox_dwio_catalog_fbhive.a" + ) + add_velox_dependency( + dwio::common::exception + "${VELOX_COMPONENTS_PATH}/dwio/common/exception/libvelox_dwio_common_exception.a" + ) + add_velox_dependency( + dwio::common::encryption + "${VELOX_COMPONENTS_PATH}/dwio/common/encryption/libvelox_dwio_common_encryption.a" + ) + + add_velox_dependency(core::config + "${VELOX_COMPONENTS_PATH}/core/libvelox_config.a") + add_velox_dependency( + common::encode "${VELOX_COMPONENTS_PATH}/common/encode/libvelox_encode.a") + add_velox_dependency(common::time + "${VELOX_COMPONENTS_PATH}/common/time/libvelox_time.a") if(BUILD_TESTS) - add_velox_dependency(common::file::test "${VELOX_COMPONENTS_PATH}/common/file/tests/libvelox_file_test_utils.a") + add_velox_dependency( + common::file::test + "${VELOX_COMPONENTS_PATH}/common/file/tests/libvelox_file_test_utils.a") endif() - add_velox_dependency(common::file "${VELOX_COMPONENTS_PATH}/common/file/libvelox_file.a") - add_velox_dependency(common::process "${VELOX_COMPONENTS_PATH}/common/process/libvelox_process.a") - - add_velox_dependency(common::test_util "${VELOX_COMPONENTS_PATH}/common/testutil/libvelox_test_util.a") - - add_velox_dependency(external::md5 "${VELOX_COMPONENTS_PATH}/external/md5/libmd5.a") - add_velox_dependency(external::date "${VELOX_COMPONENTS_PATH}/external/date/libvelox_external_date.a") - add_velox_dependency(velox::parquet::writer "${VELOX_COMPONENTS_PATH}/dwio/parquet/libvelox_dwio_parquet_writer.a") + add_velox_dependency(common::file + "${VELOX_COMPONENTS_PATH}/common/file/libvelox_file.a") + add_velox_dependency( + common::process + "${VELOX_COMPONENTS_PATH}/common/process/libvelox_process.a") + + add_velox_dependency( + common::test_util + "${VELOX_COMPONENTS_PATH}/common/testutil/libvelox_test_util.a") + + add_velox_dependency(external::md5 + "${VELOX_COMPONENTS_PATH}/external/md5/libmd5.a") + add_velox_dependency( + external::date + "${VELOX_COMPONENTS_PATH}/external/date/libvelox_external_date.a") + add_velox_dependency( + velox::parquet::writer + "${VELOX_COMPONENTS_PATH}/dwio/parquet/libvelox_dwio_parquet_writer.a") if(BUILD_TESTS) - add_velox_dependency(vector::test::util "${VELOX_COMPONENTS_PATH}/vector/tests/utils/libvelox_vector_test_lib.a") + add_velox_dependency( + vector::test::util + "${VELOX_COMPONENTS_PATH}/vector/tests/utils/libvelox_vector_test_lib.a") endif() - add_velox_dependency(common::compression "${VELOX_COMPONENTS_PATH}/common/compression/libvelox_common_compression.a") - add_velox_dependency(common::io 
"${VELOX_COMPONENTS_PATH}/common/io/libvelox_common_io.a") - add_velox_dependency(velox::status "${VELOX_COMPONENTS_PATH}/common/base/libvelox_status.a") + add_velox_dependency( + common::compression + "${VELOX_COMPONENTS_PATH}/common/compression/libvelox_common_compression.a") + add_velox_dependency( + common::io "${VELOX_COMPONENTS_PATH}/common/io/libvelox_common_io.a") + add_velox_dependency(velox::status + "${VELOX_COMPONENTS_PATH}/common/base/libvelox_status.a") endmacro() macro(find_libhdfs3) @@ -241,18 +440,16 @@ macro(find_libhdfs3) find_path(libhdfs3_INCLUDE_DIR hdfs/hdfs.h) set(CMAKE_FIND_LIBRARY_SUFFIXES ".so") find_library(libhdfs3_LIBRARY NAMES hdfs3) - find_package_handle_standard_args(libhdfs3 DEFAULT_MSG - libhdfs3_INCLUDE_DIR - libhdfs3_LIBRARY - ) + find_package_handle_standard_args(libhdfs3 DEFAULT_MSG libhdfs3_INCLUDE_DIR + libhdfs3_LIBRARY) add_library(HDFS::hdfs3 SHARED IMPORTED) - set_target_properties(HDFS::hdfs3 PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${libhdfs3_INCLUDE_DIR}" - IMPORTED_LOCATION "${libhdfs3_LIBRARY}" - ) + set_target_properties( + HDFS::hdfs3 + PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${libhdfs3_INCLUDE_DIR}" + IMPORTED_LOCATION "${libhdfs3_LIBRARY}") endif() - if (NOT libhdfs3_FOUND) + if(NOT libhdfs3_FOUND) message(FATAL_ERROR "LIBHDFS3 Library Not Found") endif() endmacro() @@ -262,30 +459,35 @@ macro(find_re2) if(re2_FOUND AND TARGET re2::re2) set(RE2_LIBRARY re2::re2) else() - find_library(RE2_LIBRARY NAMES re2 PATHS ${SYSTEM_LIB_PATH} ${SYSTEM_LIB64_PATH} ${SYSTEM_LIB_MULTIARCH_PATH} ${SYSTEM_LOCAL_LIB_PATH} ${SYSTEM_LOCAL_LIB64_PATH} NO_DEFAULT_PATH) + find_library( + RE2_LIBRARY + NAMES re2 + PATHS ${SYSTEM_LIB_PATH} ${SYSTEM_LIB64_PATH} ${SYSTEM_LIB_MULTIARCH_PATH} + ${SYSTEM_LOCAL_LIB_PATH} ${SYSTEM_LOCAL_LIB64_PATH} + NO_DEFAULT_PATH) endif() - if (NOT RE2_LIBRARY) - message(FATAL_ERROR "RE2 Library Not Found") + if(NOT RE2_LIBRARY) + message(FATAL_ERROR "RE2 Library Not Found") else() message(STATUS "RE2 Library Can Be Found in ${RE2_LIBRARY}") endif() endmacro() macro(find_awssdk) - set (CMAKE_FIND_LIBRARY_SUFFIXES ".a") + set(CMAKE_FIND_LIBRARY_SUFFIXES ".a") find_package(AWSSDK REQUIRED COMPONENTS s3;identity-management) endmacro() macro(find_gcssdk) - set (CMAKE_FIND_LIBRARY_SUFFIXES ".so") + set(CMAKE_FIND_LIBRARY_SUFFIXES ".so") find_package(google_cloud_cpp_storage REQUIRED) endmacro() macro(find_azure) find_package(CURL REQUIRED) find_package(LibXml2 REQUIRED) - set (CMAKE_FIND_LIBRARY_SUFFIXES ".a") + set(CMAKE_FIND_LIBRARY_SUFFIXES ".a") find_package(azure-storage-blobs-cpp CONFIG REQUIRED) find_package(azure-storage-files-datalake-cpp CONFIG REQUIRED) endmacro() @@ -327,12 +529,11 @@ set(VELOX_SRCS utils/VeloxArrowUtils.cc utils/ConfigExtractor.cc utils/Common.cc - utils/VeloxBatchAppender.cc - ) + utils/VeloxBatchAppender.cc) -if (ENABLE_HDFS) +if(ENABLE_HDFS) list(APPEND VELOX_SRCS utils/HdfsUtils.cc) -endif () +endif() if(ENABLE_S3) find_package(ZLIB) @@ -346,30 +547,30 @@ add_library(velox SHARED ${VELOX_SRCS}) if(ENABLE_GLUTEN_VCPKG) # Hide symbols of static dependencies - target_link_options(velox PRIVATE -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/symbols.map) + target_link_options( + velox PRIVATE -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/symbols.map) endif() -target_include_directories(velox PUBLIC - ${CMAKE_SYSTEM_INCLUDE_PATH} - ${JNI_INCLUDE_DIRS} - ${CMAKE_CURRENT_SOURCE_DIR} - ${VELOX_HOME}/ - ${VELOX_BUILD_PATH}/ - ${VELOX_BUILD_PATH}/_deps/xsimd-src/include/ - 
${VELOX_HOME}/third_party/xsimd/include/) - -set_target_properties(velox PROPERTIES - LIBRARY_OUTPUT_DIRECTORY ${root_directory}/releases - ) - -## If folly is not installed in system lib paths, please add -## `-DCMAKE_PREFIX_PATH="${folly lib path}" to cmake arguments. -## It is also applicable to other dependencies. +target_include_directories( + velox + PUBLIC ${CMAKE_SYSTEM_INCLUDE_PATH} + ${JNI_INCLUDE_DIRS} + ${CMAKE_CURRENT_SOURCE_DIR} + ${VELOX_HOME}/ + ${VELOX_BUILD_PATH}/ + ${VELOX_BUILD_PATH}/_deps/xsimd-src/include/ + ${VELOX_HOME}/third_party/xsimd/include/) + +set_target_properties(velox PROPERTIES LIBRARY_OUTPUT_DIRECTORY + ${root_directory}/releases) + +# If folly is not installed in system lib paths, please add +# `-DCMAKE_PREFIX_PATH="${folly lib path}" to cmake arguments. It is also +# applicable to other dependencies. find_package(Folly REQUIRED CONFIG) -target_include_directories(velox PUBLIC - ${GTEST_INCLUDE_DIRS} - ${PROTOBUF_INCLUDE}) +target_include_directories(velox PUBLIC ${GTEST_INCLUDE_DIRS} + ${PROTOBUF_INCLUDE}) target_link_libraries(velox PUBLIC gluten) add_velox_dependencies() @@ -383,11 +584,13 @@ target_link_libraries(velox PUBLIC Folly::folly) find_re2() target_link_libraries(velox PUBLIC ${RE2_LIBRARY}) -# since https://github.com/facebookincubator/velox/commit/47970417ac92135e862c0fde350d4d60fa2f1423 +# since +# https://github.com/facebookincubator/velox/commit/47970417ac92135e862c0fde350d4d60fa2f1423 if(Stemmer_FOUND) target_link_libraries(velox PUBLIC stemmer::stemmer) else() - add_velox_dependency(velox "${VELOX_BUILD_PATH}/_deps/libstemmer/src/libstemmer/libstemmer.a") + add_velox_dependency( + velox "${VELOX_BUILD_PATH}/_deps/libstemmer/src/libstemmer/libstemmer.a") endif() set(CMAKE_FIND_LIBRARY_SUFFIXES_BCK ${CMAKE_FIND_LIBRARY_SUFFIXES}) @@ -396,7 +599,8 @@ find_package(simdjson CONFIG) if(simdjson_FOUND AND TARGET simdjson::simdjson) target_link_libraries(velox PUBLIC simdjson::simdjson) else() - add_velox_dependency(external::simdjson "${VELOX_BUILD_PATH}/_deps/simdjson-build/libsimdjson.a") + add_velox_dependency(external::simdjson + "${VELOX_BUILD_PATH}/_deps/simdjson-build/libsimdjson.a") endif() set(CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES_BCK}) @@ -411,7 +615,8 @@ endif() if(Thrift_FOUND) target_link_libraries(velox PUBLIC thrift::thrift) else() - add_velox_dependency(thrift "${ARROW_HOME}/src/arrow_ep-build/thrift_ep-install/lib/libthrift.a") + add_velox_dependency( + thrift "${ARROW_HOME}/src/arrow_ep-build/thrift_ep-install/lib/libthrift.a") endif() if(BUILD_TESTS) @@ -451,7 +656,13 @@ if(ENABLE_ABFS) target_link_libraries(velox PUBLIC Azure::azure-storage-files-datalake) endif() -add_custom_command(TARGET velox POST_BUILD COMMAND ld $ || true - COMMENT "Checking ld result of libvelox.so") -add_custom_command(TARGET velox POST_BUILD COMMAND ldd $ || true - COMMENT "Checking ldd result of libvelox.so") +add_custom_command( + TARGET velox + POST_BUILD + COMMAND ld $ || true + COMMENT "Checking ld result of libvelox.so") +add_custom_command( + TARGET velox + POST_BUILD + COMMAND ldd $ || true + COMMENT "Checking ldd result of libvelox.so") diff --git a/cpp/velox/benchmarks/CMakeLists.txt b/cpp/velox/benchmarks/CMakeLists.txt index 74f21c29bc1d..903ec0d65825 100644 --- a/cpp/velox/benchmarks/CMakeLists.txt +++ b/cpp/velox/benchmarks/CMakeLists.txt @@ -15,10 +15,15 @@ find_arrow_lib(${PARQUET_LIB_NAME}) -set(VELOX_BENCHMARK_COMMON_SRCS common/FileReaderIterator.cc common/BenchmarkUtils.cc) +set(VELOX_BENCHMARK_COMMON_SRCS 
common/FileReaderIterator.cc + common/BenchmarkUtils.cc) add_library(velox_benchmark_common STATIC ${VELOX_BENCHMARK_COMMON_SRCS}) -target_include_directories(velox_benchmark_common PUBLIC ${CMAKE_SOURCE_DIR}/velox ${CMAKE_SOURCE_DIR}/core) -target_link_libraries(velox_benchmark_common PUBLIC Arrow::parquet velox benchmark::benchmark google::glog) +target_include_directories( + velox_benchmark_common PUBLIC ${CMAKE_SOURCE_DIR}/velox + ${CMAKE_SOURCE_DIR}/core) +target_link_libraries( + velox_benchmark_common PUBLIC Arrow::parquet velox benchmark::benchmark + google::glog) function(add_velox_benchmark BM_EXEC BM_FILE) add_executable(${BM_EXEC} ${BM_FILE}) diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.cc b/cpp/velox/substrait/SubstraitToVeloxPlan.cc index 4e875d4790e5..8b8a9262403c 100644 --- a/cpp/velox/substrait/SubstraitToVeloxPlan.cc +++ b/cpp/velox/substrait/SubstraitToVeloxPlan.cc @@ -20,6 +20,7 @@ #include "VariantToVectorConverter.h" #include "velox/connectors/hive/HiveDataSink.h" #include "velox/exec/TableWriter.h" +#include "velox/type/Filter.h" #include "velox/type/Type.h" #include "utils/ConfigExtractor.h" @@ -1465,10 +1466,12 @@ connector::hive::SubfieldFilters SubstraitToVeloxPlanConverter::createSubfieldFi auto expr = scalarFunction.arguments()[0].value(); if (expr.has_scalar_function()) { // Set its child to filter info with reverse enabled. - setFilterInfo(scalarFunction.arguments()[0].value().scalar_function(), inputTypeList, columnToFilterInfo, true); + setFilterInfo(expr.scalar_function(), inputTypeList, columnToFilterInfo, true); + } else if (expr.has_singular_or_list()) { + auto singularOrList = expr.singular_or_list(); + setFilterInfo(singularOrList, columnToFilterInfo, true); } else { - // TODO: support push down of Not In. - VELOX_NYI("Scalar function expected."); + VELOX_NYI("Only support push down Not with scalar function or In."); } } else if (filterName == sOr) { VELOX_CHECK(scalarFunction.arguments().size() == 2); @@ -1593,24 +1596,26 @@ bool SubstraitToVeloxPlanConverter::canPushdownNot( std::vector& rangeRecorders) { VELOX_CHECK(scalarFunction.arguments().size() == 1, "Only one arg is expected for Not."); const auto& notArg = scalarFunction.arguments()[0]; - if (!notArg.value().has_scalar_function()) { - // Not for a Boolean Literal or Or List is not supported curretly. - // It can be pushed down with an AlwaysTrue or AlwaysFalse Range. 
- return false; - } - - auto argFunction = - SubstraitParser::findFunctionSpec(functionMap_, notArg.value().scalar_function().function_reference()); - auto functionName = SubstraitParser::getNameBeforeDelimiter(argFunction); + if (notArg.value().has_singular_or_list()) { + auto singularOrList = notArg.value().singular_or_list(); + if (!canPushdownSingularOrList(singularOrList)) { + return false; + } + uint32_t colIdx = getColumnIndexFromSingularOrList(singularOrList); + return rangeRecorders.at(colIdx).setInRange(); + } else if (notArg.value().has_scalar_function()) { + auto argFunction = + SubstraitParser::findFunctionSpec(functionMap_, notArg.value().scalar_function().function_reference()); + auto functionName = SubstraitParser::getNameBeforeDelimiter(argFunction); - static const std::unordered_set supportedNotFunctions = {sGte, sGt, sLte, sLt, sEqual}; + static const std::unordered_set supportedNotFunctions = {sGte, sGt, sLte, sLt, sEqual}; - uint32_t fieldIdx; - bool isFieldOrWithLiteral = fieldOrWithLiteral(notArg.value().scalar_function().arguments(), fieldIdx); + uint32_t fieldIdx; + bool isFieldOrWithLiteral = fieldOrWithLiteral(notArg.value().scalar_function().arguments(), fieldIdx); - if (supportedNotFunctions.find(functionName) != supportedNotFunctions.end() && isFieldOrWithLiteral && - rangeRecorders.at(fieldIdx).setCertainRangeForFunction(functionName, true /*reverse*/)) { - return true; + return ( + supportedNotFunctions.find(functionName) != supportedNotFunctions.end() && isFieldOrWithLiteral && + rangeRecorders.at(fieldIdx).setCertainRangeForFunction(functionName, true /*reverse*/)); } return false; } @@ -1966,6 +1971,7 @@ template void SubstraitToVeloxPlanConverter::setInFilter( const std::vector& variants, bool nullAllowed, + bool negated, const std::string& inputName, connector::hive::SubfieldFilters& filters) {} @@ -1973,6 +1979,7 @@ template <> void SubstraitToVeloxPlanConverter::setInFilter( const std::vector& variants, bool nullAllowed, + bool negated, const std::string& inputName, connector::hive::SubfieldFilters& filters) { std::vector values; @@ -1981,13 +1988,18 @@ void SubstraitToVeloxPlanConverter::setInFilter( int64_t value = variant.value(); values.emplace_back(value); } - filters[common::Subfield(inputName)] = common::createBigintValues(values, nullAllowed); + if (negated) { + filters[common::Subfield(inputName)] = common::createNegatedBigintValues(values, nullAllowed); + } else { + filters[common::Subfield(inputName)] = common::createBigintValues(values, nullAllowed); + } } template <> void SubstraitToVeloxPlanConverter::setInFilter( const std::vector& variants, bool nullAllowed, + bool negated, const std::string& inputName, connector::hive::SubfieldFilters& filters) { // Use bigint values for int type. @@ -1998,13 +2010,18 @@ void SubstraitToVeloxPlanConverter::setInFilter( int64_t value = variant.value(); values.emplace_back(value); } - filters[common::Subfield(inputName)] = common::createBigintValues(values, nullAllowed); + if (negated) { + filters[common::Subfield(inputName)] = common::createNegatedBigintValues(values, nullAllowed); + } else { + filters[common::Subfield(inputName)] = common::createBigintValues(values, nullAllowed); + } } template <> void SubstraitToVeloxPlanConverter::setInFilter( const std::vector& variants, bool nullAllowed, + bool negated, const std::string& inputName, connector::hive::SubfieldFilters& filters) { // Use bigint values for small int type. 
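Between the hunks above and below, each integer-type `setInFilter` specialization gains the same `negated` branch. A minimal standalone sketch of that pattern is shown here, relying only on the `createBigintValues`/`createNegatedBigintValues` helpers from `velox/type/Filter.h`, which this .cc file now includes; the wrapper function name and the example predicates are illustrative, not part of the patch.

```
// Minimal sketch of the IN / NOT IN pattern used by the setInFilter
// specializations in this patch (illustrative helper, not Gluten code).
#include "velox/type/Filter.h"

#include <memory>
#include <vector>

using namespace facebook::velox;

std::unique_ptr<common::Filter> makeBigintInFilter(
    const std::vector<int64_t>& values,
    bool nullAllowed,
    bool negated) {
  if (negated) {
    // e.g. Spark: WHERE l_orderkey NOT IN (1, 2, 3)
    return common::createNegatedBigintValues(values, nullAllowed);
  }
  // e.g. Spark: WHERE l_orderkey IN (1, 2, 3)
  return common::createBigintValues(values, nullAllowed);
}
```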
@@ -2015,13 +2032,18 @@ void SubstraitToVeloxPlanConverter::setInFilter( int64_t value = variant.value(); values.emplace_back(value); } - filters[common::Subfield(inputName)] = common::createBigintValues(values, nullAllowed); + if (negated) { + filters[common::Subfield(inputName)] = common::createNegatedBigintValues(values, nullAllowed); + } else { + filters[common::Subfield(inputName)] = common::createBigintValues(values, nullAllowed); + } } template <> void SubstraitToVeloxPlanConverter::setInFilter( const std::vector& variants, bool nullAllowed, + bool negated, const std::string& inputName, connector::hive::SubfieldFilters& filters) { // Use bigint values for tiny int type. @@ -2032,13 +2054,18 @@ void SubstraitToVeloxPlanConverter::setInFilter( int64_t value = variant.value(); values.emplace_back(value); } - filters[common::Subfield(inputName)] = common::createBigintValues(values, nullAllowed); + if (negated) { + filters[common::Subfield(inputName)] = common::createNegatedBigintValues(values, nullAllowed); + } else { + filters[common::Subfield(inputName)] = common::createBigintValues(values, nullAllowed); + } } template <> void SubstraitToVeloxPlanConverter::setInFilter( const std::vector& variants, bool nullAllowed, + bool negated, const std::string& inputName, connector::hive::SubfieldFilters& filters) { std::vector values; @@ -2047,7 +2074,11 @@ void SubstraitToVeloxPlanConverter::setInFilter( std::string value = variant.value(); values.emplace_back(value); } - filters[common::Subfield(inputName)] = std::make_unique(values, nullAllowed); + if (negated) { + filters[common::Subfield(inputName)] = std::make_unique(values, nullAllowed); + } else { + filters[common::Subfield(inputName)] = std::make_unique(values, nullAllowed); + } } template @@ -2102,6 +2133,17 @@ void SubstraitToVeloxPlanConverter::constructSubfieldFilters( if (filterInfo.notValue_) { filters[common::Subfield(inputName)] = std::make_unique(!filterInfo.notValue_.value().value(), nullAllowed); + } else if (filterInfo.notValues_.size() > 0) { + std::set notValues; + for (auto v : filterInfo.notValues_) { + notValues.emplace(v.value()); + } + if (notValues.size() == 1) { + filters[common::Subfield(inputName)] = std::make_unique(!(*notValues.begin()), nullAllowed); + } else { + // if there are more than one distinct value in NOT IN list, the filter should be AlwaysFalse + filters[common::Subfield(inputName)] = std::make_unique(); + } } else if (rangeSize == 0) { // IsNull/IsNotNull. if (!nullAllowed) { @@ -2140,11 +2182,22 @@ void SubstraitToVeloxPlanConverter::constructSubfieldFilters( if (filterInfo.values_.size() > 0) { // To filter out null is a default behaviour of Spark IN expression. nullAllowed = false; - setInFilter(filterInfo.values_, nullAllowed, inputName, filters); + setInFilter(filterInfo.values_, nullAllowed, false, inputName, filters); // Currently, In cannot coexist with other filter conditions // due to multirange is in 'OR' relation but 'AND' is needed. VELOX_CHECK(rangeSize == 0, "LowerBounds or upperBounds conditons cannot be supported after IN filter."); VELOX_CHECK(!filterInfo.notValue_.has_value(), "Not equal cannot be supported after IN filter."); + VELOX_CHECK(filterInfo.notValues_.size() == 0, "Not in cannot be supported after IN filter."); + return; + } + + // Handle not in filter. 
+ if (filterInfo.notValues_.size() > 0) { + setInFilter(filterInfo.notValues_, filterInfo.nullAllowed_, true, inputName, filters); + // Currently, NOT IN cannot coexist with other filter conditions + // due to multirange is in 'OR' relation but 'AND' is needed. + VELOX_CHECK(rangeSize == 0, "LowerBounds or upperBounds conditions cannot be supported after NOT IN filter."); + VELOX_CHECK(!filterInfo.notValue_.has_value(), "Not equal cannot be supported after NOT IN filter."); return; } @@ -2429,7 +2482,8 @@ uint32_t SubstraitToVeloxPlanConverter::getColumnIndexFromSingularOrList( void SubstraitToVeloxPlanConverter::setFilterInfo( const ::substrait::Expression_SingularOrList& singularOrList, - std::vector& columnToFilterInfo) { + std::vector& columnToFilterInfo, + bool reverse) { VELOX_CHECK(singularOrList.options_size() > 0, "At least one option is expected."); // Get the column index. uint32_t colIdx = getColumnIndexFromSingularOrList(singularOrList); @@ -2443,7 +2497,11 @@ void SubstraitToVeloxPlanConverter::setFilterInfo( variants.emplace_back(exprConverter_->toVeloxExpr(option.literal())->value()); } // Set the value list to filter info. - columnToFilterInfo[colIdx].setValues(variants); + if (!reverse) { + columnToFilterInfo[colIdx].setValues(variants); + } else { + columnToFilterInfo[colIdx].setNotValues(variants); + } } } // namespace gluten diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.h b/cpp/velox/substrait/SubstraitToVeloxPlan.h index 3a0e677afeaa..1535b1f85f51 100644 --- a/cpp/velox/substrait/SubstraitToVeloxPlan.h +++ b/cpp/velox/substrait/SubstraitToVeloxPlan.h @@ -377,6 +377,16 @@ class SubstraitToVeloxPlanConverter { } } + // Set a list of values to be used in the push down of 'not in' expression. + void setNotValues(const std::vector& notValues) { + for (const auto& value : notValues) { + notValues_.emplace_back(value); + } + if (!initialized_) { + initialized_ = true; + } + } + // Whether this filter map is initialized. bool initialized_ = false; @@ -402,6 +412,9 @@ class SubstraitToVeloxPlanConverter { // The list of values used in 'in' expression. std::vector values_; + + // The list of values the column should not be equal to. + std::vector notValues_; }; /// Returns unique ID to use for plan node. Produces sequential numbers @@ -464,9 +477,11 @@ class SubstraitToVeloxPlanConverter { bool reverse = false); /// Extract SingularOrList and set it to the filter info map. + /// If reverse is true, the opposite filter info will be set. void setFilterInfo( const ::substrait::Expression_SingularOrList& singularOrList, - std::vector& columnToFilterInfo); + std::vector& columnToFilterInfo, + bool reverse = false); /// Extract SingularOrList and returns the field index. static uint32_t getColumnIndexFromSingularOrList(const ::substrait::Expression_SingularOrList&); @@ -484,13 +499,15 @@ class SubstraitToVeloxPlanConverter { template void createNotEqualFilter(variant notVariant, bool nullAllowed, std::vector>& colFilters); - /// Create a values range to handle in filter. - /// variants: the list of values extracted from the in expression. + /// Create a values range to handle (not) in filter. + /// variants: the list of values extracted from the (not) in expression. + /// negated: false for IN filter, true for NOT IN filter. /// inputName: the column input name. 
template void setInFilter( const std::vector& variants, bool nullAllowed, + bool negated, const std::string& inputName, connector::hive::SubfieldFilters& filters); diff --git a/cpp/velox/tests/CMakeLists.txt b/cpp/velox/tests/CMakeLists.txt index 29beb69da220..f3d65f127f67 100644 --- a/cpp/velox/tests/CMakeLists.txt +++ b/cpp/velox/tests/CMakeLists.txt @@ -16,14 +16,9 @@ function(add_velox_test TEST_EXEC) set(options) set(one_value_args) - set(multi_value_args - SOURCES - ) - cmake_parse_arguments(ARG - "${options}" - "${one_value_args}" - "${multi_value_args}" - ${ARGN}) + set(multi_value_args SOURCES) + cmake_parse_arguments(ARG "${options}" "${one_value_args}" + "${multi_value_args}" ${ARGN}) if(ARG_SOURCES) set(SOURCES ${ARG_SOURCES}) @@ -31,34 +26,34 @@ function(add_velox_test TEST_EXEC) message(FATAL_ERROR "No sources specified for test ${TEST_NAME}") endif() add_executable(${TEST_EXEC} ${SOURCES} ${VELOX_TEST_COMMON_SRCS}) - target_include_directories(${TEST_EXEC} PRIVATE ${CMAKE_SOURCE_DIR}/velox ${CMAKE_SOURCE_DIR}/src ${VELOX_BUILD_PATH}/_deps/duckdb-src/src/include) - target_link_libraries(${TEST_EXEC} velox_benchmark_common GTest::gtest GTest::gtest_main) + target_include_directories( + ${TEST_EXEC} PRIVATE ${CMAKE_SOURCE_DIR}/velox ${CMAKE_SOURCE_DIR}/src + ${VELOX_BUILD_PATH}/_deps/duckdb-src/src/include) + target_link_libraries(${TEST_EXEC} velox_benchmark_common GTest::gtest + GTest::gtest_main) gtest_discover_tests(${TEST_EXEC} DISCOVERY_MODE PRE_TEST) endfunction() set(VELOX_TEST_COMMON_SRCS JsonToProtoConverter.cc FilePathGenerator.cc) add_velox_test(velox_shuffle_writer_test SOURCES VeloxShuffleWriterTest.cc) -# TODO: ORC is not well supported. -# add_velox_test(orc_test SOURCES OrcTest.cc) +# TODO: ORC is not well supported. add_velox_test(orc_test SOURCES OrcTest.cc) add_velox_test( - velox_operators_test - SOURCES - VeloxColumnarToRowTest.cc - VeloxRowToColumnarTest.cc - VeloxColumnarBatchSerializerTest.cc - VeloxColumnarBatchTest.cc) + velox_operators_test SOURCES VeloxColumnarToRowTest.cc + VeloxRowToColumnarTest.cc VeloxColumnarBatchSerializerTest.cc + VeloxColumnarBatchTest.cc) add_velox_test( - velox_plan_conversion_test - SOURCES - Substrait2VeloxPlanConversionTest.cc - Substrait2VeloxPlanValidatorTest.cc - Substrait2VeloxValuesNodeConversionTest.cc - SubstraitExtensionCollectorTest.cc - VeloxSubstraitRoundTripTest.cc - VeloxSubstraitSignatureTest.cc - VeloxToSubstraitTypeTest.cc) -add_velox_test(spark_functions_test SOURCES SparkFunctionTest.cc FunctionTest.cc) + velox_plan_conversion_test + SOURCES + Substrait2VeloxPlanConversionTest.cc + Substrait2VeloxPlanValidatorTest.cc + Substrait2VeloxValuesNodeConversionTest.cc + SubstraitExtensionCollectorTest.cc + VeloxSubstraitRoundTripTest.cc + VeloxSubstraitSignatureTest.cc + VeloxToSubstraitTypeTest.cc) +add_velox_test(spark_functions_test SOURCES SparkFunctionTest.cc + FunctionTest.cc) add_velox_test(execution_ctx_test SOURCES RuntimeTest.cc) add_velox_test(velox_memory_test SOURCES MemoryManagerTest.cc) add_velox_test(buffer_outputstream_test SOURCES BufferOutputStreamTest.cc) diff --git a/docs/developers/CppCodingStyle.md b/docs/developers/CppCodingStyle.md index 9dca4cf69fbc..42101882a9e5 100644 --- a/docs/developers/CppCodingStyle.md +++ b/docs/developers/CppCodingStyle.md @@ -28,11 +28,20 @@ Gluten CPP coding, there are a few Philosophical rules as the following. 
## Code Formatting Many aspects of C++ coding style will be covered by clang-format, such as spacing, -line width, indentation and ordering (for includes, using directives and etc).  +line width, indentation and ordering (for includes, using directives, etc.). * Always ensure your code is compatible with clang-format-15 for Velox backend. * `dev/formatcppcode.sh` is provided for formatting Velox CPP code. +To format CMake files such as CMakeLists.txt and *.cmake, `cmake-format` needs to +be installed. Here is an example. + +``` +apt install python3-pip -y +pip3 install --user cmake-format +cmake-format --first-comment-is-literal True --in-place cpp/velox/CMakeLists.txt +``` + ## Naming Conventions * Use **PascalCase** for types (class, struct, enum, type alias, type