Skip to content

Commit

Permalink
[CORE] Initialize new module structure gluten-core / gluten-substrait
Browse files Browse the repository at this point in the history
  • Loading branch information
zhztheplayer committed Aug 28, 2024
1 parent 3928dc2 commit f9fb568
Show file tree
Hide file tree
Showing 601 changed files with 438 additions and 274 deletions.
2 changes: 1 addition & 1 deletion .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ DOCS:
CORE:
- changed-files:
- any-glob-to-any-file: [
'gluten-core/**/*',
'gluten-substrait/**/*',
'shims/**/*',
'gluten-ras/**/*',
'gluten-ui/**/*',
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/clickhouse_be_trigger.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ on:
- 'gluten-celeborn/common/**'
- 'gluten-celeborn/package/**'
- 'gluten-celeborn/clickhouse/**'
- 'gluten-core/**'
- 'gluten-substrait/**'
- 'gluten-ut/**'
- 'shims/**'
- 'tools/gluten-it/**'
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/velox_backend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ on:
- 'gluten-celeborn/package/**'
- 'gluten-celeborn/velox/**'
- 'gluten-ras/**'
- 'gluten-core/**'
- 'gluten-substrait/**'
- 'gluten-data/**'
- 'gluten-delta/**'
- 'gluten-iceberg/**'
Expand Down
4 changes: 2 additions & 2 deletions backends-clickhouse/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
<dependencies>
<dependency>
<groupId>org.apache.gluten</groupId>
<artifactId>gluten-core</artifactId>
<artifactId>gluten-substrait</artifactId>
<version>${project.version}</version>
<scope>compile</scope>
<exclusions>
Expand All @@ -33,7 +33,7 @@
</dependency>
<dependency>
<groupId>org.apache.gluten</groupId>
<artifactId>gluten-core</artifactId>
<artifactId>gluten-substrait</artifactId>
<version>${project.version}</version>
<type>test-jar</type>
<scope>test</scope>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ object RunTPCHTest {
FileUtils.forceMkdir(new File(warehouse))
FileUtils.forceMkdir(new File(metaStorePathAbsolute))

val resourcePath = rootPath + "../../../../gluten-core/src/test/resources/"
val resourcePath = rootPath + "../../../../gluten-substrait/src/test/resources/"
val queryPath = resourcePath + "/tpch-queries/"
// which sql to execute
val sqlFilePath = queryPath + "q01.sql"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ object CHUTSoftAffinityManager extends AffinityManager {
override lazy val minOnTargetHosts: Int =
GlutenConfig.GLUTEN_SOFT_AFFINITY_MIN_TARGET_HOSTS_DEFAULT_VALUE

override lazy val detectDuplicateReading = true
override lazy val detectDuplicateReading: Boolean = true

override lazy val duplicateReadingMaxCacheItems =
override lazy val duplicateReadingMaxCacheItems: Int =
GlutenConfig.GLUTEN_SOFT_AFFINITY_DUPLICATE_READING_MAX_CACHE_ITEMS_DEFAULT_VALUE
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class GlutenClickHouseDecimalSuite

override protected val tablesPath: String = basePath + "/tpch-data"
override protected val tpchQueries: String =
rootPath + "../../../../gluten-core/src/test/resources/tpch-queries"
rootPath + "../../../../gluten-substrait/src/test/resources/tpch-queries"
override protected val queriesResults: String = rootPath + "queries-output"
override protected val createNullableTables = true
override protected def createTPCHNotNullTables(): Unit = {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class GlutenClickHouseFileFormatSuite

override protected val tablesPath: String = basePath + "/tpch-data"
override protected val tpchQueries: String =
rootPath + "../../../../gluten-core/src/test/resources/tpch-queries"
rootPath + "../../../../gluten-substrait/src/test/resources/tpch-queries"
override protected val queriesResults: String = rootPath + "queries-output"

protected val orcDataPath: String = rootPath + "orc-data"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class GlutenClickHouseJoinSuite extends GlutenClickHouseWholeStageTransformerSui

protected val tablesPath: String = basePath + "/tpch-data"
protected val tpchQueries: String =
rootPath + "../../../../gluten-core/src/test/resources/tpch-queries"
rootPath + "../../../../gluten-substrait/src/test/resources/tpch-queries"
protected val queriesResults: String = rootPath + "queries-output"

private val joinAlgorithm = "spark.gluten.sql.columnar.backend.ch.runtime_settings.join_algorithm"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class GlutenClickHouseS3SourceSuite extends GlutenClickHouseTPCHAbstractSuite {

override protected val tablesPath: String = basePath + "/tpch-data"
override protected val tpchQueries: String =
rootPath + "../../../../gluten-core/src/test/resources/tpch-queries"
rootPath + "../../../../gluten-substrait/src/test/resources/tpch-queries"
override protected val queriesResults: String = rootPath + "queries-output"

override protected def sparkConf: SparkConf = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ abstract class GlutenClickHouseTPCDSAbstractSuite
protected val tablesPath: String = UTSystemParameters.tpcdsDecimalDataPath + "/"
protected val db_name: String = "tpcdsdb"
protected val tpcdsQueries: String =
rootPath + "../../../../gluten-core/src/test/resources/tpcds-queries/tpcds.queries.original"
rootPath + "../../../../gluten-substrait/src/test/resources/tpcds-queries/tpcds.queries.original"
protected val queriesResults: String = rootPath + "tpcds-decimal-queries-output"

/** Return values: (sql num, is fall back) */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ abstract class GlutenClickHouseTPCHAbstractSuite
protected val needCopyParquetToTablePath = false

protected val parquetTableDataPath: String =
"../../../../gluten-core/src/test/resources/tpch-data"
"../../../../gluten-substrait/src/test/resources/tpch-data"

protected val tablesPath: String
protected val tpchQueries: String
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class GlutenClickHouseTPCHBucketSuite

val parquetTablePath = basePath + "/tpch-data"
val parquetTableDataPath: String =
"../../../../gluten-core/src/test/resources/tpch-data"
"../../../../gluten-substrait/src/test/resources/tpch-data"
FileUtils.copyDirectory(new File(rootPath + parquetTableDataPath), new File(parquetTablePath))

createNotNullTPCHTablesInParquet(parquetTablePath)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class GlutenFunctionValidateSuite extends GlutenClickHouseWholeStageTransformerS

protected val tablesPath: String = basePath + "/tpch-data"
protected val tpchQueries: String =
rootPath + "../../../../gluten-core/src/test/resources/tpch-queries"
rootPath + "../../../../gluten-substrait/src/test/resources/tpch-queries"
protected val queriesResults: String = rootPath + "queries-output"

private var parquetPath: String = _
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class GlutenClickhouseFunctionSuite extends GlutenClickHouseTPCHAbstractSuite {

override protected val tablesPath: String = basePath + "/tpch-data"
override protected val tpchQueries: String =
rootPath + "../../../../gluten-core/src/test/resources/tpch-queries"
rootPath + "../../../../gluten-substrait/src/test/resources/tpch-queries"
override protected val queriesResults: String = rootPath + "queries-output"

override protected def createTPCHNotNullTables(): Unit = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class GlutenCustomAggExpressionSuite extends GlutenClickHouseTPCHAbstractSuite {

override protected val tablesPath: String = basePath + "/tpch-data"
override protected val tpchQueries: String =
rootPath + "../../../../gluten-core/src/test/resources/tpch-queries"
rootPath + "../../../../gluten-substrait/src/test/resources/tpch-queries"
override protected val queriesResults: String = rootPath + "queries-output"

override protected def sparkConf: SparkConf = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class GlutenClickHouseTPCHMetricsSuite extends GlutenClickHouseTPCHAbstractSuite

override protected val tablesPath: String = basePath + "/tpch-data"
override protected val tpchQueries: String =
rootPath + "../../../../gluten-core/src/test/resources/tpch-queries"
rootPath + "../../../../gluten-substrait/src/test/resources/tpch-queries"
override protected val queriesResults: String = rootPath + "queries-output"

protected val metricsJsonFilePath: String = rootPath + "metrics-json"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class GlutenParquetFilterSuite
with Logging {

private val tpchQueriesResourceFolder: String =
rootPath + "../../../../gluten-core/src/test/resources/tpch-queries"
rootPath + "../../../../gluten-substrait/src/test/resources/tpch-queries"

override protected def sparkConf: SparkConf =
super.sparkConf
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class GlutenClickHouseDatetimeExpressionSuite

override protected val tablesPath: String = basePath + "/tpch-data"
override protected val tpchQueries: String =
rootPath + "../../../../gluten-core/src/test/resources/tpch-queries"
rootPath + "../../../../gluten-substrait/src/test/resources/tpch-queries"
override protected val queriesResults: String = rootPath + "queries-output"

/** Run Gluten + ClickHouse Backend with SortShuffleManager */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class GlutenClickHouseHDFSSuite

override protected val tablesPath: String = HDFS_URL_ENDPOINT + "/tpch-data"
override protected val tpchQueries: String =
rootPath + "../../../../gluten-core/src/test/resources/tpch-queries"
rootPath + "../../../../gluten-substrait/src/test/resources/tpch-queries"
override protected val queriesResults: String = rootPath + "queries-output"

private val hdfsCachePath = "/tmp/gluten_hdfs_cache/"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class GlutenClickHouseTPCHColumnarShuffleParquetAQESuite

override protected val tablesPath: String = basePath + "/tpch-data"
override protected val tpchQueries: String =
rootPath + "../../../../gluten-core/src/test/resources/tpch-queries"
rootPath + "../../../../gluten-substrait/src/test/resources/tpch-queries"
override protected val queriesResults: String = rootPath + "queries-output"

/** Run Gluten + ClickHouse Backend with SortShuffleManager */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class GlutenClickHouseTPCHParquetAQEConcurrentSuite

override protected val tablesPath: String = basePath + "/tpch-data"
override protected val tpchQueries: String =
rootPath + "../../../../gluten-core/src/test/resources/tpch-queries"
rootPath + "../../../../gluten-substrait/src/test/resources/tpch-queries"
override protected val queriesResults: String = rootPath + "queries-output"

/** Run Gluten + ClickHouse Backend with SortShuffleManager */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class GlutenClickHouseTPCHParquetAQESuite

override protected val tablesPath: String = basePath + "/tpch-data"
override protected val tpchQueries: String =
rootPath + "../../../../gluten-core/src/test/resources/tpch-queries"
rootPath + "../../../../gluten-substrait/src/test/resources/tpch-queries"
override protected val queriesResults: String = rootPath + "queries-output"

/** Run Gluten + ClickHouse Backend with SortShuffleManager */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class GlutenClickHouseTPCHParquetBucketSuite

override protected val tablesPath: String = basePath + "/tpch-data-ch"
override protected val tpchQueries: String =
rootPath + "../../../../gluten-core/src/test/resources/tpch-queries"
rootPath + "../../../../gluten-substrait/src/test/resources/tpch-queries"
override protected val queriesResults: String = rootPath + "queries-output"

protected val bucketTableResourcePath: String = rootPath + "tpch-data-bucket/parquet_bucket"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends GlutenClickHouseTPCHAbstr

override protected val tablesPath: String = basePath + "/tpch-data"
override protected val tpchQueries: String =
rootPath + "../../../../gluten-core/src/test/resources/tpch-queries"
rootPath + "../../../../gluten-substrait/src/test/resources/tpch-queries"
override protected val queriesResults: String = rootPath + "queries-output"

protected val BACKEND_CONF_KEY = "spark.gluten.sql.columnar.backend.ch."
Expand Down
10 changes: 2 additions & 8 deletions backends-velox/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
<dependencies>
<dependency>
<groupId>org.apache.gluten</groupId>
<artifactId>gluten-core</artifactId>
<artifactId>gluten-substrait</artifactId>
<version>${project.version}</version>
<scope>compile</scope>
</dependency>
Expand All @@ -45,7 +45,7 @@
</dependency>
<dependency>
<groupId>org.apache.gluten</groupId>
<artifactId>gluten-core</artifactId>
<artifactId>gluten-substrait</artifactId>
<version>${project.version}</version>
<type>test-jar</type>
<scope>test</scope>
Expand Down Expand Up @@ -78,12 +78,6 @@
<version>${project.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.gluten</groupId>
<artifactId>spark-sql-columnar-shims-common</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.scalacheck</groupId>
<artifactId>scalacheck_${scala.binary.version}</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class VeloxTPCDSSuite extends VeloxWholeStageTransformerSuite {
override protected val fileFormat: String = "parquet"

private val queryPath = System.getProperty("user.dir") +
"/gluten-core/src/test/resources/tpcds-queries/tpcds.queries.original/"
"/gluten-substrait/src/test/resources/tpcds-queries/tpcds.queries.original/"

protected var queryTables: Map[String, DataFrame] = _

Expand Down
4 changes: 2 additions & 2 deletions cpp/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,11 @@ endif()
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMake" ${CMAKE_MODULE_PATH})

set(SUBSTRAIT_PROTO_SRC_DIR
${GLUTEN_HOME}/gluten-core/src/main/resources/substrait/proto)
${GLUTEN_HOME}/gluten-substrait/src/main/resources/substrait/proto)
message(STATUS "Set Substrait Proto Directory in ${SUBSTRAIT_PROTO_SRC_DIR}")

set(GLUTEN_PROTO_SRC_DIR
${GLUTEN_HOME}/gluten-core/src/main/resources/org/apache/gluten/proto)
${GLUTEN_HOME}/gluten-substrait/src/main/resources/org/apache/gluten/proto)
message(STATUS "Set Gluten Proto Directory in ${GLUTEN_PROTO_SRC_DIR}")

find_program(CCACHE_FOUND ccache)
Expand Down
2 changes: 1 addition & 1 deletion docs/developers/HowTo.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ transforms Spark plan to Substrait plan, and then send the Substrait plan to the

The Gluten code consists of two parts: the C++ code and the Java/Scala code.
1. All C++ code is placed under the `${GLUTEN_HOME}/cpp` directory; the Java/Scala code is placed under several directories, such as
`${GLUTEN_HOME}/gluten-core` `${GLUTEN_HOME}/gluten-data` `${GLUTEN_HOME}/backends-velox`.
`${GLUTEN_HOME}/gluten-substrait` `${GLUTEN_HOME}/gluten-data` `${GLUTEN_HOME}/backends-velox`.
2. The Java/Scala code is responsible for validating and transforming the execution plan. Source data should also be provided; it may
come from files or from other sources such as the network.
3. The C++ codes take the Substrait plan and the source data as inputs and transform the Substrait plan to the corresponding backend plan. If the backend
Expand Down
6 changes: 3 additions & 3 deletions docs/get-started/ClickHouse.md
Original file line number Diff line number Diff line change
Expand Up @@ -489,11 +489,11 @@ This benchmark is tested on AWS EC2 cluster, there are 7 EC2 instances:

Refer to [Deploy Spark 3.2.2](#deploy-spark-322)

- Deploy gluten-core-XXXXX-jar-with-dependencies.jar
- Deploy gluten-substrait-XXXXX-jar-with-dependencies.jar

```
#deploy 'gluten-core-XXXXX-jar-with-dependencies.jar' to every node, and then
cp gluten-core-XXXXX-jar-with-dependencies.jar /path_to_spark/jars/
#deploy 'gluten-substrait-XXXXX-jar-with-dependencies.jar' to every node, and then
cp gluten-substrait-XXXXX-jar-with-dependencies.jar /path_to_spark/jars/
```

- Deploy ClickHouse library
Expand Down
6 changes: 3 additions & 3 deletions docs/get-started/Velox.md
Original file line number Diff line number Diff line change
Expand Up @@ -662,12 +662,12 @@ All TPC-H and TPC-DS queries are supported in Gluten Velox backend.
The data generation scripts are [TPC-H datagen script](../../tools/workload/tpch/gen_data/parquet_dataset/tpch_datagen_parquet.sh) and
[TPC-DS datagen script](../../tools/workload/tpcds/gen_data/parquet_dataset/tpcds_datagen_parquet.sh).

The used TPC-H and TPC-DS queries are the original ones, and can be accessed from [TPC-DS queries](../../gluten-core/src/test/resources/tpcds-queries/tpcds.queries.original)
and [TPC-H queries](../../gluten-core/src/test/resources/tpch-queries).
The used TPC-H and TPC-DS queries are the original ones, and can be accessed from [TPC-DS queries](../../gluten-substrait/src/test/resources/tpcds-queries/tpcds.queries.original)
and [TPC-H queries](../../gluten-substrait/src/test/resources/tpch-queries).

Some other versions of TPC-DS queries are also provided, but are **not** recommended for testing, including:

- the modified TPC-DS queries with "Decimal-to-Double": [TPC-DS non-decimal queries](../../gluten-core/src/test/resources/tpcds-queries/tpcds.queries.no-decimal) (outdated).
- the modified TPC-DS queries with "Decimal-to-Double": [TPC-DS non-decimal queries](../../gluten-substrait/src/test/resources/tpcds-queries/tpcds.queries.no-decimal) (outdated).

## Submit the Spark SQL job

Expand Down
2 changes: 1 addition & 1 deletion gluten-celeborn/clickhouse/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
</dependency>
<dependency>
<groupId>org.apache.gluten</groupId>
<artifactId>gluten-core</artifactId>
<artifactId>gluten-substrait</artifactId>
<version>${project.version}</version>
<type>test-jar</type>
<scope>test</scope>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class GlutenClickHouseRSSColumnarMemorySortShuffleSuite
rootPath + "../../../../../backends-clickhouse/src/test/resources/mergetree-queries-output"

override protected val parquetTableDataPath: String =
"../../../../../gluten-core/src/test/resources/tpch-data"
"../../../../../gluten-substrait/src/test/resources/tpch-data"

/** Run Gluten + ClickHouse Backend with ColumnarShuffleManager */
override protected def sparkConf: SparkConf = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class GlutenClickHouseRSSColumnarShuffleAQESuite
rootPath + "../../../../../backends-clickhouse/src/test/resources/mergetree-queries-output"

override protected val parquetTableDataPath: String =
"../../../../../gluten-core/src/test/resources/tpch-data"
"../../../../../gluten-substrait/src/test/resources/tpch-data"

/** Run Gluten + ClickHouse Backend with ColumnarShuffleManager */
override protected def sparkConf: SparkConf = {
Expand Down
2 changes: 1 addition & 1 deletion gluten-celeborn/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
<dependencies>
<dependency>
<groupId>org.apache.gluten</groupId>
<artifactId>gluten-core</artifactId>
<artifactId>gluten-substrait</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
Expand Down
Loading

0 comments on commit f9fb568

Please sign in to comment.