Skip to content

Commit

Permalink
[GLUTEN-7031][CORE] Initialize new module structure gluten-core / gluten-substrait (#7057)
Browse files Browse the repository at this point in the history

Closes #7031
  • Loading branch information
zhztheplayer authored Aug 29, 2024
1 parent 96130d1 commit 3af5bac
Show file tree
Hide file tree
Showing 338 changed files with 416 additions and 241 deletions.
1 change: 1 addition & 0 deletions .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ CORE:
- changed-files:
- any-glob-to-any-file: [
'gluten-core/**/*',
'gluten-substrait/**/*',
'shims/**/*',
'gluten-ras/**/*',
'gluten-ui/**/*',
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/clickhouse_be_trigger.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ on:
- 'gluten-celeborn/package/**'
- 'gluten-celeborn/clickhouse/**'
- 'gluten-core/**'
- 'gluten-substrait/**'
- 'gluten-ut/**'
- 'shims/**'
- 'tools/gluten-it/**'
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/velox_backend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ on:
- 'gluten-celeborn/velox/**'
- 'gluten-ras/**'
- 'gluten-core/**'
- 'gluten-substrait/**'
- 'gluten-data/**'
- 'gluten-delta/**'
- 'gluten-iceberg/**'
Expand Down
4 changes: 2 additions & 2 deletions backends-clickhouse/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
<dependencies>
<dependency>
<groupId>org.apache.gluten</groupId>
<artifactId>gluten-core</artifactId>
<artifactId>gluten-substrait</artifactId>
<version>${project.version}</version>
<scope>compile</scope>
<exclusions>
Expand All @@ -33,7 +33,7 @@
</dependency>
<dependency>
<groupId>org.apache.gluten</groupId>
<artifactId>gluten-core</artifactId>
<artifactId>gluten-substrait</artifactId>
<version>${project.version}</version>
<type>test-jar</type>
<scope>test</scope>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ object CHUTSoftAffinityManager extends AffinityManager {
override lazy val minOnTargetHosts: Int =
GlutenConfig.GLUTEN_SOFT_AFFINITY_MIN_TARGET_HOSTS_DEFAULT_VALUE

override lazy val detectDuplicateReading = true
override lazy val detectDuplicateReading: Boolean = true

override lazy val duplicateReadingMaxCacheItems =
override lazy val duplicateReadingMaxCacheItems: Int =
GlutenConfig.GLUTEN_SOFT_AFFINITY_DUPLICATE_READING_MAX_CACHE_ITEMS_DEFAULT_VALUE
}
10 changes: 2 additions & 8 deletions backends-velox/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
<dependencies>
<dependency>
<groupId>org.apache.gluten</groupId>
<artifactId>gluten-core</artifactId>
<artifactId>gluten-substrait</artifactId>
<version>${project.version}</version>
<scope>compile</scope>
</dependency>
Expand All @@ -45,7 +45,7 @@
</dependency>
<dependency>
<groupId>org.apache.gluten</groupId>
<artifactId>gluten-core</artifactId>
<artifactId>gluten-substrait</artifactId>
<version>${project.version}</version>
<type>test-jar</type>
<scope>test</scope>
Expand Down Expand Up @@ -78,12 +78,6 @@
<version>${project.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.gluten</groupId>
<artifactId>spark-sql-columnar-shims-common</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.scalacheck</groupId>
<artifactId>scalacheck_${scala.binary.version}</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion cpp-ch/local-engine/proto/substrait
4 changes: 2 additions & 2 deletions cpp/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,11 @@ endif()
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMake" ${CMAKE_MODULE_PATH})

set(SUBSTRAIT_PROTO_SRC_DIR
${GLUTEN_HOME}/gluten-core/src/main/resources/substrait/proto)
${GLUTEN_HOME}/gluten-substrait/src/main/resources/substrait/proto)
message(STATUS "Set Substrait Proto Directory in ${SUBSTRAIT_PROTO_SRC_DIR}")

set(GLUTEN_PROTO_SRC_DIR
${GLUTEN_HOME}/gluten-core/src/main/resources/org/apache/gluten/proto)
${GLUTEN_HOME}/gluten-substrait/src/main/resources/org/apache/gluten/proto)
message(STATUS "Set Gluten Proto Directory in ${GLUTEN_PROTO_SRC_DIR}")

find_program(CCACHE_FOUND ccache)
Expand Down
2 changes: 1 addition & 1 deletion docs/developers/HowTo.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ transforms Spark plan to Substrait plan, and then send the Substrait plan to the

The Gluten codes consist of two parts: the C++ codes and the Java/Scala codes.
1. All C++ codes are placed under the directory of `${GLUTEN_HOME}/cpp`, the Java/Scala codes are placed under several directories, such as
`${GLUTEN_HOME}/gluten-core` `${GLUTEN_HOME}/gluten-data` `${GLUTEN_HOME}/backends-velox`.
`${GLUTEN_HOME}/gluten-substrait` `${GLUTEN_HOME}/gluten-data` `${GLUTEN_HOME}/backends-velox`.
2. The Java/Scala codes are responsible for validating and transforming the execution plan. Source data should also be provided, the source data may
come from files or other forms such as networks.
3. The C++ codes take the Substrait plan and the source data as inputs and transform the Substrait plan to the corresponding backend plan. If the backend
Expand Down
6 changes: 3 additions & 3 deletions docs/get-started/ClickHouse.md
Original file line number Diff line number Diff line change
Expand Up @@ -489,11 +489,11 @@ This benchmark is tested on AWS EC2 cluster, there are 7 EC2 instances:

Refer to [Deploy Spark 3.2.2](#deploy-spark-322)

- Deploy gluten-core-XXXXX-jar-with-dependencies.jar
- Deploy gluten-substrait-XXXXX-jar-with-dependencies.jar

```
#deploy 'gluten-core-XXXXX-jar-with-dependencies.jar' to every node, and then
cp gluten-core-XXXXX-jar-with-dependencies.jar /path_to_spark/jars/
#deploy 'gluten-substrait-XXXXX-jar-with-dependencies.jar' to every node, and then
cp gluten-substrait-XXXXX-jar-with-dependencies.jar /path_to_spark/jars/
```

- Deploy ClickHouse library
Expand Down
2 changes: 1 addition & 1 deletion gluten-celeborn/clickhouse/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
</dependency>
<dependency>
<groupId>org.apache.gluten</groupId>
<artifactId>gluten-core</artifactId>
<artifactId>gluten-substrait</artifactId>
<version>${project.version}</version>
<type>test-jar</type>
<scope>test</scope>
Expand Down
2 changes: 1 addition & 1 deletion gluten-celeborn/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
<dependencies>
<dependency>
<groupId>org.apache.gluten</groupId>
<artifactId>gluten-core</artifactId>
<artifactId>gluten-substrait</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
Expand Down
200 changes: 5 additions & 195 deletions gluten-core/pom.xml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<parent>
<artifactId>gluten-parent</artifactId>
<groupId>org.apache.gluten</groupId>
Expand All @@ -10,19 +9,15 @@
<modelVersion>4.0.0</modelVersion>

<artifactId>gluten-core</artifactId>
<packaging>jar</packaging>
<name>Gluten Core</name>

<properties>
<build.testJarPhase>none</build.testJarPhase>
<build.copyDependenciesPhase>package</build.copyDependenciesPhase>
<jars.target.dir>${project.build.directory}/scala-${scala.binary.version}/jars</jars.target.dir>
</properties>

<dependencies>
<dependency>
<groupId>org.apache.gluten</groupId>
<artifactId>gluten-ui</artifactId>
<artifactId>gluten-ras-common</artifactId>
<version>${project.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.gluten</groupId>
Expand All @@ -32,7 +27,7 @@
</dependency>
<dependency>
<groupId>org.apache.gluten</groupId>
<artifactId>gluten-ras-common</artifactId>
<artifactId>spark-sql-columnar-shims-common</artifactId>
<version>${project.version}</version>
<scope>compile</scope>
</dependency>
Expand Down Expand Up @@ -121,203 +116,18 @@
<version>3.1.0.0-RC2</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.seleniumhq.selenium</groupId>
<artifactId>selenium-htmlunit-driver</artifactId>
<version>2.52.0</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>io.trino.tpch</groupId>
<artifactId>tpch</artifactId>
<version>1.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>io.trino.tpcds</groupId>
<artifactId>tpcds</artifactId>
<version>1.4</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.kohsuke</groupId>
<artifactId>github-api</artifactId>
<version>1.117</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>io.jsonwebtoken</groupId>
<artifactId>jjwt-api</artifactId>
<version>0.10.5</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>io.jsonwebtoken</groupId>
<artifactId>jjwt-impl</artifactId>
<version>0.10.5</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>io.jsonwebtoken</groupId>
<artifactId>jjwt-jackson</artifactId>
<version>0.10.5</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.knowm.xchart</groupId>
<artifactId>xchart</artifactId>
<version>3.6.5</version>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>de.erichseifert.vectorgraphics2d</groupId>
<artifactId>VectorGraphics2D</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.gluten</groupId>
<artifactId>spark-sql-columnar-shims-common</artifactId>
<version>${project.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
<version>${protobuf.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>io.glutenproject</groupId>
<artifactId>protobuf-java-util</artifactId>
<version>${custom.protobuf.version}</version>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.11.0</version>
<scope>provided</scope>
</dependency>

<!-- Fasterxml -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.module</groupId>
<artifactId>jackson-module-scala_${scala.binary.version}</artifactId>
</dependency>
<!-- Java Faker for generating random data -->
<dependency>
<groupId>com.github.javafaker</groupId>
<artifactId>javafaker</artifactId>
<version>1.0.2</version>
<scope>test</scope>
</dependency>
</dependencies>

<profiles>
<profile>
<id>backends-velox</id>
<properties>
<backend.type>velox</backend.type>
<backend.home>${project.basedir}/../ep/build-velox/build/velox_ep</backend.home>
</properties>
</profile>
<profile>
<id>backends-clickhouse</id>
<properties>
<backend.type>ch</backend.type>
<backend.home>${project.basedir}/../cpp-ch/ClickHouse</backend.home>
</properties>
</profile>
</profiles>

<build>
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
<testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
<resources>
<resource>
<filtering>true</filtering>
<!-- Include the properties file to provide the build information. -->
<directory>${project.build.directory}/extra-resources</directory>
</resource>
</resources>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
<executions>
<execution>
<id>build-info</id>
<goals>
<goal>run</goal>
</goals>
<phase>generate-resources</phase>
<configuration>
<target>
<exec executable="bash" osfamily="unix">
<arg value="${project.basedir}/../dev/gluten-build-info.sh"/>
<arg value="${backend.type}"/>
<arg value="${backend.home}"/>
<arg value="${project.build.directory}/extra-resources"/>
<arg value="${project.version}"/>
<arg value="${java.version}"/>
<arg value="${scala.version}"/>
<arg value="${spark.version}"/>
<arg value="${hadoop.version}"/>
</exec>
</target>
</configuration>
</execution>
</executions>
</plugin>
<!-- compile proto buffer files using copied protoc binary -->
<plugin>
<groupId>org.xolstice.maven.plugins</groupId>
<artifactId>protobuf-maven-plugin</artifactId>
<executions>
<execution>
<id>compile-substrait-proto</id>
<phase>generate-sources</phase>
<goals>
<goal>compile</goal>
<goal>test-compile</goal>
</goals>
<configuration>
<protocArtifact>
com.google.protobuf:protoc:${protobuf.version}:exe:${os.detected.classifier}
</protocArtifact>
<protoSourceRoot>src/main/resources/substrait/proto</protoSourceRoot>
<clearOutputDirectory>true</clearOutputDirectory>
</configuration>
</execution>
<execution>
<id>compile-gluten-proto</id>
<phase>generate-sources</phase>
<goals>
<goal>compile</goal>
<goal>test-compile</goal>
</goals>
<configuration>
<protocArtifact>
com.google.protobuf:protoc:${protobuf.version}:exe:${os.detected.classifier}
</protocArtifact>
<protoSourceRoot>src/main/resources/org/apache/gluten/proto</protoSourceRoot>
<clearOutputDirectory>false</clearOutputDirectory>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-resources-plugin</artifactId>
Expand Down
Loading

0 comments on commit 3af5bac

Please sign in to comment.