Merge branch 'main' into remove_broadcast_config

apache · Mar 26, 2024 · d25c21d · d25c21d
2 parents 86167fd + 0826772
commit d25c21d
Show file tree

Hide file tree

Showing 90 changed files with 25,445 additions and 3,359 deletions.
diff --git a/.github/actions/java-test/action.yaml b/.github/actions/java-test/action.yaml
@@ -49,7 +49,7 @@ runs:
     - name: Run Maven compile
       shell: bash
       run: |
-        ./mvnw -B compile test-compile scalafix:scalafix -Psemanticdb ${{ inputs.maven_opts }}
+        ./mvnw -B compile test-compile scalafix:scalafix -Dscalafix.mode=CHECK -Psemanticdb ${{ inputs.maven_opts }}
 
     - name: Run tests
       shell: bash

diff --git a/.github/workflows/benchmark-tpch.yml b/.github/workflows/benchmark-tpch.yml
@@ -0,0 +1,123 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: TPC-H Correctness
+
+concurrency:
+  group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
+  cancel-in-progress: true
+
+on:
+  push:
+    paths-ignore:
+      - "doc/**"
+      - "**.md"
+  pull_request:
+    paths-ignore:
+      - "doc/**"
+      - "**.md"
+  # manual trigger
+  # https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
+  workflow_dispatch:
+
+env:
+  RUST_VERSION: nightly
+
+jobs:
+  prepare:
+    name: Build native and prepare data
+    runs-on: ubuntu-latest
+    container:
+      image: amd64/rust
+    env:
+      JAVA_VERSION: 11
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup Rust & Java toolchain
+        uses: ./.github/actions/setup-builder
+        with:
+          rust-version: ${{env.RUST_VERSION}}
+          jdk-version: 11
+      - name: Cache Maven dependencies
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.m2/repository
+            /root/.m2/repository
+          key: ${{ runner.os }}-java-maven-${{ hashFiles('**/pom.xml') }}
+          restore-keys: |
+            ${{ runner.os }}-java-maven-
+      - name: Cache TPC-H generated data
+        id: cache-tpch-sf-1
+        uses: actions/cache@v4
+        with:
+          path: ./tpch
+          key: tpch-${{ hashFiles('.github/workflows/benchmark-tpch.yml') }}
+      - name: Build Comet
+        run: make release
+      - name: Upload Comet native lib
+        uses: actions/upload-artifact@v4
+        with:
+          name: libcomet-${{ github.run_id }}
+          path: |
+            core/target/release/libcomet.so
+            core/target/release/libcomet.dylib
+          retention-days: 1 # remove the artifact after 1 day, only valid for this workflow
+          overwrite: true
+      - name: Generate TPC-H (SF=1) table data
+        if: steps.cache-tpch-sf-1.outputs.cache-hit != 'true'
+        run: |
+          cd spark && MAVEN_OPTS='-Xmx20g' ../mvnw exec:java -Dexec.mainClass="org.apache.spark.sql.GenTPCHData" -Dexec.classpathScope="test" -Dexec.cleanupDaemonThreads="false" -Dexec.args="--location `pwd`/.. --scaleFactor 1 --numPartitions 1 --overwrite"
+          cd ..
+
+  benchmark:
+    name: Run TPCHQuerySuite
+    runs-on: ubuntu-latest
+    needs: [prepare]
+    container:
+      image: amd64/rust
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup Rust & Java toolchain
+        uses: ./.github/actions/setup-builder
+        with:
+          rust-version: ${{env.RUST_VERSION}}
+          jdk-version: 11
+      - name: Cache Maven dependencies
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.m2/repository
+            /root/.m2/repository
+          key: ${{ runner.os }}-java-maven-${{ hashFiles('**/pom.xml') }}
+          restore-keys: |
+            ${{ runner.os }}-java-maven-
+      - name: Restore TPC-H generated data
+        id: cache-tpch-sf-1
+        uses: actions/cache/restore@v4
+        with:
+          path: ./tpch
+          key: tpch-${{ hashFiles('.github/workflows/benchmark-tpch.yml') }}
+          fail-on-cache-miss: true # should always hit: the prepare job generates the data when it is not already cached
+      - name: Download Comet native lib
+        uses: actions/download-artifact@v4
+        with:
+          name: libcomet-${{ github.run_id }}
+          path: core/target/release
+      - name: Run TPC-H queries
+        run: |
+          SPARK_HOME=`pwd` SPARK_TPCH_DATA=`pwd`/tpch/sf1_parquet ./mvnw -B -Prelease -Dsuites=org.apache.spark.sql.CometTPCHQuerySuite test
diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md
@@ -84,3 +84,7 @@ in the respective source code, e.g., `CometTPCHQueryBenchmark`.
 ## Debugging
 Comet is a multi-language project with native code written in Rust and JVM code written in Java and Scala.
 It is possible to debug both native and JVM code concurrently as described in the [DEBUGGING guide](DEBUGGING.md)
+
+## Submitting a Pull Request
+Comet uses `cargo fmt`, [Scalafix](https://github.com/scalacenter/scalafix) and [Spotless](https://github.com/diffplug/spotless/tree/main/plugin-maven) to 
+automatically format the code. Before submitting a pull request, you can simply run `make format` to format the code.
diff --git a/Makefile b/Makefile
@@ -33,11 +33,12 @@ test-jvm: core
 test: test-rust test-jvm
 clean:
 	cd core && cargo clean
-	./mvnw clean
+	./mvnw clean $(PROFILES)
 	rm -rf .dist
 bench:
 	cd core && RUSTFLAGS="-Ctarget-cpu=native" cargo bench $(filter-out $@,$(MAKECMDGOALS))
 format:
+	cd core && cargo fmt
 	./mvnw compile test-compile scalafix:scalafix -Psemanticdb $(PROFILES)
 	./mvnw spotless:apply $(PROFILES)
 

diff --git a/README.md b/README.md
@@ -113,4 +113,21 @@ INFO src/lib.rs: Comet native library initialized
           +- CometScan parquet [a#14] Batched: true, DataFilters: [isnotnull(a#14), (a#14 > 5)], 
              Format: CometParquet, Location: InMemoryFileIndex(1 paths)[file:/tmp/test], PartitionFilters: [], 
              PushedFilters: [IsNotNull(a), GreaterThan(a,5)], ReadSchema: struct<a:int>
-```
+```
+
+### Enable Comet shuffle
+
+The Comet shuffle feature is disabled by default. To enable it, add the following configs:
+
+```
+--conf spark.shuffle.manager=org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager
+--conf spark.comet.exec.shuffle.enabled=true
+```
+
+The above configs enable Comet native shuffle, which only supports hash partitioning and single partition.
+Comet native shuffle doesn't support complex types yet.
+
+To enable columnar shuffle which supports all partitioning and basic complex types, one more config is required:
+```
+--conf spark.comet.columnar.shuffle.enabled=true
+```
diff --git a/bin/comet-spark-shell b/bin/comet-spark-shell
@@ -81,4 +81,6 @@ RUST_BACKTRACE=1 $SPARK_HOME/bin/spark-shell \
   --conf spark.comet.enabled=true \
   --conf spark.comet.exec.enabled=true \
   --conf spark.comet.exec.all.enabled=true \
+  --conf spark.comet.exec.shuffle.enabled=true \
+  --conf spark.shuffle.manager=org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager \
 $@
diff --git a/common/src/main/java/org/apache/comet/CometArrowStreamWriter.java b/common/src/main/java/org/apache/comet/CometArrowStreamWriter.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet;
+
+import java.io.IOException;
+import java.nio.channels.WritableByteChannel;
+
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.VectorUnloader;
+import org.apache.arrow.vector.compression.NoCompressionCodec;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.ipc.ArrowStreamWriter;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+
+/**
+ * A custom `ArrowStreamWriter` that allows writing batches from different roots to the same
+ * stream. Arrow's `ArrowStreamWriter` cannot change the root after initialization.
+ */
+public class CometArrowStreamWriter extends ArrowStreamWriter {
+  public CometArrowStreamWriter(
+      VectorSchemaRoot root, DictionaryProvider provider, WritableByteChannel out) {
+    super(root, provider, out);
+  }
+
+  public void writeMoreBatch(VectorSchemaRoot root) throws IOException {
+    VectorUnloader unloader =
+        new VectorUnloader(
+            root, /*includeNullCount*/ true, NoCompressionCodec.INSTANCE, /*alignBuffers*/ true);
+
+    try (ArrowRecordBatch batch = unloader.getRecordBatch()) {
+      writeRecordBatch(batch);
+    }
+  }
+}
diff --git a/common/src/main/java/org/apache/comet/vector/CometDictionary.java b/common/src/main/java/org/apache/comet/vector/CometDictionary.java
@@ -156,10 +156,9 @@ private void initialize() {
         binaries = new ByteArrayWrapper[numValues];
         for (int i = 0; i < numValues; i++) {
           // Need copying here since we re-use byte array for decimal
-          byte[] bytes = values.getBinaryDecimal(i);
-          byte[] copy = new byte[DECIMAL_BYTE_WIDTH];
-          System.arraycopy(bytes, 0, copy, 0, DECIMAL_BYTE_WIDTH);
-          binaries[i] = new ByteArrayWrapper(copy);
+          byte[] bytes = new byte[DECIMAL_BYTE_WIDTH];
+          bytes = values.copyBinaryDecimal(i, bytes);
+          binaries[i] = new ByteArrayWrapper(bytes);
         }
         break;
       default:

diff --git a/common/src/main/java/org/apache/comet/vector/CometPlainVector.java b/common/src/main/java/org/apache/comet/vector/CometPlainVector.java
@@ -105,15 +105,13 @@ public UTF8String getUTF8String(int rowId) {
       int length = Platform.getInt(null, offsetBufferAddress + (rowId + 1L) * 4L) - offset;
       return UTF8String.fromAddress(null, valueBufferAddress + offset, length);
     } else {
-      // Iceberg maps UUID to StringType.
-      // The data type here must be UUID because the only FLBA -> String mapping we have is UUID.
       BaseFixedWidthVector fixedWidthVector = (BaseFixedWidthVector) valueVector;
       int length = fixedWidthVector.getTypeWidth();
       int offset = rowId * length;
       byte[] result = new byte[length];
       Platform.copyMemory(
           null, valueBufferAddress + offset, result, Platform.BYTE_ARRAY_OFFSET, length);
-      return UTF8String.fromString(convertToUuid(result).toString());
+      return UTF8String.fromBytes(result);
     }
   }
 

diff --git a/common/src/main/java/org/apache/comet/vector/CometVector.java b/common/src/main/java/org/apache/comet/vector/CometVector.java
@@ -98,22 +98,30 @@ public Decimal getDecimal(int i, int precision, int scale) {
     }
   }
 
-  /** Reads a 16-byte byte array which are encoded big-endian for decimal128. */
+  /**
+   * Reads the 16-byte decimal128 value at index i, encoded big-endian, into the internal byte
+   * array DECIMAL_BYTES and returns that array.
+   */
   byte[] getBinaryDecimal(int i) {
+    return copyBinaryDecimal(i, DECIMAL_BYTES);
+  }
+
+  /** Copies the 16-byte decimal128 value at index i into dest, big-endian, and returns dest. */
+  public byte[] copyBinaryDecimal(int i, byte[] dest) {
     long valueBufferAddress = getValueVector().getDataBuffer().memoryAddress();
     Platform.copyMemory(
         null,
         valueBufferAddress + (long) i * DECIMAL_BYTE_WIDTH,
-        DECIMAL_BYTES,
+        dest,
         Platform.BYTE_ARRAY_OFFSET,
         DECIMAL_BYTE_WIDTH);
     // Decimal is stored little-endian in Arrow, so we need to reverse the bytes here
     for (int j = 0, k = DECIMAL_BYTE_WIDTH - 1; j < DECIMAL_BYTE_WIDTH / 2; j++, k--) {
-      byte tmp = DECIMAL_BYTES[j];
-      DECIMAL_BYTES[j] = DECIMAL_BYTES[k];
-      DECIMAL_BYTES[k] = tmp;
+      byte tmp = dest[j];
+      dest[j] = dest[k];
+      dest[k] = tmp;
     }
-    return DECIMAL_BYTES;
+    return dest;
   }
 
   @Override

diff --git a/common/src/main/scala/org/apache/comet/vector/NativeUtil.scala b/common/src/main/scala/org/apache/comet/vector/NativeUtil.scala
@@ -20,6 +20,7 @@
 package org.apache.comet.vector
 
 import java.io.OutputStream
+import java.nio.channels.Channels
 
 import scala.collection.JavaConverters._
 import scala.collection.mutable
@@ -28,10 +29,11 @@ import org.apache.arrow.c.{ArrowArray, ArrowImporter, ArrowSchema, CDataDictiona
 import org.apache.arrow.memory.RootAllocator
 import org.apache.arrow.vector._
 import org.apache.arrow.vector.dictionary.DictionaryProvider
-import org.apache.arrow.vector.ipc.ArrowStreamWriter
 import org.apache.spark.SparkException
 import org.apache.spark.sql.vectorized.ColumnarBatch
 
+import org.apache.comet.CometArrowStreamWriter
+
 class NativeUtil {
   private val allocator = new RootAllocator(Long.MaxValue)
   private val dictionaryProvider: CDataDictionaryProvider = new CDataDictionaryProvider
@@ -46,29 +48,27 @@ class NativeUtil {
    *   the output stream
    */
   def serializeBatches(batches: Iterator[ColumnarBatch], out: OutputStream): Long = {
-    var schemaRoot: Option[VectorSchemaRoot] = None
-    var writer: Option[ArrowStreamWriter] = None
+    var writer: Option[CometArrowStreamWriter] = None
     var rowCount = 0
 
     batches.foreach { batch =>
       val (fieldVectors, batchProviderOpt) = getBatchFieldVectors(batch)
-      val root = schemaRoot.getOrElse(new VectorSchemaRoot(fieldVectors.asJava))
+      val root = new VectorSchemaRoot(fieldVectors.asJava)
       val provider = batchProviderOpt.getOrElse(dictionaryProvider)
 
       if (writer.isEmpty) {
-        writer = Some(new ArrowStreamWriter(root, provider, out))
+        writer = Some(new CometArrowStreamWriter(root, provider, Channels.newChannel(out)))
         writer.get.start()
+        writer.get.writeBatch()
+      } else {
+        writer.get.writeMoreBatch(root)
       }
-      writer.get.writeBatch()
 
       root.clear()
-      schemaRoot = Some(root)
-
       rowCount += batch.numRows()
     }
 
     writer.map(_.end())
-    schemaRoot.map(_.close())
 
     rowCount
   }