Commit

Merge branch 'main' into row-based-sort-followup

marin-ma authored Jul 29, 2024
2 parents 962fc80 + 03765af commit 0b5d400
Showing 114 changed files with 14,165 additions and 2,342 deletions.
700 changes: 0 additions & 700 deletions .github/workflows/velox_be.yml.deprecated

This file was deleted.

18 changes: 9 additions & 9 deletions .github/workflows/velox_docker.yml
@@ -296,7 +296,7 @@ jobs:
cd tools/gluten-it \
&& GLUTEN_IT_JVM_ARGS=-Xmx3G sbin/gluten-it.sh parameterized \
--local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q67,q95 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \
- --skip-data-gen -m=OffHeapExecutionMemory \
+ --data-gen-strategy=skip -m=OffHeapExecutionMemory \
-d=ISOLATION:OFF,spark.gluten.memory.isolation=false \
-d=OFFHEAP_SIZE:6g,spark.memory.offHeap.size=6g \
-d=OFFHEAP_SIZE:4g,spark.memory.offHeap.size=4g \
@@ -308,7 +308,7 @@ jobs:
cd tools/gluten-it \
&& GLUTEN_IT_JVM_ARGS=-Xmx3G sbin/gluten-it.sh parameterized \
--local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q67 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \
- --skip-data-gen -m=OffHeapExecutionMemory \
+ --data-gen-strategy=skip -m=OffHeapExecutionMemory \
-d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \
-d=OFFHEAP_SIZE:6g,spark.memory.offHeap.size=6g \
-d=OFFHEAP_SIZE:4g,spark.memory.offHeap.size=4g \
@@ -319,7 +319,7 @@ jobs:
cd tools/gluten-it \
&& GLUTEN_IT_JVM_ARGS=-Xmx3G sbin/gluten-it.sh parameterized \
--local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q95 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \
- --skip-data-gen -m=OffHeapExecutionMemory \
+ --data-gen-strategy=skip -m=OffHeapExecutionMemory \
-d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \
-d=OFFHEAP_SIZE:6g,spark.memory.offHeap.size=6g \
-d=OFFHEAP_SIZE:4g,spark.memory.offHeap.size=4g \
@@ -330,7 +330,7 @@ jobs:
cd tools/gluten-it \
&& GLUTEN_IT_JVM_ARGS=-Xmx3G sbin/gluten-it.sh parameterized \
--local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q23a,q23b -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \
- --skip-data-gen -m=OffHeapExecutionMemory \
+ --data-gen-strategy=skip -m=OffHeapExecutionMemory \
-d=ISOLATION:OFF,spark.gluten.memory.isolation=false \
-d=OFFHEAP_SIZE:2g,spark.memory.offHeap.size=2g \
-d=FLUSH_MODE:DISABLED,spark.gluten.sql.columnar.backend.velox.flushablePartialAggregation=false,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 \
@@ -341,7 +341,7 @@ jobs:
cd tools/gluten-it \
&& GLUTEN_IT_JVM_ARGS=-Xmx3G sbin/gluten-it.sh parameterized \
--local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q23a,q23b -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \
- --skip-data-gen -m=OffHeapExecutionMemory \
+ --data-gen-strategy=skip -m=OffHeapExecutionMemory \
-d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \
-d=OFFHEAP_SIZE:2g,spark.memory.offHeap.size=2g \
-d=FLUSH_MODE:DISABLED,spark.gluten.sql.columnar.backend.velox.flushablePartialAggregation=false,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 \
@@ -352,7 +352,7 @@ jobs:
cd tools/gluten-it \
&& GLUTEN_IT_JVM_ARGS=-Xmx3G sbin/gluten-it.sh parameterized \
--local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q97 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \
- --skip-data-gen -m=OffHeapExecutionMemory \
+ --data-gen-strategy=skip -m=OffHeapExecutionMemory \
-d=ISOLATION:OFF,spark.gluten.memory.isolation=false \
-d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \
-d=OFFHEAP_SIZE:2g,spark.memory.offHeap.size=2g \
@@ -408,7 +408,7 @@ jobs:
cd tools/gluten-it \
&& GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries \
--local --preset=velox --benchmark-type=ds --error-on-memleak -s=30.0 --off-heap-size=8g --threads=12 --shuffle-partitions=72 --iterations=1 \
- --skip-data-gen --random-kill-tasks --no-session-reuse
+ --data-gen-strategy=skip --random-kill-tasks --no-session-reuse
# run-tpc-test-ubuntu-sf30:
# needs: build-native-lib-centos-7
@@ -457,10 +457,10 @@ jobs:
# cd tools/gluten-it \
# && GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries-compare \
# --local --preset=velox --benchmark-type=h --error-on-memleak -s=30.0 --off-heap-size=8g --threads=12 --shuffle-partitions=72 --iterations=1 \
- # --skip-data-gen --shard=${{ matrix.shard }} \
+ # --data-gen-strategy=skip --shard=${{ matrix.shard }} \
# && GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries-compare \
# --local --preset=velox --benchmark-type=ds --error-on-memleak -s=30.0 --off-heap-size=8g --threads=12 --shuffle-partitions=72 --iterations=1 \
- # --skip-data-gen --shard=${{ matrix.shard }}
+ # --data-gen-strategy=skip --shard=${{ matrix.shard }}

run-tpc-test-centos8-uniffle:
needs: build-native-lib-centos-7
57 changes: 0 additions & 57 deletions .github/workflows/velox_tpch_bench.yml.disabled

This file was deleted.

60 changes: 0 additions & 60 deletions .github/workflows/velox_tpch_merge.yml.disabled

This file was deleted.

67 changes: 0 additions & 67 deletions .github/workflows/velox_velox_ut.yml.disabled

This file was deleted.

7 changes: 5 additions & 2 deletions .github/workflows/velox_weekly.yml
@@ -34,7 +34,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- os: [ "centos:7", "centos:8" ]
+ os: [ "centos:7", "centos:8", "quay.io/centos/centos:stream9" ]
runs-on: ubuntu-20.04
container: ${{ matrix.os }}
steps:
@@ -55,11 +55,14 @@ jobs:
-e 's/^#baseurl/baseurl/' \
-e 's/mirror\.centos\.org/vault.centos.org/' \
/etc/yum.repos.d/CentOS-SCLo-scl-rh.repo
+ elif [ "${{ matrix.os }}" = "quay.io/centos/centos:stream9" ]; then
+   dnf install -y --setopt=install_weak_deps=False gcc-toolset-12
+   source /opt/rh/gcc-toolset-12/enable || exit 1
else
dnf install -y --setopt=install_weak_deps=False gcc-toolset-9
source /opt/rh/gcc-toolset-9/enable || exit 1
fi
- yum install -y java-1.8.0-openjdk-devel patch wget git
+ yum install -y java-1.8.0-openjdk-devel patch wget git perl
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk && \
export PATH=$JAVA_HOME/bin:$PATH
wget --no-check-certificate https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz && \
@@ -27,6 +27,9 @@ public class OperatorMetrics implements IOperatorMetrics {
public JoinParams joinParams;
public AggregationParams aggParams;

+ public long physicalWrittenBytes;
+ public long numWrittenFiles;

/** Create an instance for operator metrics. */
public OperatorMetrics(
List<MetricsData> metricsList, JoinParams joinParams, AggregationParams aggParams) {
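The two counters added above extend OperatorMetrics beyond the existing join/aggregation parameters so a native write operator can report its output. A minimal usage sketch (the assignment site, `metricsList`, `joinParams`, `aggParams`, and the example values are illustrative assumptions, not part of this commit):

    // Hypothetical caller: build the metrics object via the constructor shown
    // above, then record what a native write operator produced.
    OperatorMetrics metrics = new OperatorMetrics(metricsList, joinParams, aggParams);
    metrics.physicalWrittenBytes = 128L * 1024 * 1024; // example: bytes written by the operator
    metrics.numWrittenFiles = 4;                       // example: output files produced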
@@ -19,11 +19,8 @@
import org.apache.gluten.metrics.IMetrics;
import org.apache.gluten.metrics.NativeMetrics;

- import org.apache.spark.sql.execution.utils.CHExecUtil;
- import org.apache.spark.sql.vectorized.ColumnVector;
import org.apache.spark.sql.vectorized.ColumnarBatch;

- import java.io.IOException;
import java.util.concurrent.atomic.AtomicBoolean;

public class BatchIterator extends GeneralOutIterator {
@@ -43,8 +40,6 @@ public String getId() {

private native boolean nativeHasNext(long nativeHandle);

- private native byte[] nativeNext(long nativeHandle);

private native long nativeCHNext(long nativeHandle);

private native void nativeClose(long nativeHandle);
@@ -54,22 +49,15 @@ public String getId() {
private native String nativeFetchMetrics(long nativeHandle);

@Override
- public boolean hasNextInternal() throws IOException {
+ public boolean hasNextInternal() {
return nativeHasNext(handle);
}

@Override
- public ColumnarBatch nextInternal() throws IOException {
+ public ColumnarBatch nextInternal() {
long block = nativeCHNext(handle);
CHNativeBlock nativeBlock = new CHNativeBlock(block);
- int cols = nativeBlock.numColumns();
- ColumnVector[] columnVectors = new ColumnVector[cols];
- for (int i = 0; i < cols; i++) {
-   columnVectors[i] =
-       new CHColumnVector(
-           CHExecUtil.inferSparkDataType(nativeBlock.getTypeByPosition(i)), block, i);
- }
- return new ColumnarBatch(columnVectors, nativeBlock.numRows());
+ return nativeBlock.toColumnarBatch();
}

@Override
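Taken together, the removals above leave nextInternal as a thin wrapper. Reconstructed from the added and context lines for readability (a sketch assembled from this hunk, not copied from the repository):

    @Override
    public ColumnarBatch nextInternal() {
      // Fetch the next native block and let CHNativeBlock build the batch;
      // per-column construction now lives in CHNativeBlock.toColumnarBatch().
      long block = nativeCHNext(handle);
      CHNativeBlock nativeBlock = new CHNativeBlock(block);
      return nativeBlock.toColumnarBatch();
    }

This is why the CHExecUtil and ColumnVector imports, the unused nativeNext method, and the throws IOException clauses could all be dropped.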
@@ -25,7 +25,7 @@

public class CHColumnVector extends ColumnVector {
private final int columnPosition;
- private long blockAddress;
+ private final long blockAddress;

public CHColumnVector(DataType type, long blockAddress, int columnPosition) {
super(type);
@@ -90,15 +90,13 @@ public static void closeFromColumnarBatch(ColumnarBatch cb) {
}

public ColumnarBatch toColumnarBatch() {
- ColumnVector[] vectors = new ColumnVector[numColumns()];
- for (int i = 0; i < numColumns(); i++) {
+ int numRows = numRows();
+ int cols = numColumns();
+ ColumnVector[] vectors = new ColumnVector[cols];
+ for (int i = 0; i < cols; i++) {
vectors[i] =
new CHColumnVector(CHExecUtil.inferSparkDataType(getTypeByPosition(i)), blockAddress, i);
}
- int numRows = 0;
- if (numColumns() != 0) {
-   numRows = numRows();
- }
return new ColumnarBatch(vectors, numRows);
}
}
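For reference, here is how toColumnarBatch reads after this change, assembled from the added and context lines above (a readability aid, not an authoritative copy): numRows() and numColumns() are now read once up front, replacing the old zero-column guard.

    public ColumnarBatch toColumnarBatch() {
      int numRows = numRows();
      int cols = numColumns();
      ColumnVector[] vectors = new ColumnVector[cols];
      for (int i = 0; i < cols; i++) {
        // Wrap each native column at this block address in a CHColumnVector,
        // mapping the native type to a Spark DataType.
        vectors[i] =
            new CHColumnVector(CHExecUtil.inferSparkDataType(getTypeByPosition(i)), blockAddress, i);
      }
      return new ColumnarBatch(vectors, numRows);
    }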