Skip to content

Commit

Permalink
[VL] Minor improvements and fixes for gluten-it and gluten-te
Browse files (browse the repository at this point in the history)
  • Loading branch information
zhztheplayer authored Jul 16, 2024
1 parent 306791d commit 86a683a
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ public class BaseMixin {
@CommandLine.Option(names = {"--error-on-memleak"}, description = "Fail the test when memory leak is detected by Spark's memory manager", defaultValue = "false")
private boolean errorOnMemLeak;

@CommandLine.Option(names = {"--data-dir"}, description = "Location for storing data used by tests", defaultValue = "/tmp")
private String dataDir;

@CommandLine.Option(names = {"--enable-ui"}, description = "Enable Spark UI", defaultValue = "false")
private boolean enableUi;

Expand Down Expand Up @@ -129,19 +132,19 @@ public Integer runActions(Action[] actions) {
switch (benchmarkType) {
case "h":
suite = new TpchSuite(runModeEnumeration.getSparkMasterUrl(), actions, testConf,
baselineConf, extraSparkConfScala, level, errorOnMemLeak, enableUi,
baselineConf, extraSparkConfScala, level, errorOnMemLeak, dataDir, enableUi,
enableHsUi, hsUiPort, offHeapSize, disableAqe, disableBhj,
disableWscg, shufflePartitions, scanPartitions);
break;
case "ds":
suite = new TpcdsSuite(runModeEnumeration.getSparkMasterUrl(), actions, testConf,
baselineConf, extraSparkConfScala, level, errorOnMemLeak, enableUi,
baselineConf, extraSparkConfScala, level, errorOnMemLeak, dataDir, enableUi,
enableHsUi, hsUiPort, offHeapSize, disableAqe, disableBhj,
disableWscg, shufflePartitions, scanPartitions);
break;
case "clickbench":
suite = new ClickBenchSuite(runModeEnumeration.getSparkMasterUrl(), actions, testConf,
baselineConf, extraSparkConfScala, level, errorOnMemLeak, enableUi,
baselineConf, extraSparkConfScala, level, errorOnMemLeak, dataDir, enableUi,
enableHsUi, hsUiPort, offHeapSize, disableAqe, disableBhj,
disableWscg, shufflePartitions, scanPartitions);
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ import org.apache.gluten.integration.{DataGen, Suite, TableCreator}
import org.apache.log4j.Level
import org.apache.spark.SparkConf

import java.io.File

/**
* ClickBench: a Benchmark For Analytical Databases
*
Expand All @@ -35,6 +37,7 @@ class ClickBenchSuite(
val extraSparkConf: Map[String, String],
val logLevel: Level,
val errorOnMemLeak: Boolean,
val dataDir: String,
val enableUi: Boolean,
val enableHsUi: Boolean,
val hsUiPort: Int,
Expand Down Expand Up @@ -69,7 +72,7 @@ class ClickBenchSuite(
scale: Double,
genPartitionedData: Boolean): String = {
checkDataGenArgs(scale, genPartitionedData)
DATA_WRITE_PATH
new File(dataDir).toPath.resolve(DATA_WRITE_RELATIVE_PATH + s"-$scale").toFile.getAbsolutePath
}

override private[integration] def createDataGen(
Expand All @@ -88,7 +91,7 @@ class ClickBenchSuite(
}

private object ClickBenchSuite {
private val DATA_WRITE_PATH = "/tmp/clickbench-generated"
private val DATA_WRITE_RELATIVE_PATH = "clickbench-generated"
private val HISTORY_WRITE_PATH = "/tmp/clickbench-history"
private val ALL_QUERY_IDS = (1 to 43).map(i => s"q$i").toArray

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,13 @@
package org.apache.gluten.integration.ds

import org.apache.gluten.integration.action.Action
import org.apache.gluten.integration.ds.TpcdsSuite.{
ALL_QUERY_IDS,
HISTORY_WRITE_PATH,
TPCDS_WRITE_PATH
}
import org.apache.gluten.integration.ds.TpcdsSuite.{ALL_QUERY_IDS, HISTORY_WRITE_PATH, TPCDS_WRITE_RELATIVE_PATH}
import org.apache.gluten.integration.{DataGen, Suite, TableCreator, TypeModifier}
import org.apache.log4j.Level
import org.apache.spark.SparkConf

import java.io.File

class TpcdsSuite(
val masterUrl: String,
val actions: Array[Action],
Expand All @@ -34,6 +32,7 @@ class TpcdsSuite(
val extraSparkConf: Map[String, String],
val logLevel: Level,
val errorOnMemLeak: Boolean,
val dataDir: String,
val enableUi: Boolean,
val enableHsUi: Boolean,
val hsUiPort: Int,
Expand Down Expand Up @@ -66,7 +65,7 @@ class TpcdsSuite(
override private[integration] def dataWritePath(
scale: Double,
genPartitionedData: Boolean): String =
TPCDS_WRITE_PATH + s"-$scale"
new File(dataDir).toPath.resolve(TPCDS_WRITE_RELATIVE_PATH + s"-$scale").toFile.getAbsolutePath

override private[integration] def createDataGen(
scale: Double,
Expand Down Expand Up @@ -95,7 +94,7 @@ class TpcdsSuite(
}

object TpcdsSuite {
private val TPCDS_WRITE_PATH = "/tmp/tpcds-generated"
private val TPCDS_WRITE_RELATIVE_PATH = "tpcds-generated"
private val ALL_QUERY_IDS = Array(
"q1",
"q2",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@
package org.apache.gluten.integration.h

import org.apache.gluten.integration.action.Action
import org.apache.gluten.integration.h.TpchSuite.{HISTORY_WRITE_PATH, TPCH_WRITE_PATH}
import org.apache.gluten.integration.h.TpchSuite.{HISTORY_WRITE_PATH, TPCH_WRITE_RELATIVE_PATH}
import org.apache.gluten.integration.{DataGen, Suite, TableCreator, TypeModifier}
import org.apache.log4j.Level
import org.apache.spark.SparkConf

import java.io.File

class TpchSuite(
val masterUrl: String,
val actions: Array[Action],
Expand All @@ -30,6 +32,7 @@ class TpchSuite(
val extraSparkConf: Map[String, String],
val logLevel: Level,
val errorOnMemLeak: Boolean,
val dataDir: String,
val enableUi: Boolean,
val enableHsUi: Boolean,
val hsUiPort: Int,
Expand Down Expand Up @@ -62,7 +65,7 @@ class TpchSuite(
override private[integration] def dataWritePath(
scale: Double,
genPartitionedData: Boolean): String =
TPCH_WRITE_PATH + s"-$scale"
new File(dataDir).toPath.resolve(TPCH_WRITE_RELATIVE_PATH + s"-$scale").toFile.getAbsolutePath

override private[integration] def createDataGen(
scale: Double,
Expand Down Expand Up @@ -90,7 +93,7 @@ class TpchSuite(
}

object TpchSuite {
private val TPCH_WRITE_PATH = "/tmp/tpch-generated"
private val TPCH_WRITE_RELATIVE_PATH = "tpch-generated"
private val ALL_QUERY_IDS = Array(
"q1",
"q2",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ set -ex
export NUM_THREADS=$(nproc)
export CMAKE_BUILD_PARALLEL_LEVEL=$(nproc)

# Retry code that copied from https://unix.stackexchange.com/a/137639.
# Retry code copied from https://unix.stackexchange.com/a/137639.
function fail {
echo $1 >&2
exit 1
Expand All @@ -43,6 +43,7 @@ function retry {
}

cd /opt/gluten
retry apt-get install curl zip unzip tar pkg-config autoconf-archive bison flex
retry apt-get update
retry apt-get install -y curl zip unzip tar pkg-config autoconf-archive bison flex
retry source ./dev/vcpkg/env.sh
retry dev/builddeps-veloxbe.sh --build_tests=OFF --build_benchmarks=OFF --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=ON

0 comments on commit 86a683a

Please sign in to comment.