[VL] Minor improvements and fixes for gluten-it and gluten-te #6471

Merged · 1 commit · Jul 16, 2024
BaseMixin.java

@@ -48,6 +48,9 @@ public class BaseMixin {
   @CommandLine.Option(names = {"--error-on-memleak"}, description = "Fail the test when memory leak is detected by Spark's memory manager", defaultValue = "false")
   private boolean errorOnMemLeak;
 
+  @CommandLine.Option(names = {"--data-dir"}, description = "Location for storing data used by tests", defaultValue = "/tmp")
+  private String dataDir;
+
   @CommandLine.Option(names = {"--enable-ui"}, description = "Enable Spark UI", defaultValue = "false")
   private boolean enableUi;
 

@@ -129,19 +132,19 @@ public Integer runActions(Action[] actions) {
     switch (benchmarkType) {
       case "h":
         suite = new TpchSuite(runModeEnumeration.getSparkMasterUrl(), actions, testConf,
-            baselineConf, extraSparkConfScala, level, errorOnMemLeak, enableUi,
+            baselineConf, extraSparkConfScala, level, errorOnMemLeak, dataDir, enableUi,
             enableHsUi, hsUiPort, offHeapSize, disableAqe, disableBhj,
             disableWscg, shufflePartitions, scanPartitions);
         break;
       case "ds":
         suite = new TpcdsSuite(runModeEnumeration.getSparkMasterUrl(), actions, testConf,
-            baselineConf, extraSparkConfScala, level, errorOnMemLeak, enableUi,
+            baselineConf, extraSparkConfScala, level, errorOnMemLeak, dataDir, enableUi,
             enableHsUi, hsUiPort, offHeapSize, disableAqe, disableBhj,
             disableWscg, shufflePartitions, scanPartitions);
         break;
       case "clickbench":
         suite = new ClickBenchSuite(runModeEnumeration.getSparkMasterUrl(), actions, testConf,
-            baselineConf, extraSparkConfScala, level, errorOnMemLeak, enableUi,
+            baselineConf, extraSparkConfScala, level, errorOnMemLeak, dataDir, enableUi,
             enableHsUi, hsUiPort, offHeapSize, disableAqe, disableBhj,
             disableWscg, shufflePartitions, scanPartitions);
         break;
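Note on the new option: --data-dir defaults to /tmp, so runs that do not pass the flag keep writing to the same locations as before this change. Below is a minimal, hypothetical Scala sketch (not code from this PR) of how a picocli-annotated field like dataDir gets populated; the class and object names are invented for illustration:

import picocli.CommandLine

// Stand-in for BaseMixin: only the --data-dir wiring is shown.
class DataDirOpts {
  @CommandLine.Option(
    names = Array("--data-dir"),
    description = Array("Location for storing data used by tests"),
    defaultValue = "/tmp")
  var dataDir: String = _
}

object DataDirOptsDemo {
  def main(args: Array[String]): Unit = {
    val opts = new DataDirOpts
    new CommandLine(opts).parseArgs(args: _*)
    println(opts.dataDir) // "/tmp" when --data-dir is omitted, else the user's value
  }
}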
ClickBenchSuite.scala

@@ -21,6 +21,8 @@ import org.apache.gluten.integration.{DataGen, Suite, TableCreator}
 import org.apache.log4j.Level
 import org.apache.spark.SparkConf
 
+import java.io.File
+
 /**
  * ClickBench: a Benchmark For Analytical Databases
  *

@@ -35,6 +37,7 @@ class ClickBenchSuite(
     val extraSparkConf: Map[String, String],
     val logLevel: Level,
     val errorOnMemLeak: Boolean,
+    val dataDir: String,
     val enableUi: Boolean,
     val enableHsUi: Boolean,
     val hsUiPort: Int,

@@ -69,7 +72,7 @@
       scale: Double,
       genPartitionedData: Boolean): String = {
     checkDataGenArgs(scale, genPartitionedData)
-    DATA_WRITE_PATH
+    new File(dataDir).toPath.resolve(DATA_WRITE_RELATIVE_PATH + s"-$scale").toFile.getAbsolutePath
   }
 
   override private[integration] def createDataGen(

@@ -88,7 +91,7 @@
 }
 
 private object ClickBenchSuite {
-  private val DATA_WRITE_PATH = "/tmp/clickbench-generated"
+  private val DATA_WRITE_RELATIVE_PATH = "clickbench-generated"
   private val HISTORY_WRITE_PATH = "/tmp/clickbench-history"
   private val ALL_QUERY_IDS = (1 to 43).map(i => s"q$i").toArray
 
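Note on dataWritePath: the suite now composes the configured directory with a suite-local relative name via java.nio's Path.resolve, and ClickBench additionally gains the -$scale suffix that the old /tmp/clickbench-generated constant lacked. A small self-contained sketch of the resolution, with an assumed --data-dir value:

import java.io.File

object DataPathSketch {
  def main(args: Array[String]): Unit = {
    val dataDir = "/mnt/bench" // assumed value passed via --data-dir
    val scale = 1.0
    val path = new File(dataDir).toPath
      .resolve(s"clickbench-generated-$scale")
      .toFile
      .getAbsolutePath
    println(path) // /mnt/bench/clickbench-generated-1.0
  }
}

With the default --data-dir of /tmp this resolves to /tmp/clickbench-generated-<scale>, matching the old prefix.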
TpcdsSuite.scala

@@ -17,15 +17,13 @@
 package org.apache.gluten.integration.ds
 
 import org.apache.gluten.integration.action.Action
-import org.apache.gluten.integration.ds.TpcdsSuite.{
-  ALL_QUERY_IDS,
-  HISTORY_WRITE_PATH,
-  TPCDS_WRITE_PATH
-}
+import org.apache.gluten.integration.ds.TpcdsSuite.{ALL_QUERY_IDS, HISTORY_WRITE_PATH, TPCDS_WRITE_RELATIVE_PATH}
 import org.apache.gluten.integration.{DataGen, Suite, TableCreator, TypeModifier}
 import org.apache.log4j.Level
 import org.apache.spark.SparkConf
 
+import java.io.File
+
 class TpcdsSuite(
     val masterUrl: String,
     val actions: Array[Action],

@@ -34,6 +32,7 @@ class TpcdsSuite(
     val extraSparkConf: Map[String, String],
     val logLevel: Level,
     val errorOnMemLeak: Boolean,
+    val dataDir: String,
     val enableUi: Boolean,
     val enableHsUi: Boolean,
     val hsUiPort: Int,

@@ -66,7 +65,7 @@
   override private[integration] def dataWritePath(
       scale: Double,
       genPartitionedData: Boolean): String =
-    TPCDS_WRITE_PATH + s"-$scale"
+    new File(dataDir).toPath.resolve(TPCDS_WRITE_RELATIVE_PATH + s"-$scale").toFile.getAbsolutePath
 
   override private[integration] def createDataGen(
     scale: Double,

@@ -95,7 +94,7 @@
 }
 
 object TpcdsSuite {
-  private val TPCDS_WRITE_PATH = "/tmp/tpcds-generated"
+  private val TPCDS_WRITE_RELATIVE_PATH = "tpcds-generated"
   private val ALL_QUERY_IDS = Array(
     "q1",
     "q2",
TpchSuite.scala

@@ -17,11 +17,13 @@
 package org.apache.gluten.integration.h
 
 import org.apache.gluten.integration.action.Action
-import org.apache.gluten.integration.h.TpchSuite.{HISTORY_WRITE_PATH, TPCH_WRITE_PATH}
+import org.apache.gluten.integration.h.TpchSuite.{HISTORY_WRITE_PATH, TPCH_WRITE_RELATIVE_PATH}
 import org.apache.gluten.integration.{DataGen, Suite, TableCreator, TypeModifier}
 import org.apache.log4j.Level
 import org.apache.spark.SparkConf
 
+import java.io.File
+
 class TpchSuite(
     val masterUrl: String,
     val actions: Array[Action],

@@ -30,6 +32,7 @@ class TpchSuite(
     val extraSparkConf: Map[String, String],
     val logLevel: Level,
     val errorOnMemLeak: Boolean,
+    val dataDir: String,
     val enableUi: Boolean,
     val enableHsUi: Boolean,
     val hsUiPort: Int,

@@ -62,7 +65,7 @@
   override private[integration] def dataWritePath(
       scale: Double,
       genPartitionedData: Boolean): String =
-    TPCH_WRITE_PATH + s"-$scale"
+    new File(dataDir).toPath.resolve(TPCH_WRITE_RELATIVE_PATH + s"-$scale").toFile.getAbsolutePath
 
   override private[integration] def createDataGen(
     scale: Double,

@@ -90,7 +93,7 @@
 }
 
 object TpchSuite {
-  private val TPCH_WRITE_PATH = "/tmp/tpch-generated"
+  private val TPCH_WRITE_RELATIVE_PATH = "tpch-generated"
   private val ALL_QUERY_IDS = Array(
     "q1",
     "q2",
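One subtlety shared by all three suites: Path.resolve appends the relative name, and getAbsolutePath anchors a relative --data-dir against the JVM's working directory. A hypothetical sketch:

import java.io.File

object RelativeDataDirSketch {
  def main(args: Array[String]): Unit = {
    // A relative --data-dir such as "bench-data" becomes absolute relative
    // to the current working directory when getAbsolutePath is called.
    val path = new File("bench-data").toPath
      .resolve("tpch-generated-1.0")
      .toFile
      .getAbsolutePath
    println(path) // e.g. /home/user/bench-data/tpch-generated-1.0 when run from /home/user
  }
}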
@@ -19,7 +19,7 @@ set -ex
 export NUM_THREADS=$(nproc)
 export CMAKE_BUILD_PARALLEL_LEVEL=$(nproc)
 
-# Retry code that copied from https://unix.stackexchange.com/a/137639.
+# Retry code copied from https://unix.stackexchange.com/a/137639.
 function fail {
   echo $1 >&2
   exit 1

@@ -43,6 +43,7 @@ function retry {
 }
 
 cd /opt/gluten
-retry apt-get install curl zip unzip tar pkg-config autoconf-archive bison flex
+retry apt-get update
+retry apt-get install -y curl zip unzip tar pkg-config autoconf-archive bison flex
 retry source ./dev/vcpkg/env.sh
 retry dev/builddeps-veloxbe.sh --build_tests=OFF --build_benchmarks=OFF --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=ON