Skip to content

Commit

Permalink
[VL] RAS: Make default rough cost model exhaustively offload computat…
Browse files Browse the repository at this point in the history
…ions (#6493)
  • Loading branch information
zhztheplayer authored Jul 19, 2024
1 parent 43a3a3a commit c77131f
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,58 @@
*/
package org.apache.gluten.execution

import org.apache.spark.SparkException
import org.apache.spark.{SparkConf, SparkException}
import org.apache.spark.sql.catalyst.optimizer.NullPropagation
import org.apache.spark.sql.execution.ProjectExec
import org.apache.spark.sql.types._

import java.sql.Timestamp

class ScalarFunctionsValidateSuite extends FunctionsValidateTest {
/**
 * Runs the tests inherited from [[ScalarFunctionsValidateSuite]] with
 * `spark.gluten.ras.enabled` set to "false", and adds the `input_file_name` test.
 */
class ScalarFunctionsValidateSuiteRasOff extends ScalarFunctionsValidateSuite {
  /** The parent configuration with the RAS switch set to "false". */
  override protected def sparkConf: SparkConf =
    super.sparkConf.set("spark.gluten.ras.enabled", "false")

  // Since https://github.com/apache/incubator-gluten/pull/6200.
  test("Test input_file_name function") {
    // input_file_name() selected directly from a single table.
    val singleTableQuery =
      """SELECT input_file_name(), l_orderkey
        | from lineitem limit 100""".stripMargin

    // input_file_name() selected on top of a UNION ALL of two tables.
    val unionQuery =
      """SELECT input_file_name(), l_orderkey
        | from
        | (select l_orderkey from lineitem
        | union all
        | select o_orderkey as l_orderkey from orders)
        | limit 100""".stripMargin

    // Both queries get the same custom check on the resulting DataFrame.
    Seq(singleTableQuery, unionQuery).foreach {
      query =>
        runQueryAndCompare(query) {
          checkGlutenOperatorMatch[ProjectExecTransformer]
        }
    }
  }
}

/**
 * Runs the tests inherited from [[ScalarFunctionsValidateSuite]] with
 * `spark.gluten.ras.enabled` set to "true". The `input_file_name` test is registered
 * but ignored here.
 */
class ScalarFunctionsValidateSuiteRasOn extends ScalarFunctionsValidateSuite {
  /** The parent configuration with the RAS switch set to "true". */
  override protected def sparkConf: SparkConf =
    super.sparkConf.set("spark.gluten.ras.enabled", "true")

  // TODO: input_file_name is not yet supported in RAS
  ignore("Test input_file_name function") {
    // input_file_name() selected directly from a single table.
    val singleTableQuery =
      """SELECT input_file_name(), l_orderkey
        | from lineitem limit 100""".stripMargin

    // input_file_name() selected on top of a UNION ALL of two tables.
    val unionQuery =
      """SELECT input_file_name(), l_orderkey
        | from
        | (select l_orderkey from lineitem
        | union all
        | select o_orderkey as l_orderkey from orders)
        | limit 100""".stripMargin

    // The custom check is a no-op: nothing is asserted about the DataFrame passed to it.
    Seq(singleTableQuery, unionQuery).foreach {
      query => runQueryAndCompare(query) { _ => }
    }
  }
}

abstract class ScalarFunctionsValidateSuite extends FunctionsValidateTest {
disableFallbackCheck
import testImplicits._

Expand Down Expand Up @@ -658,22 +702,6 @@ class ScalarFunctionsValidateSuite extends FunctionsValidateTest {
}
}

test("Test input_file_name function") {
  // input_file_name() selected directly from a single table.
  val singleTableQuery =
    """SELECT input_file_name(), l_orderkey
      | from lineitem limit 100""".stripMargin

  // input_file_name() selected on top of a UNION ALL of two tables.
  val unionQuery =
    """SELECT input_file_name(), l_orderkey
      | from
      | (select l_orderkey from lineitem
      | union all
      | select o_orderkey as l_orderkey from orders)
      | limit 100""".stripMargin

  // Both queries get the same custom check on the resulting DataFrame.
  Seq(singleTableQuery, unionQuery).foreach {
    query =>
      runQueryAndCompare(query) {
        checkGlutenOperatorMatch[ProjectExecTransformer]
      }
  }
}

test("Test sequence function optimized by Spark constant folding") {
withSQLConf(("spark.sql.optimizer.excludedRules", NullPropagation.ruleName)) {
runQueryAndCompare("""SELECT sequence(1, 5), l_orderkey
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,20 +70,21 @@ object GlutenCostModel extends Logging {
(n.children.map(longCostOf).toList :+ selfCost).reduce(safeSum)
}

// A very rough estimation as of now.
// A very rough estimation as of now. The cost model basically treats any
// fallen-back op as extremely costly, so computation is offloaded as much as possible.
// Returns the cost assigned to `node` itself as a Long, chosen by pattern matching on
// the kind of plan node. Only `node` is inspected here; child plans bound by the
// patterns are not used.
private def selfLongCostOf(node: SparkPlan): Long = {
node match {
case _: RemoveFilter.NoopFilter =>
// To make planner choose the tree that has applied rule PushFilterToScan.
0L
// NOTE(review): each case below appears twice with different constants
// (3L/3L/3L/3L/2L/3L/5L first, then 10L/10L/10L/10L/10L/1000L/1000L). Scala tries match
// cases top to bottom, so as written only the first occurrence of each pattern can
// match. This looks like the removed and added sides of a diff rendered together;
// confirm which set of constants is intended before relying on these values.
case ColumnarToRowExec(child) => 3L
case RowToColumnarExec(child) => 3L
case ColumnarToRowLike(child) => 3L
case RowToColumnarLike(child) => 3L
case p if PlanUtil.isGlutenColumnarOp(p) => 2L
case p if PlanUtil.isVanillaColumnarOp(p) => 3L
case ColumnarToRowExec(child) => 10L
case RowToColumnarExec(child) => 10L
case ColumnarToRowLike(child) => 10L
case RowToColumnarLike(child) => 10L
case p if PlanUtil.isGlutenColumnarOp(p) => 10L
case p if PlanUtil.isVanillaColumnarOp(p) => 1000L
// Other row ops. Usually a vanilla row op.
case _ => 5L
case _ => 1000L
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ export MOUNT_MAVEN_CACHE=ON
# Additionally, changes to HTTP_PROXY_HOST / HTTP_PROXY_PORT could invalidate the build cache
# as well. For more details, please check the Dockerfile `dockerfile-buildenv`.
cd gluten/
tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run-default.sh

# 4. Check the built libs.
ls -l cpp/build/releases/
Expand Down

0 comments on commit c77131f

Please sign in to comment.