From 0a98a1a728b32dccc4d532c87167994378a4f5a2 Mon Sep 17 00:00:00 2001 From: Hongze Zhang Date: Fri, 31 May 2024 10:22:44 +0800 Subject: [PATCH] [VL] Do not skip updating children's metrics while visiting an operator with NoopMetricsUpdater --- .../metrics/HashAggregateMetricsUpdater.scala | 2 +- .../metrics/HashJoinMetricsUpdater.scala | 2 +- .../apache/gluten/metrics/MetricsUtil.scala | 2 +- .../gluten/execution/VeloxMetricsSuite.scala | 22 ++++++++++++- .../gluten/metrics/MetricsUpdater.scala | 16 +++++---- .../apache/gluten/metrics/MetricsUtil.scala | 33 ++++++++++--------- 6 files changed, 50 insertions(+), 27 deletions(-) diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/metrics/HashAggregateMetricsUpdater.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/metrics/HashAggregateMetricsUpdater.scala index e2014e5b8b844..b035d7a04fb0a 100644 --- a/backends-clickhouse/src/main/scala/org/apache/gluten/metrics/HashAggregateMetricsUpdater.scala +++ b/backends-clickhouse/src/main/scala/org/apache/gluten/metrics/HashAggregateMetricsUpdater.scala @@ -65,7 +65,7 @@ class HashAggregateMetricsUpdater(val metrics: Map[String, SQLMetric]) } } } catch { - case e: Throwable => + case e: Exception => logError(s"Updating native metrics failed due to ${e.getCause}.") throw e } diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/metrics/HashJoinMetricsUpdater.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/metrics/HashJoinMetricsUpdater.scala index 3c35286c1c13f..ca891bac27c63 100644 --- a/backends-clickhouse/src/main/scala/org/apache/gluten/metrics/HashJoinMetricsUpdater.scala +++ b/backends-clickhouse/src/main/scala/org/apache/gluten/metrics/HashJoinMetricsUpdater.scala @@ -104,7 +104,7 @@ class HashJoinMetricsUpdater(val metrics: Map[String, SQLMetric]) } } } catch { - case e: Throwable => + case e: Exception => logError(s"Updating native metrics failed due to ${e.getCause}.") throw e } diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/metrics/MetricsUtil.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/metrics/MetricsUtil.scala index a6dfb3dbcb1fd..20a9a362536c3 100644 --- a/backends-clickhouse/src/main/scala/org/apache/gluten/metrics/MetricsUtil.scala +++ b/backends-clickhouse/src/main/scala/org/apache/gluten/metrics/MetricsUtil.scala @@ -120,7 +120,7 @@ object MetricsUtil extends Logging { aggParamsMap) } } catch { - case e: Throwable => + case e: Exception => logWarning(s"Updating native metrics failed due to ${e.getCause}.") () } diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxMetricsSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxMetricsSuite.scala index ce8450fea4231..63008f6477379 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxMetricsSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxMetricsSuite.scala @@ -19,7 +19,8 @@ package org.apache.gluten.execution import org.apache.gluten.GlutenConfig import org.apache.gluten.sql.shims.SparkShimLoader -import org.apache.spark.sql.execution.CommandResultExec +import org.apache.spark.SparkConf +import org.apache.spark.sql.execution.{CommandResultExec, InputIteratorTransformer} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.internal.SQLConf @@ -52,6 +53,11 @@ class VeloxMetricsSuite extends VeloxWholeStageTransformerSuite with AdaptiveSpa super.afterAll() } + override protected def sparkConf: SparkConf = { + super.sparkConf + .set("spark.shuffle.manager", "org.apache.spark.shuffle.sort.ColumnarShuffleManager") + } + test("test sort merge join metrics") { withSQLConf( GlutenConfig.COLUMNAR_FPRCE_SHUFFLED_HASH_JOIN_ENABLED.key -> "false", @@ -164,4 +170,18 @@ class VeloxMetricsSuite extends VeloxWholeStageTransformerSuite with AdaptiveSpa } } } + + test("Metrics of top-n's children") { + runQueryAndCompare("SELECT c1, c2 FROM metrics_t1 order by c2 limit 5") { + df => + val iit = find(df.queryExecution.executedPlan) { + case _: InputIteratorTransformer => true + case _ => false + } + assert(iit.isDefined) + val metrics = iit.get.metrics + assert(metrics("numOutputRows").value == 5) + assert(metrics("outputVectors").value == 1) + } + } } diff --git a/gluten-core/src/main/scala/org/apache/gluten/metrics/MetricsUpdater.scala b/gluten-core/src/main/scala/org/apache/gluten/metrics/MetricsUpdater.scala index 0a622ba0b37d4..8fca4f201dba6 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/metrics/MetricsUpdater.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/metrics/MetricsUpdater.scala @@ -16,7 +16,6 @@ */ package org.apache.gluten.metrics -import org.apache.spark.sql.execution.metric.SQLMetric import org.apache.spark.sql.utils.OASPackageBridge.InputMetricsWrapper /** @@ -26,16 +25,19 @@ import org.apache.spark.sql.utils.OASPackageBridge.InputMetricsWrapper * TODO: place it to some other where since it's used not only by whole stage facilities */ trait MetricsUpdater extends Serializable { - - def metrics: Map[String, SQLMetric] - def updateInputMetrics(inputMetrics: InputMetricsWrapper): Unit = {} - def updateNativeMetrics(operatorMetrics: IOperatorMetrics): Unit = {} } +object NoopMetricsUpdater extends MetricsUpdater {} + final case class MetricsUpdaterTree(updater: MetricsUpdater, children: Seq[MetricsUpdaterTree]) -object NoopMetricsUpdater extends MetricsUpdater { - override def metrics: Map[String, SQLMetric] = Map.empty +object MetricsUpdaterTree { + object Terminate extends MetricsUpdater { + override def updateInputMetrics(inputMetrics: InputMetricsWrapper): Unit = + throw new UnsupportedOperationException() + override def updateNativeMetrics(operatorMetrics: IOperatorMetrics): Unit = + throw new UnsupportedOperationException() + } } diff --git a/gluten-data/src/main/scala/org/apache/gluten/metrics/MetricsUtil.scala b/gluten-data/src/main/scala/org/apache/gluten/metrics/MetricsUtil.scala index f11800b89c31f..a1ce3ec780ae0 100644 --- a/gluten-data/src/main/scala/org/apache/gluten/metrics/MetricsUtil.scala +++ b/gluten-data/src/main/scala/org/apache/gluten/metrics/MetricsUtil.scala @@ -57,7 +57,7 @@ object MetricsUtil extends Logging { case t: TransformSupport => MetricsUpdaterTree(t.metricsUpdater(), t.children.map(treeifyMetricsUpdaters)) case _ => - MetricsUpdaterTree(NoopMetricsUpdater, Seq()) + MetricsUpdaterTree(MetricsUpdaterTree.Terminate, Seq()) } } @@ -180,6 +180,8 @@ object MetricsUtil extends Logging { ) } + // FIXME: Metrics updating code is too magical to maintain. Tree-walking algorithm should be made + // more declarative than by counting down some kind of counters that don't have fixed definition. /** * @return * operator index and metrics index @@ -192,6 +194,9 @@ object MetricsUtil extends Logging { metricsIdx: Int, joinParamsMap: JMap[JLong, JoinParams], aggParamsMap: JMap[JLong, AggregationParams]): (JLong, Int) = { + if (mutNode.updater == MetricsUpdaterTree.Terminate) { + return (operatorIdx, metricsIdx) + } val operatorMetrics = new JArrayList[OperatorMetrics]() var curMetricsIdx = metricsIdx relMap @@ -245,18 +250,16 @@ object MetricsUtil extends Logging { mutNode.children.foreach { child => - if (child.updater != NoopMetricsUpdater) { - val result = updateTransformerMetricsInternal( - child, - relMap, - newOperatorIdx, - metrics, - newMetricsIdx, - joinParamsMap, - aggParamsMap) - newOperatorIdx = result._1 - newMetricsIdx = result._2 - } + val result = updateTransformerMetricsInternal( + child, + relMap, + newOperatorIdx, + metrics, + newMetricsIdx, + joinParamsMap, + aggParamsMap) + newOperatorIdx = result._1 + newMetricsIdx = result._2 } (newOperatorIdx, newMetricsIdx) @@ -292,8 +295,6 @@ object MetricsUtil extends Logging { val numNativeMetrics = metrics.inputRows.length if (numNativeMetrics == 0) { () - } else if (mutNode.updater == NoopMetricsUpdater) { - () } else { updateTransformerMetricsInternal( mutNode, @@ -305,7 +306,7 @@ object MetricsUtil extends Logging { aggParamsMap) } } catch { - case e: Throwable => + case e: Exception => logWarning(s"Updating native metrics failed due to ${e.getCause}.") () }