diff --git a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetRFSuite.scala b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetRFSuite.scala
index 06d0f41ab7cc..7513e764a6be 100644
--- a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetRFSuite.scala
+++ b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseTPCHParquetRFSuite.scala
@@ -16,13 +16,61 @@
  */
 package io.glutenproject.execution
 
-import org.apache.spark.SparkConf
+import org.apache.spark.{SPARK_VERSION_SHORT, SparkConf}
 
 class GlutenClickHouseTPCHParquetRFSuite extends GlutenClickHouseTPCHParquetSuite {
+
+  /** Spark version as "major.minor" (e.g. "3.3"), derived from SPARK_VERSION_SHORT. */
+  protected lazy val sparkVersion: String =
+    SPARK_VERSION_SHORT.split("\\.").take(2).mkString(".")
+
   override protected def sparkConf: SparkConf = {
     super.sparkConf
       // radically small threshold to force runtime bloom filter
       .set("spark.sql.optimizer.runtime.bloomFilter.applicationSideScanSizeThreshold", "1KB")
       .set("spark.sql.optimizer.runtime.bloomFilter.enabled", "true")
   }
+
+  // Runs a join with a correlated scalar subquery while runtime bloom filters are enabled
+  // (see sparkConf above) and compares the result against vanilla Spark.
+  test("GLUTEN-3779: Fix core dump when executing sql with runtime filter") {
+    withSQLConf(
+      ("spark.sql.autoBroadcastJoinThreshold", "-1"),
+      ("spark.sql.files.maxPartitionBytes", "204800"),
+      ("spark.sql.files.openCostInBytes", "102400")
+    ) {
+      compareResultsAgainstVanillaSpark(
+        """
+          |SELECT
+          |    sum(l_extendedprice) / 7.0 AS avg_yearly
+          |FROM
+          |    lineitem,
+          |    part
+          |WHERE
+          |    p_partkey = l_partkey
+          |    AND p_size > 5
+          |    AND l_quantity < (
+          |        SELECT
+          |            0.2 * avg(l_quantity)
+          |        FROM
+          |            lineitem
+          |        WHERE
+          |            l_partkey = p_partkey);
+          |
+          |""".stripMargin,
+        compareResult = true,
+        df => {
+          // The plan-shape assertions are only evaluated when running on Spark 3.3.
+          if (sparkVersion == "3.3") {
+            val filterExecs = df.queryExecution.executedPlan.collect {
+              case filter: FilterExecTransformerBase => filter
+            }
+            assert(filterExecs.size == 4)
+            // No downcast needed: toString is available on every FilterExecTransformerBase.
+            assert(filterExecs.head.toString.contains("might_contain"))
+          }
+        }
+      )
+    }
+  }
 }
diff --git a/cpp-ch/local-engine/AggregateFunctions/AggregateFunctionGroupBloomFilter.h b/cpp-ch/local-engine/AggregateFunctions/AggregateFunctionGroupBloomFilter.h
index 94883933de76..767ba0550c0b 100644
--- a/cpp-ch/local-engine/AggregateFunctions/AggregateFunctionGroupBloomFilter.h
+++ b/cpp-ch/local-engine/AggregateFunctions/AggregateFunctionGroupBloomFilter.h
@@ -126,6 +126,11 @@ class AggregateFunctionGroupBloomFilter final : public IAggregateFunctionDataHel
 
     void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
     {
+        // Skip un-initted values
+        if (!this->data(rhs).initted)
+        {
+            return;
+        }
         const auto & bloom_other = this->data(rhs).bloom_filter;
         const auto & filter_other = bloom_other.getFilter();
         if (!this->data(place).initted)