diff --git a/cpp/velox/compute/WholeStageResultIterator.cc b/cpp/velox/compute/WholeStageResultIterator.cc index 69f705e269dd..cf3c1431c664 100644 --- a/cpp/velox/compute/WholeStageResultIterator.cc +++ b/cpp/velox/compute/WholeStageResultIterator.cc @@ -62,6 +62,9 @@ const std::string kAbandonPartialAggregationMinPct = "spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct"; const std::string kAbandonPartialAggregationMinRows = "spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows"; +const std::string kBloomFilterExpectedNumItems = "spark.sql.optimizer.runtime.bloomFilter.expectedNumItems"; +const std::string kBloomFilterNumBits = "spark.sql.optimizer.runtime.bloomFilter.numBits"; +const std::string kBloomFilterMaxNumBits = "spark.sql.optimizer.runtime.bloomFilter.maxNumBits"; // metrics const std::string kDynamicFiltersProduced = "dynamicFiltersProduced"; @@ -363,6 +366,12 @@ std::unordered_map WholeStageResultIterator::getQueryC configs[velox::core::QueryConfig::kSpillableReservationGrowthPct] = getConfigValue(confMap_, kSpillableReservationGrowthPct, "25"); configs[velox::core::QueryConfig::kSpillCompressionKind] = getConfigValue(confMap_, kSpillCompressionKind, "lz4"); + configs[velox::core::QueryConfig::kSparkBloomFilterExpectedNumItems] = + getConfigValue(confMap_, kBloomFilterExpectedNumItems, "1000000"); + configs[velox::core::QueryConfig::kSparkBloomFilterNumBits] = + getConfigValue(confMap_, kBloomFilterNumBits, "8388608"); + configs[velox::core::QueryConfig::kSparkBloomFilterMaxNumBits] = + getConfigValue(confMap_, kBloomFilterMaxNumBits, "4194304"); } catch (const std::invalid_argument& err) { std::string errDetails = err.what(); throw std::runtime_error("Invalid conf arg: " + errDetails); diff --git a/shims/common/src/main/scala/io/glutenproject/GlutenConfig.scala b/shims/common/src/main/scala/io/glutenproject/GlutenConfig.scala index b3b1be0b9629..c13cc63455c0 100644 --- a/shims/common/src/main/scala/io/glutenproject/GlutenConfig.scala +++ b/shims/common/src/main/scala/io/glutenproject/GlutenConfig.scala @@ -422,7 +422,10 @@ object GlutenConfig { SQLConf.SESSION_LOCAL_TIMEZONE.key, GLUTEN_DEFAULT_SESSION_TIMEZONE_KEY, SQLConf.LEGACY_SIZE_OF_NULL.key, - "spark.io.compression.codec" + "spark.io.compression.codec", + "spark.sql.optimizer.runtime.bloomFilter.expectedNumItems", + "spark.sql.optimizer.runtime.bloomFilter.numBits", + "spark.sql.optimizer.runtime.bloomFilter.maxNumBits" ) keys.forEach( k => {