diff --git a/cpp/core/config/GlutenConfig.cc b/cpp/core/config/GlutenConfig.cc
index fa04ecfa4e5c..bc6ad1cbe859 100644
--- a/cpp/core/config/GlutenConfig.cc
+++ b/cpp/core/config/GlutenConfig.cc
@@ -15,13 +15,30 @@
  * limitations under the License.
  */
 
+#include <boost/regex.hpp>
 #include <jni.h>
-
+#include <optional>
 #include "compute/ProtobufUtils.h"
 #include "config.pb.h"
 #include "jni/JniError.h"
 
+namespace {
+
+// Compiles the pattern stored under gluten::kSparkRedactionRegex in `conf`.
+// Returns std::nullopt when the key is absent. A pattern that boost cannot
+// compile makes the boost::regex constructor throw boost::regex_error, which propagates to the caller.
+std::optional<boost::regex> getRedactionRegex(const std::unordered_map<std::string, std::string>& conf) {
+  const auto it = conf.find(gluten::kSparkRedactionRegex);
+  if (it == conf.end()) {
+    return std::nullopt;
+  }
+  return boost::regex(it->second);
+}
+
+} // namespace
+
 namespace gluten {
+
 std::unordered_map<std::string, std::string>
 parseConfMap(JNIEnv* env, const uint8_t* planData, const int32_t planDataLength) {
   std::unordered_map<std::string, std::string> sparkConfs;
@@ -37,9 +54,26 @@ parseConfMap(JNIEnv* env, const uint8_t* planData, const int32_t planDataLength)
 std::string printConfig(const std::unordered_map<std::string, std::string>& conf) {
   std::ostringstream oss;
   oss << std::endl;
-  for (auto& [k, v] : conf) {
-    oss << " [" << k << ", " << v << "]\n";
+
+  // A pattern that boost cannot compile must neither make printConfig throw
+  // nor let values through unredacted: redact every value instead.
+  std::optional<boost::regex> redactionRegex;
+  bool redactAll = false;
+  try {
+    redactionRegex = getRedactionRegex(conf);
+  } catch (const boost::regex_error&) {
+    redactAll = true;
+  }
+
+  for (const auto& [k, v] : conf) {
+    // Spark redacts when the pattern is found anywhere in the key or the value
+    // (unanchored search), so regex_search is used rather than the anchored regex_match.
+    const bool redact = redactAll ||
+        (redactionRegex &&
+         (boost::regex_search(k, *redactionRegex) || boost::regex_search(v, *redactionRegex)));
+    oss << " [" << k << ", " << (redact ? kSparkRedactionString : v) << "]\n";
   }
   return oss.str();
 }
+
 } // namespace gluten
diff --git a/cpp/core/config/GlutenConfig.h b/cpp/core/config/GlutenConfig.h
index 057d85930d2a..31318ff0aa0c 100644
--- a/cpp/core/config/GlutenConfig.h
+++ b/cpp/core/config/GlutenConfig.h
@@ -66,6 +66,11 @@ const std::string kShuffleCompressionCodecBackend = "spark.gluten.sql.columnar.s
 const std::string kQatBackendName = "qat";
 const std::string kIaaBackendName = "iaa";
 
+// Conf key whose value is the regex selecting which entries printConfig redacts.
+const std::string kSparkRedactionRegex = "spark.redaction.regex";
+// Text printed in place of a redacted value.
+const std::string kSparkRedactionString = "*********(redacted)";
+
 std::unordered_map<std::string, std::string>
 parseConfMap(JNIEnv* env, const uint8_t* planData, const int32_t planDataLength);
 
diff --git a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
index 88491f6bf4b1..fa78060dad6c 100644
--- a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
+++ b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
@@ -538,6 +538,7 @@ object GlutenConfig {
   val GLUTEN_ONHEAP_SIZE_KEY = "spark.executor.memory"
   val GLUTEN_OFFHEAP_SIZE_KEY = "spark.memory.offHeap.size"
   val GLUTEN_OFFHEAP_ENABLED = "spark.memory.offHeap.enabled"
+  val SPARK_REDACTION_REGEX = "spark.redaction.regex"
 
   // For Soft Affinity Scheduling
   // Enable Soft Affinity Scheduling, default value is false
@@ -677,7 +678,8 @@ object GlutenConfig {
       // gcs config
       SPARK_GCS_STORAGE_ROOT_URL,
       SPARK_GCS_AUTH_TYPE,
-      SPARK_GCS_AUTH_SERVICE_ACCOUNT_JSON_KEYFILE
+      SPARK_GCS_AUTH_SERVICE_ACCOUNT_JSON_KEYFILE,
+      SPARK_REDACTION_REGEX
     )
     nativeConfMap.putAll(conf.filter(e => keys.contains(e._1)).asJava)
 
@@ -764,7 +766,8 @@ object GlutenConfig {
       GLUTEN_TASK_OFFHEAP_SIZE_IN_BYTES_KEY,
       GLUTEN_OFFHEAP_ENABLED,
       SESSION_LOCAL_TIMEZONE.key,
-      DECIMAL_OPERATIONS_ALLOW_PREC_LOSS.key
+      DECIMAL_OPERATIONS_ALLOW_PREC_LOSS.key,
+      SPARK_REDACTION_REGEX
     )
     nativeConfMap.putAll(conf.filter(e => keys.contains(e._1)).asJava)
 