Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GLUTEN-6368] Redact sensitive configs when calling gluten::printConfig #6793

Merged
merged 7 commits into from
Aug 19, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 24 additions & 3 deletions cpp/core/config/GlutenConfig.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,25 @@
*/

#include <jni.h>

#include <optional>
#include <regex>
#include "compute/ProtobufUtils.h"
#include "config.pb.h"
#include "jni/JniError.h"

namespace {

/// Builds the regex that decides which config entries are redacted when the
/// config map is printed.
///
/// The pattern is read from the `gluten::kSparkRedactionRegex` entry of `conf`.
/// Spark writes this pattern in Java regex syntax, which std::regex (ECMAScript
/// grammar) only partially understands, so two adjustments are made:
///   - a leading "(?i)" inline flag (used by Spark's default pattern) is
///     stripped and translated to std::regex::icase;
///   - any pattern that still fails to compile yields a match-everything regex
///     instead of propagating std::regex_error, so that a bad pattern results
///     in over-redaction rather than a crash or a leaked secret.
///
/// @param conf Config map that may contain the redaction pattern.
/// @return std::nullopt when `conf` has no redaction entry; otherwise a
///         compiled regex (possibly the match-everything fallback).
std::optional<std::regex> getRedactionRegex(const std::unordered_map<std::string, std::string>& conf) {
  const auto it = conf.find(gluten::kSparkRedactionRegex);
  if (it == conf.end()) {
    return std::nullopt;
  }

  std::string pattern = it->second;
  std::regex::flag_type flags = std::regex::ECMAScript;

  // "(?i)" is Java/Perl syntax for case-insensitive matching; std::regex
  // rejects it, so express it through the icase flag instead.
  const std::string caseInsensitivePrefix = "(?i)";
  if (pattern.compare(0, caseInsensitivePrefix.size(), caseInsensitivePrefix) == 0) {
    pattern.erase(0, caseInsensitivePrefix.size());
    flags |= std::regex::icase;
  }

  try {
    return std::regex(pattern, flags);
  } catch (const std::regex_error&) {
    // Fail closed: a pattern we cannot compile must not disable redaction.
    return std::regex(".*");
  }
}
} // namespace

namespace gluten {

std::unordered_map<std::string, std::string>
parseConfMap(JNIEnv* env, const uint8_t* planData, const int32_t planDataLength) {
std::unordered_map<std::string, std::string> sparkConfs;
Expand All @@ -37,9 +50,17 @@ parseConfMap(JNIEnv* env, const uint8_t* planData, const int32_t planDataLength)
std::string printConfig(const std::unordered_map<std::string, std::string>& conf) {
std::ostringstream oss;
oss << std::endl;
for (auto& [k, v] : conf) {
oss << " [" << k << ", " << v << "]\n";

auto redactionRegex = getRedactionRegex(conf);

for (const auto& [k, v] : conf) {
if (redactionRegex && std::regex_match(k, *redactionRegex)) {
oss << " [" << k << ", " << kSparkRedactionString << "]\n";
} else {
oss << " [" << k << ", " << v << "]\n";
}
}
return oss.str();
}

} // namespace gluten
3 changes: 3 additions & 0 deletions cpp/core/config/GlutenConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ const std::string kShuffleCompressionCodecBackend = "spark.gluten.sql.columnar.s
const std::string kQatBackendName = "qat";
const std::string kIaaBackendName = "iaa";

// Config key under which the redaction pattern is looked up when printing configs.
const std::string kSparkRedactionRegex = "spark.redaction.regex";
// Placeholder that printConfig emits in place of the value of a redacted entry.
const std::string kSparkRedactionString = "*********(redacted)";

std::unordered_map<std::string, std::string>
parseConfMap(JNIEnv* env, const uint8_t* planData, const int32_t planDataLength);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,7 @@ object GlutenConfig {
val GLUTEN_ONHEAP_SIZE_KEY = "spark.executor.memory"
val GLUTEN_OFFHEAP_SIZE_KEY = "spark.memory.offHeap.size"
val GLUTEN_OFFHEAP_ENABLED = "spark.memory.offHeap.enabled"
val SPARK_REDACTION_REGEX = "spark.redaction.regex"

// For Soft Affinity Scheduling
// Enable Soft Affinity Scheduling, default value is false
Expand Down Expand Up @@ -673,7 +674,8 @@ object GlutenConfig {
// gcs config
SPARK_GCS_STORAGE_ROOT_URL,
SPARK_GCS_AUTH_TYPE,
SPARK_GCS_AUTH_SERVICE_ACCOUNT_JSON_KEYFILE
SPARK_GCS_AUTH_SERVICE_ACCOUNT_JSON_KEYFILE,
SPARK_REDACTION_REGEX
)
nativeConfMap.putAll(conf.filter(e => keys.contains(e._1)).asJava)

Expand Down Expand Up @@ -757,7 +759,8 @@ object GlutenConfig {
GLUTEN_TASK_OFFHEAP_SIZE_IN_BYTES_KEY,
GLUTEN_OFFHEAP_ENABLED,
SESSION_LOCAL_TIMEZONE.key,
DECIMAL_OPERATIONS_ALLOW_PREC_LOSS.key
DECIMAL_OPERATIONS_ALLOW_PREC_LOSS.key,
SPARK_REDACTION_REGEX
)
nativeConfMap.putAll(conf.filter(e => keys.contains(e._1)).asJava)

Expand Down
Loading