Skip to content

Commit

Permalink
redact sensitive confs
Browse files Browse the repository at this point in the history
  • Loading branch information
ArnavBalyan committed Aug 12, 2024
1 parent 5d6c6f3 commit 961c922
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 11 deletions.
39 changes: 30 additions & 9 deletions cpp/core/config/GlutenConfig.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,26 +20,47 @@
#include "compute/ProtobufUtils.h"
#include "config.pb.h"
#include "jni/JniError.h"
#include <regex>
#include <optional>

namespace gluten {

const std::string REDACTED_VALUE = "*********(redacted)";
const std::string REGEX_REDACT_KEY = "spark.gluten.redaction.regex";

/// Decodes a serialized ConfigMap protobuf message into a flat key/value map.
///
/// @param env            Not used by this function's body.
/// @param planData       Pointer to the serialized ConfigMap bytes.
/// @param planDataLength Number of bytes available at planData.
/// @return A map holding every (key, value) entry of the decoded ConfigMap.
///
/// NOTE(review): the outcome of gluten::parseProtobuf is not inspected here; confirm
/// how that helper reports a malformed buffer.
std::unordered_map<std::string, std::string>
parseConfMap(JNIEnv* env, const uint8_t* planData, const int32_t planDataLength) {
  std::unordered_map<std::string, std::string> sparkConfs;
  ConfigMap pConfigMap;
  gluten::parseProtobuf(planData, planDataLength, &pConfigMap);
  for (const auto& pair : pConfigMap.configs()) {
    sparkConfs.emplace(pair.first, pair.second);
  }

  return sparkConfs;
}

/// Compiles the redaction pattern stored under REGEX_REDACT_KEY, if one is configured.
///
/// @param conf Configuration map to look the pattern up in.
/// @return std::nullopt when REGEX_REDACT_KEY is absent; otherwise a compiled regex.
///
/// The std::regex constructor throws std::regex_error for a malformed pattern. The
/// pattern comes from user configuration and the result is used while printing the
/// configuration, so a bad pattern should neither abort the caller nor quietly turn
/// redaction off. In that case a match-everything regex is returned instead, which
/// makes every value redacted.
std::optional<std::regex> getRedactionRegex(const std::unordered_map<std::string, std::string>& conf) {
  const auto it = conf.find(REGEX_REDACT_KEY);
  if (it == conf.end()) {
    return std::nullopt;
  }
  try {
    return std::regex(it->second);
  } catch (const std::regex_error&) {
    // Fail closed: an unusable pattern redacts everything rather than nothing.
    // [\s\S]* is used instead of .* so that line terminators are matched as well.
    return std::regex("[\\s\\S]*");
  }
}

/// Formats a configuration map for logging.
///
/// The result starts with a newline, followed by one line per entry of the form
/// " [key, value]". When getRedactionRegex(conf) yields a regex and an entry's key
/// matches it in full (std::regex_match), REDACTED_VALUE is printed in place of the
/// value. Entries appear in the map's iteration order, which std::unordered_map
/// leaves unspecified.
///
/// @param conf Configuration entries to render.
/// @return The formatted multi-line string.
std::string printConfig(const std::unordered_map<std::string, std::string>& conf) {
  std::ostringstream oss;
  oss << '\n';

  // Compile the pattern once; it does not change while iterating.
  const auto redactionRegex = getRedactionRegex(conf);

  for (const auto& [k, v] : conf) {
    const bool redact = redactionRegex && std::regex_match(k, *redactionRegex);
    oss << " [" << k << ", " << (redact ? REDACTED_VALUE : v) << "]\n";
  }
  return oss.str();
}

} // namespace gluten
2 changes: 2 additions & 0 deletions cpp/core/config/GlutenConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ const std::string kShuffleCompressionCodecBackend = "spark.gluten.sql.columnar.s
const std::string kQatBackendName = "qat";
const std::string kIaaBackendName = "iaa";

const std::string kRedactionRegex = "spark.gluten.redaction.regex";

std::unordered_map<std::string, std::string>
parseConfMap(JNIEnv* env, const uint8_t* planData, const int32_t planDataLength);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -622,6 +622,7 @@ object GlutenConfig {

val GLUTEN_COST_EVALUATOR_ENABLED = "spark.gluten.sql.adaptive.costEvaluator.enabled"

val GLUTEN_REGEX_LOG_REDACTION = "spark.gluten.redaction.regex"
var ins: GlutenConfig = _

def getConf: GlutenConfig = {
Expand Down Expand Up @@ -673,7 +674,8 @@ object GlutenConfig {
// gcs config
SPARK_GCS_STORAGE_ROOT_URL,
SPARK_GCS_AUTH_TYPE,
SPARK_GCS_AUTH_SERVICE_ACCOUNT_JSON_KEYFILE
SPARK_GCS_AUTH_SERVICE_ACCOUNT_JSON_KEYFILE,
GLUTEN_REGEX_LOG_REDACTION
)
nativeConfMap.putAll(conf.filter(e => keys.contains(e._1)).asJava)

Expand Down Expand Up @@ -757,7 +759,8 @@ object GlutenConfig {
GLUTEN_TASK_OFFHEAP_SIZE_IN_BYTES_KEY,
GLUTEN_OFFHEAP_ENABLED,
SESSION_LOCAL_TIMEZONE.key,
DECIMAL_OPERATIONS_ALLOW_PREC_LOSS.key
DECIMAL_OPERATIONS_ALLOW_PREC_LOSS.key,
GLUTEN_REGEX_LOG_REDACTION
)
nativeConfMap.putAll(conf.filter(e => keys.contains(e._1)).asJava)

Expand Down

0 comments on commit 961c922

Please sign in to comment.