Skip to content

Commit

Permalink
[GLUTEN-6368] Redact sensitive configs when calling `gluten::printConfig` (apache#6793)
Browse files Browse the repository at this point in the history
  • Loading branch information
ArnavBalyan authored and shamirchen committed Oct 14, 2024
1 parent 2f53396 commit 14df7e6
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 5 deletions.
27 changes: 24 additions & 3 deletions cpp/core/config/GlutenConfig.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,26 @@
* limitations under the License.
*/

#include <boost/regex.hpp>
#include <jni.h>

#include <optional>
#include "compute/ProtobufUtils.h"
#include "config.pb.h"
#include "jni/JniError.h"

namespace {

// Looks up the gluten::kSparkRedactionRegex entry in `conf` and compiles its
// value into a boost::regex.
//
// Returns std::nullopt when `conf` has no such entry; otherwise returns the
// regex built from the entry's value.
std::optional<boost::regex> getRedactionRegex(const std::unordered_map<std::string, std::string>& conf) {
  const auto entry = conf.find(gluten::kSparkRedactionRegex);
  if (entry == conf.end()) {
    // No redaction pattern configured.
    return std::nullopt;
  }
  return boost::regex(entry->second);
}
} // namespace

namespace gluten {

std::unordered_map<std::string, std::string>
parseConfMap(JNIEnv* env, const uint8_t* planData, const int32_t planDataLength) {
std::unordered_map<std::string, std::string> sparkConfs;
Expand All @@ -37,9 +50,17 @@ parseConfMap(JNIEnv* env, const uint8_t* planData, const int32_t planDataLength)
// Renders every entry of `conf` as one " [key, value]" line, for logging.
//
// If `conf` carries a redaction pattern (see getRedactionRegex), the value of
// every entry whose key contains a match for that pattern is replaced with
// kSparkRedactionString so that secrets do not end up in the output.
//
// @param conf configuration entries to print.
// @return a string that starts with a newline, followed by one line per entry.
std::string printConfig(const std::unordered_map<std::string, std::string>& conf) {
  std::ostringstream oss;
  oss << '\n';

  const auto redactionRegex = getRedactionRegex(conf);

  for (const auto& [k, v] : conf) {
    // Use regex_search rather than regex_match: Spark applies the redaction
    // pattern as a "find anywhere in the key" test, so a pattern such as
    // `(?i)secret|password|token` must redact `spark.hadoop.fs.s3a.secret.key`.
    // A whole-string match would leave such keys unredacted.
    const bool redact = redactionRegex && boost::regex_search(k, *redactionRegex);
    oss << " [" << k << ", " << (redact ? kSparkRedactionString : v) << "]\n";
  }
  return oss.str();
}

} // namespace gluten
3 changes: 3 additions & 0 deletions cpp/core/config/GlutenConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ const std::string kShuffleCompressionCodecBackend = "spark.gluten.sql.columnar.s
const std::string kQatBackendName = "qat";
const std::string kIaaBackendName = "iaa";

// Configuration key under which the redaction pattern is looked up.
const std::string kSparkRedactionRegex = "spark.redaction.regex";
// Placeholder text written in place of a configuration value that is redacted.
const std::string kSparkRedactionString = "*********(redacted)";

std::unordered_map<std::string, std::string>
parseConfMap(JNIEnv* env, const uint8_t* planData, const int32_t planDataLength);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,7 @@ object GlutenConfig {
val GLUTEN_ONHEAP_SIZE_KEY = "spark.executor.memory"
val GLUTEN_OFFHEAP_SIZE_KEY = "spark.memory.offHeap.size"
val GLUTEN_OFFHEAP_ENABLED = "spark.memory.offHeap.enabled"
// Key of the redaction pattern; listed among the keys whose entries are copied into nativeConfMap.
val SPARK_REDACTION_REGEX = "spark.redaction.regex"

// For Soft Affinity Scheduling
// Enable Soft Affinity Scheduling, default value is false
Expand Down Expand Up @@ -683,7 +684,8 @@ object GlutenConfig {
// gcs config
SPARK_GCS_STORAGE_ROOT_URL,
SPARK_GCS_AUTH_TYPE,
SPARK_GCS_AUTH_SERVICE_ACCOUNT_JSON_KEYFILE
SPARK_GCS_AUTH_SERVICE_ACCOUNT_JSON_KEYFILE,
SPARK_REDACTION_REGEX
)
nativeConfMap.putAll(conf.filter(e => keys.contains(e._1)).asJava)

Expand Down Expand Up @@ -770,7 +772,8 @@ object GlutenConfig {
GLUTEN_TASK_OFFHEAP_SIZE_IN_BYTES_KEY,
GLUTEN_OFFHEAP_ENABLED,
SESSION_LOCAL_TIMEZONE.key,
DECIMAL_OPERATIONS_ALLOW_PREC_LOSS.key
DECIMAL_OPERATIONS_ALLOW_PREC_LOSS.key,
SPARK_REDACTION_REGEX
)
nativeConfMap.putAll(conf.filter(e => keys.contains(e._1)).asJava)

Expand Down

0 comments on commit 14df7e6

Please sign in to comment.