diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
index 8e6dd7c1c31f..2416e4ee6de8 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
@@ -159,10 +159,21 @@ bool SubstraitToVeloxPlanValidator::validateRegexExpr(
     LOG_VALIDATION_MSG("Pattern is not string literal for " + name);
     return false;
   }
-
   const auto& pattern = patternArg.literal().string();
+
+  // regexp_replace passes the rewrite string as its third argument. Require a string literal so it can be
+  // checked against the pattern here; for every other function `rewrite` stays empty.
+  std::string rewrite;
+  if (name == "regexp_replace" && scalarFunction.arguments().size() > 2) {
+    const auto& rewriteArg = scalarFunction.arguments()[2].value();
+    if (!rewriteArg.has_literal() || !rewriteArg.literal().has_string()) {
+      LOG_VALIDATION_MSG("Rewrite is not string literal for " + name);
+      return false;
+    }
+    rewrite = rewriteArg.literal().string();
+  }
   std::string error;
-  if (!validatePattern(pattern, error)) {
+  if (!validateRe2Function(pattern, rewrite, error)) {
     LOG_VALIDATION_MSG(name + " due to " + error);
     return false;
   }
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.h b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.h
index 1fe174928fd9..b94ed9e3a40e 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.h
+++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.h
@@ -116,7 +116,8 @@ class SubstraitToVeloxPlanValidator {
 
   /// Validates regex functions.
   /// Ensures the second pattern argument is a literal string.
-  /// Check if the pattern can pass with RE2 compilation.
+  /// Checks that the pattern compiles with RE2 and, for regexp_replace,
+  /// that the literal rewrite string is valid for that pattern.
   bool validateRegexExpr(const std::string& name, const ::substrait::Expression::ScalarFunction& scalarFunction);
 
   /// Validate Substrait scarlar function.
diff --git a/cpp/velox/utils/Common.cc b/cpp/velox/utils/Common.cc
index 7dfffd50798e..e9ebd772ddaa 100644
--- a/cpp/velox/utils/Common.cc
+++ b/cpp/velox/utils/Common.cc
@@ -52,13 +52,25 @@ std::unique_ptr compilePattern(const std::string& pattern) {
   return std::make_unique(re2::StringPiece(pattern), RE2::Quiet);
 }
 
-bool validatePattern(const std::string& pattern, std::string& error) {
+bool validateRe2Function(const std::string& pattern, const std::string& rewrite, std::string& error) {
   auto re2 = compilePattern(pattern);
   if (!re2->ok()) {
     error = "Pattern " + pattern + " compilation failed in RE2. Reason: " + re2->error();
     return false;
   }
-  return ensureRegexIsCompatible(pattern, error);
+
+  if (!ensureRegexIsCompatible(pattern, error)) {
+    return false;
+  }
+
+  // Reject rewrite strings that RE2::CheckRewriteString does not accept for this pattern.
+  // An empty rewrite is skipped: it is what callers pass when there is no rewrite argument.
+  std::string err;
+  if (!rewrite.empty() && !re2->CheckRewriteString(re2::StringPiece(rewrite), &err)) {
+    error = "Rewrite " + rewrite + " check failed in RE2. Reason: " + err;
+    return false;
+  }
+  return true;
 }
 
 } // namespace gluten
diff --git a/cpp/velox/utils/Common.h b/cpp/velox/utils/Common.h
index eaa551da0566..dd23afb7a7d7 100644
--- a/cpp/velox/utils/Common.h
+++ b/cpp/velox/utils/Common.h
@@ -29,7 +29,10 @@ namespace gluten {
 // Compile the given pattern and return the RE2 object.
 inline std::unique_ptr compilePattern(const std::string& pattern);
 
-bool validatePattern(const std::string& pattern, std::string& error);
+// Validates the arguments of a regex function against RE2. `pattern` must compile and pass
+// ensureRegexIsCompatible; a non-empty `rewrite` must pass RE2::CheckRewriteString for that pattern.
+// Returns false on failure; `error` is set here or passed on to ensureRegexIsCompatible.
+bool validateRe2Function(const std::string& pattern, const std::string& rewrite, std::string& error);
 
 static inline void fastCopy(void* dst, const void* src, size_t n) {
   facebook::velox::simd::memcpy(dst, src, n);