Skip to content

Commit

Permalink
compile regexes at configuration time
Browse files Browse the repository at this point in the history
  • Loading branch information
ben-taussig-solo committed Jan 19, 2024
1 parent 12c1cf4 commit 233a2c9
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,18 @@ message Extraction {
uint32 subgroup = 3;
}

// A regular expression that is registered under a name in
// TransformationTemplate.regexes and referred to by that name from templates.
message Regex {
// The regular expression pattern. The field is required.
// NOTE(review): the pattern appears to be compiled once, when the
// transformation is configured, and a pattern that does not compile is
// rejected at that point rather than yielding an empty value -- confirm
// against the filter implementation.
string regex = 1;
// Not enabled yet: a second field to select a capturing group (or to name the
// regex). Both candidates below reuse field number 2, so at most one of them
// can be uncommented.
// uint32 subgroup = 2;
// string name = 2;
}

// Defines a transformation template.
message TransformationTemplate {

Expand All @@ -195,6 +207,8 @@ message TransformationTemplate {
// "my-extractor" extractor.
map<string, Extraction> extractors = 2;

map<string, Regex> regexes = 13;

// Use this attribute to transform request/response headers. It consists of a
// map of strings to templates. The string key determines the name of the
// resulting header, the rendered template will determine the value. Any existing
Expand Down
44 changes: 39 additions & 5 deletions source/extensions/filters/http/transformation/inja_transformer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -380,18 +380,30 @@ json TransformerInstance::raw_string_callback(const inja::Arguments &args) const
return val;
}

// Inja callback implementing `replace(text, regex_name, replacement)`.
//
// args[0] - the text to operate on.
// args[1] - the NAME of a regex that was compiled at configuration time; it is
//           looked up in the thread-local transformer context.
// args[2] - the replacement string passed to std::regex_replace.
//
// Returns the text with every match replaced. If no regex is registered under
// the given name, or if matching itself raises std::regex_error, the original
// text is returned unchanged.
//
// TODO: maybe call this regex_replace_callback?
json TransformerInstance::replace_callback(const inja::Arguments &args) const {
  const std::string &textToOperateOn = args.at(0)->get_ref<const std::string &>();
  const std::string &regexNameToSearchFor = args.at(1)->get_ref<const std::string &>();
  const std::string &stringToReplaceWith = args.at(2)->get_ref<const std::string &>();

  // Bind by reference: taking the map by value would deep-copy every compiled
  // regex on each invocation of the callback.
  const auto &regexes = *tls_.getTyped<ThreadLocalTransformerContext>().regexes_;
  const auto found = regexes.find(regexNameToSearchFor);
  if (found == regexes.end()) {
    // TODO: log the error
    // Unknown regex name: leave the text untouched.
    return textToOperateOn;
  }

  // Reference the already-compiled regex instead of copying it.
  const std::regex &regex = found->second;
  try {
    return std::regex_replace(textToOperateOn, regex, stringToReplaceWith);
  } catch (const std::regex_error &) {
    // TODO: log the error
    // std::regex_replace can still throw while matching; fall back to the
    // original text in that case.
    return textToOperateOn;
  }
}

Expand Down Expand Up @@ -452,6 +464,20 @@ InjaTransformer::InjaTransformer(const TransformationTemplate &transformation,
"Failed to parse header template '{}': {}", it->first, e.what()));
}
}

const auto &regexes = transformation.regexes();
for (auto it = regexes.begin(); it != regexes.end(); it++) {
std::string name(it->first);
try {
// try to compile regex. if valid, add it to the map of compiled regexes
std::regex regex_pattern(it->second.regex());
regexes_.emplace_back(std::make_pair(name, regex_pattern));
} catch (const std::exception &e) {
throw EnvoyException(fmt::format(
"Failed to parse regex template '{}': {}", name, e.what()));
}
}

const auto &headers_to_remove = transformation.headers_to_remove();
for (auto idx : headers_to_remove) {
Http::LowerCaseString header_name(idx);
Expand Down Expand Up @@ -589,6 +615,13 @@ void InjaTransformer::transform(Http::RequestOrResponseHeaderMap &header_map,
}
}

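// Build a name -> compiled-regex lookup map for this call; only its address is
// handed to the thread-local context further down.
// NOTE(review): this copies every std::regex on each transform() call --
// consider building the map once at construction time instead.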
std::unordered_map<std::string, std::regex> regexes;
regexes.reserve(regexes_.size());
for (const auto &regex : regexes_) {
regexes[regex.first] = regex.second;
}

// get cluster metadata
const envoy::config::core::v3::Metadata *cluster_metadata{};
Upstream::ClusterInfoConstSharedPtr ci = callbacks.clusterInfo();
Expand All @@ -604,6 +637,7 @@ void InjaTransformer::transform(Http::RequestOrResponseHeaderMap &header_map,
typed_tls_data.request_headers_ = request_headers;
typed_tls_data.body_ = &get_body;
typed_tls_data.extractions_ = &extractions;
typed_tls_data.regexes_ = &regexes;
typed_tls_data.context_ = &json_body;
typed_tls_data.environ_ = &environ_;
typed_tls_data.cluster_metadata_ = cluster_metadata;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ struct ThreadLocalTransformerContext : public ThreadLocal::ThreadLocalObject {
const Http::RequestHeaderMap *request_headers_;
const GetBodyFunc *body_;
const std::unordered_map<std::string, absl::string_view> *extractions_;
const std::unordered_map<std::string, std::regex> *regexes_;
const nlohmann::json *context_;
const std::unordered_map<std::string, std::string> *environ_;
const envoy::config::core::v3::Metadata *cluster_metadata_;
Expand Down Expand Up @@ -123,6 +124,7 @@ class InjaTransformer : public Transformer {
std::vector<Http::LowerCaseString> headers_to_remove_;
std::vector<DynamicMetadataValue> dynamic_metadata_;
std::unordered_map<std::string, std::string> environ_;
std::vector<std::pair<std::string, std::regex>> regexes_;

envoy::api::v2::filter::http::TransformationTemplate::RequestBodyParse
parse_body_behavior_;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1245,7 +1245,12 @@ TEST_F(InjaTransformerTest, EscapeCharactersRawStringCallback) {
TEST_F(InjaTransformerTest, ReplaceHappyPath) {
Http::TestRequestHeaderMapImpl headers{{":method", "GET"}, {":path", "/foo"}};
envoy::api::v2::filter::http::TransformationTemplate transformation;
transformation.mutable_body()->set_text("{{ replace(body(), \"foo\", \"bar\") }}");

envoy::api::v2::filter::http::Regex regex;
regex.set_regex("foo");
(*transformation.mutable_regexes())["regex_1"] = regex;

transformation.mutable_body()->set_text("{{ replace(body(), \"regex_1\", \"bar\") }}");
// set parse body behavior to DontParse so that the body is not parsed as JSON
// this is not necessary to use the replace callback, but personally I think it makes the
// test easier to read
Expand Down Expand Up @@ -1280,26 +1285,28 @@ TEST_F(InjaTransformerTest, ReplaceNoMatch) {
// Verifies that an invalid pattern in the `regexes` map is rejected when the
// transformer is constructed, i.e. at configuration time, instead of
// surfacing later while a request is being transformed.
TEST_F(InjaTransformerTest, ReplaceInvalidRegex) {
  envoy::api::v2::filter::http::TransformationTemplate transformation;

  // Register an invalid regex pattern. In this case, an unbalanced bracket.
  envoy::api::v2::filter::http::Regex regex;
  regex.set_regex("(foo");
  (*transformation.mutable_regexes())["regex_1"] = regex;

  transformation.mutable_body()->set_text("{{ replace(body(), \"regex_1\", \"bar\") }}");
  transformation.set_parse_body_behavior(TransformationTemplate::DontParse);

  // The regex must already be registered before the transformer is built,
  // because the patterns are compiled in the InjaTransformer constructor.
  EXPECT_THROW(
      InjaTransformer transformer(transformation, rng_, google::protobuf::BoolValue(), tls_),
      EnvoyException);
}

TEST_F(InjaTransformerTest, ReplaceMultipleInstances) {
Http::TestRequestHeaderMapImpl headers{{":method", "GET"}, {":path", "/multi"}};
envoy::api::v2::filter::http::TransformationTemplate transformation;

envoy::api::v2::filter::http::Regex regex;
regex.set_regex("foo");
(*transformation.mutable_regexes())["regex_1"] = regex;

// The regex pattern here is "foo", which should be replaced by "bar" in all occurrences
transformation.mutable_body()->set_text("{{ replace(body(), \"foo\", \"bar\") }}");
transformation.mutable_body()->set_text("{{ replace(body(), \"regex_1\", \"bar\") }}");
transformation.set_parse_body_behavior(TransformationTemplate::DontParse);

InjaTransformer transformer(transformation, rng_, google::protobuf::BoolValue(), tls_);
Expand All @@ -1312,7 +1319,9 @@ TEST_F(InjaTransformerTest, ReplaceMultipleInstances) {
EXPECT_EQ(body.toString(), expected_body);
}

// TODO: maybe test some advanced regex features like lookaheads

} // namespace Transformation
} // namespace HttpFilters
} // namespace Extensions
} // namespace Envoy
} // namespace Envoy

0 comments on commit 233a2c9

Please sign in to comment.