diff --git a/CHANGELOG.md b/CHANGELOG.md index 7425b761a..62df9414c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,15 +10,16 @@ ### Bugfix - * fixes bug where missing key in credentials file leads to AttributeError +* fixes a bug in grokker rules where target fields sharing a common prefix were not handled correctly +* fixes bug where missing key in credentials file leads to AttributeError ## 11.1.0 ### Features - * new documentation part with security best practices which compiles to `user_manual/security/best_practices.html` - * also comes with excel export functionality of given best practices - * add basic auth to http_input +* new documentation part with security best practices which compiles to `user_manual/security/best_practices.html` + * also comes with excel export functionality of given best practices +* add basic auth to http_input ### Bugfix diff --git a/logprep/processor/grokker/rule.py b/logprep/processor/grokker/rule.py index aed04050e..ffd5468ee 100644 --- a/logprep/processor/grokker/rule.py +++ b/logprep/processor/grokker/rule.py @@ -68,10 +68,13 @@ def _dotted_field_to_logstash_converter(mapping: dict) -> dict: def _transform(pattern): # nosemgrep fields = re.findall(FIELD_PATTERN, pattern) for dotted_field, _ in fields: - splitted_field = dotted_field.split(".") - if len(splitted_field) > 1: - replacement = "".join(f"[{element}]" for element in splitted_field) - pattern = re.sub(re.escape(dotted_field), replacement, pattern) + if "." in dotted_field: + replacement = "".join(f"[{element}]" for element in dotted_field.split(".")) + # ensure full field is replaced by scanning for ':' at the front and '}' or ':' + # at the end in the pattern. Also add them again in the replacement string. + pattern = re.sub( + f":{re.escape(dotted_field)}([}}:])", f":{replacement}\\1", pattern + ) return pattern def _replace_pattern(pattern): @@ -116,7 +119,7 @@ class Config(DissectorRule.Config): pattern. 
It is possible to use `oniguruma` regex pattern with or without grok patterns in the patterns part. When defining an `oniguruma` there is a limitation of three nested - parentheses inside the pattern. Applying more nested parentheses is not possible. + parentheses inside the pattern. Applying more nested parentheses is not possible. Logstashs ecs conform grok patterns are used to resolve the here used grok patterns. When writing patterns it is advised to be careful as the underlying regex can become complex fast. If the execution and the resolving of the pattern takes more than one second a diff --git a/tests/unit/processor/grokker/test_grokker.py b/tests/unit/processor/grokker/test_grokker.py index 229334162..4a24e0a00 100644 --- a/tests/unit/processor/grokker/test_grokker.py +++ b/tests/unit/processor/grokker/test_grokker.py @@ -310,6 +310,22 @@ "port": 1234, }, ), + ( + "Subfield with common prefix", + { + "filter": "message", + "grokker": { + "mapping": { + "message": "Facility %{USER:facility.location} %{USER:facility.location_level}" + } + }, + }, + {"message": "Facility spain primary"}, + { + "message": "Facility spain primary", + "facility": {"location": "spain", "location_level": "primary"}, + }, + ), ] failure_test_cases = [