Skip to content

Commit

Permalink
make extend_target_list preserve ordering (#593)
Browse files Browse the repository at this point in the history
* add test for fieldmanager

---------

Co-authored-by: djkhl <[email protected]>
  • Loading branch information
ekneg54 and djkhl authored Jun 5, 2024
1 parent b8353ff commit 28d7c54
Show file tree
Hide file tree
Showing 3 changed files with 110 additions and 51 deletions.
7 changes: 6 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,13 @@
"**/.profile": true
},
"python.analysis.typeCheckingMode": "off",
"python.terminal.activateEnvInCurrentTerminal": true,
"python.analysis.enablePytestSupport": true,
"python.testing.pytestArgs": [
"-vv"
],
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter",
"editor.formatOnSave": true
},
}
}
124 changes: 81 additions & 43 deletions logprep/processor/field_manager/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
"""

import itertools
from typing import Any, List, Tuple
from collections import namedtuple

from logprep.abc.processor import Processor
from logprep.processor.base.exceptions import FieldExistsWarning
Expand Down Expand Up @@ -102,40 +102,78 @@ def _write_to_multiple_targets(self, event, target_fields, field_values, rule, r
raise FieldExistsWarning(rule, event, unsuccessful_targets)

# Write the collected source values into one target field of the event.
# `args` unpacks to (event, target_field, <list of source values>); which write
# strategy is used depends on `extend_target_list`, `overwrite_target`, the number
# of source values and the value currently stored in the target field.
# NOTE(review): this span comes from a diff view without +/- markers and without
# indentation. The statements and helper methods from here up to the
# `source_fields_values` unpacking look like the previous implementation, and the
# State/match dispatch after it looks like its replacement (it still uses the
# parameters of this signature) - confirm against the repository before relying on it.
def _write_to_single_target(self, args, extend_target_list, overwrite_target, rule):
if extend_target_list and overwrite_target:
self._overwrite_with_list_from_source_field_values(*args)
if extend_target_list and not overwrite_target:
self._overwrite_with_list_from_source_field_values_include_target_field_value(*args)
if not extend_target_list and overwrite_target:
self._overwrite_target_with_source_field_values(*args)
if not extend_target_list and not overwrite_target:
self._add_field_to(*args, rule=rule)

def _add_field_to(self, *args, rule):
event, target_field, field_values = args
if len(field_values) == 1:
field_values = field_values.pop()
successful = add_field_to(event, target_field, field_values, False, False)
if not successful:
raise FieldExistsWarning(rule, event, [target_field])

def _overwrite_target_with_source_field_values(self, event, target_field, field_values):
if len(field_values) == 1:
field_values = field_values.pop()
add_and_overwrite(event, target_field, field_values)

def _overwrite_with_list_from_source_field_values_include_target_field_value(self, *args):
event, target_field, field_values = args
origin_field_value = get_dotted_field_value(event, target_field)
if origin_field_value is not None:
field_values.append(origin_field_value)
self._overwrite_with_list_from_source_field_values(event, target_field, field_values)

def _overwrite_with_list_from_source_field_values(self, *args):
event, target_field, field_values = args
lists, other = self._separate_lists_form_other_types(field_values)
target_field_value = self._get_deduplicated_sorted_flatten_list(lists, other)
add_and_overwrite(event, target_field, target_field_value)
event, target_field, source_fields_values = args
target_field_value = get_dotted_field_value(event, target_field)
# Bundle the two rule flags with the shape of the source values and of the current
# target value so the match statement below can select a write strategy.
State = namedtuple(
"State",
["overwrite", "extend", "single_source_element", "target_is_list", "target_is_none"],
)
state = State(
overwrite=overwrite_target,
extend=extend_target_list,
single_source_element=len(source_fields_values) == 1,
target_is_list=isinstance(target_field_value, list),
target_is_none=target_field_value is None,
)
# Without extend, a lone source value is unwrapped so it is written as a plain value
# instead of a one-element list. pop() also removes it from the list passed in via args.
if state.single_source_element and not state.extend:
source_fields_values = source_fields_values.pop()

# Cases are tried top to bottom and the first match wins; attributes a pattern does
# not name are left unconstrained.
match state:
# extend + overwrite, several source values, target is not a list:
# replace the target with the source values as collected (no flattening, no de-duplication).
case State(
extend=True, overwrite=True, single_source_element=False, target_is_list=False
):
add_and_overwrite(event, target_field, source_fields_values)
return

# extend without overwrite, several source values, target missing:
# write the flattened, de-duplicated source values.
case State(
extend=True,
overwrite=False,
single_source_element=False,
target_is_list=False,
target_is_none=True,
):
flattened_source_fields = self._overwrite_from_source_values(source_fields_values)
source_fields_values = [*flattened_source_fields]
add_and_overwrite(event, target_field, source_fields_values)
return

# extend without overwrite, several source values, target holds a non-list value
# (the missing-target case was handled above): the existing value goes first,
# followed by the source values as collected.
case State(
extend=True, overwrite=False, single_source_element=False, target_is_list=False
):
source_fields_values = [target_field_value, *source_fields_values]
add_and_overwrite(event, target_field, source_fields_values)
return

# extend without overwrite, several source values, target already a list:
# existing items keep their position in front and the flattened source values are
# appended. De-duplication only covers the source values, not the existing items.
case State(
extend=True, overwrite=False, single_source_element=False, target_is_list=True
):
flattened_source_fields = self._overwrite_from_source_values(source_fields_values)
source_fields_values = [*target_field_value, *flattened_source_fields]
add_and_overwrite(event, target_field, source_fields_values)
return

# remaining extend + overwrite combinations (a single source value, or a target that
# is already a list): replace the target with the flattened, de-duplicated source values.
case State(overwrite=True, extend=True):
flattened_source_fields = self._overwrite_from_source_values(source_fields_values)
source_fields_values = [*flattened_source_fields]
add_and_overwrite(event, target_field, source_fields_values)
return

# everything else (every extend=False combination, and extend without overwrite for a
# single source value) is delegated to add_field_to; a falsy result is raised as
# FieldExistsWarning for this target.
case _:
success = add_field_to(
event, target_field, source_fields_values, state.extend, state.overwrite
)
if not success:
raise FieldExistsWarning(rule, event, [target_field])

def _overwrite_from_source_values(self, source_fields_values):
duplicates = []
ordered_flatten_list = []
flat_source_fields = self._get_flatten_source_fields(source_fields_values)
for field_value in flat_source_fields:
if field_value not in duplicates:
duplicates.append(field_value)
ordered_flatten_list.append(field_value)
return ordered_flatten_list

def _handle_missing_fields(self, event, rule, source_fields, field_values):
if rule.ignore_missing_fields:
Expand All @@ -161,14 +199,14 @@ def _get_missing_fields_error(self, source_fields, field_values):
return error

@staticmethod
def _separate_lists_form_other_types(field_values: List[Any]) -> Tuple[List[List], List[Any]]:
field_values_lists = list(filter(lambda x: isinstance(x, list), field_values))
field_values_not_list = list(filter(lambda x: not isinstance(x, list), field_values))
return field_values_lists, field_values_not_list

@staticmethod
def _get_deduplicated_sorted_flatten_list(lists: List[List], not_lists: List[Any]) -> List:
return sorted(list({*sum(lists, []), *not_lists}))
def _get_flatten_source_fields(source_fields_values):
flat_source_fields = []
for item in source_fields_values:
if isinstance(item, list):
flat_source_fields.extend(item)
else:
flat_source_fields.append(item)
return flat_source_fields

@staticmethod
def _filter_missing_fields(source_field_values, targets):
Expand Down
30 changes: 23 additions & 7 deletions tests/unit/processor/field_manager/test_field_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@
"field3": ["value5", "value6", "value4"],
"new_field": ["i exist"],
},
{"new_field": ["i exist", "value1", "value2", "value3", "value4", "value5", "value6"]},
{"new_field": ["i exist", "value1", "value2", "value3", "value5", "value4", "value6"]},
),
(
(
Expand All @@ -232,7 +232,7 @@
"field3": ["value5", "value6", "value4"],
"new_field": ["i exist"],
},
{"new_field": ["value1", "value2", "value3", "value4", "value5", "value6"]},
{"new_field": ["value1", "value2", "value3", "value5", "value4", "value6"]},
),
(
"real world example from documentation",
Expand Down Expand Up @@ -270,14 +270,14 @@
"source": {"ip": "10.10.2.33"},
"related": {
"ip": [
"10.10.2.33",
"127.0.0.1",
"180.22.66.1",
"180.22.66.3",
"fe89::",
"192.168.5.1",
"223.2.3.2",
"8.8.8.8",
"fe89::",
"180.22.66.3",
"10.10.2.33",
"180.22.66.1",
"223.2.3.2",
]
},
},
Expand Down Expand Up @@ -420,6 +420,22 @@
"target_field": "first",
},
),
(
"extend_target_list preserves list ordering",
{
"filter": "(foo) OR (test)",
"field_manager": {
"id": "5cfa7a26-94af-49de-bc82-460c42e9dc56",
"source_fields": ["foo", "test"],
"target_field": "existing_list",
"delete_source_fields": False,
"overwrite_target": False,
"extend_target_list": True,
},
},
{"existing_list": ["hello", "world"], "foo": "bar", "test": "value"},
{"existing_list": ["hello", "world", "bar", "value"], "foo": "bar", "test": "value"},
),
]

failure_test_cases = [
Expand Down

0 comments on commit 28d7c54

Please sign in to comment.