From ccad610a9487fe200974fe1abe2ba2cb24c21193 Mon Sep 17 00:00:00 2001 From: ppcad <45867125+ppcad@users.noreply.github.com> Date: Mon, 9 Dec 2024 14:38:45 +0100 Subject: [PATCH] Fix log arrival timezone (#715) * Add option to TimeParser to get current timestamp with timezone * Fix timezone in log arrival and delta time * Add TimeParser and log arrival time changes to changelog --------- Co-authored-by: dtrai2 <95028228+dtrai2@users.noreply.github.com> --- CHANGELOG.md | 2 ++ logprep/abc/input.py | 17 ++++++++++------- logprep/util/time.py | 9 +++++++-- tests/unit/util/test_time.py | 19 +++++++++++++++---- 4 files changed, 34 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e82beaadf..7d96ba655 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ the list is now fixed inside the packaged logprep * refactored some processors to make use of the new helper methods * add `pre-commit` hooks to the repository, install new dev dependency and run `pre-commit install` in the root dir * the default `securityContext`for the pod is now configurable +* allow `TimeParser` to get the current time with a specified timezone instead of always using local time and setting the timezone to UTC * remove `tldextract` dependency * remove `urlextract` dependency @@ -34,6 +35,7 @@ the list is now fixed inside the packaged logprep * fix `confluent_kafka.store_offsets` if `last_valid_record` is `None`, can happen if a rebalancing happens before the first message was pulled. * fix pseudonymizer cache metrics not updated +* fix incorrect timezones for log arrival time and delta time in input preprocessing * fix `_get_value` in `FilterExpression` so that keys don't match on values * fix `auto_rule_tester` to work with `LOGPREP_BYPASS_RULE_TREE` enabled diff --git a/logprep/abc/input.py b/logprep/abc/input.py index 396d995c0..585a5f4de 100644 --- a/logprep/abc/input.py +++ b/logprep/abc/input.py @@ -8,9 +8,10 @@ import zlib from abc import abstractmethod from copy import deepcopy -from functools import partial +from functools import partial, cached_property from hmac import HMAC from typing import Optional, Tuple +from zoneinfo import ZoneInfo from attrs import define, field, validators @@ -198,6 +199,11 @@ def _add_version_info(self): """Check and return if the version info should be added to the event.""" return bool(self._config.preprocessing.get("version_info_target_field")) + @cached_property + def _log_arrival_timestamp_timezone(self): + """Returns the timezone for log arrival timestamps""" + return ZoneInfo("UTC") + @property def _add_log_arrival_time_information(self): """Check and return if the log arrival time info should be added to the event.""" @@ -311,12 +317,9 @@ def _add_env_enrichment_to_event(self, event: dict): add_fields_to(event, fields) def _add_arrival_time_information_to_event(self, event: dict): - new_field = { - self._config.preprocessing.get( - "log_arrival_time_target_field" - ): TimeParser.now().isoformat() - } - add_fields_to(event, new_field) + target = self._config.preprocessing.get("log_arrival_time_target_field") + time = TimeParser.now(self._log_arrival_timestamp_timezone).isoformat() + add_fields_to(event, {target: time}) def _add_arrival_timedelta_information_to_event(self, event: dict): log_arrival_timedelta_config = self._config.preprocessing.get("log_arrival_timedelta") diff --git a/logprep/util/time.py b/logprep/util/time.py index 66046ca22..75a57dc86 100644 --- a/logprep/util/time.py +++ b/logprep/util/time.py @@ -61,15 +61,20 @@ def from_timestamp(cls, timestamp: Union[int, float]) -> datetime: return time_object @classmethod - def now(cls) -> datetime: + def now(cls, timezone: tzinfo = None) -> datetime: """returns the current time + Parameters + ---------- + timezone : tzinfo + the timezone to use for the timestamp + Returns ------- datetime current date and time as datetime """ - time_object = datetime.now() + time_object = datetime.now(timezone) time_object = cls._set_utc_if_timezone_is_missing(time_object) return time_object diff --git a/tests/unit/util/test_time.py b/tests/unit/util/test_time.py index 4599f6fda..f3d3c876c 100644 --- a/tests/unit/util/test_time.py +++ b/tests/unit/util/test_time.py @@ -60,11 +60,22 @@ def test_from_format_returns(self, source, format_str, expected): for attribute, value in expected.items(): assert getattr(timestamp, attribute) == value + @pytest.mark.parametrize("timezone", [None, ZoneInfo("UTC"), ZoneInfo("Europe/Berlin")]) + def test_has_utc_if_timezone_was_set(self, timezone): + datetime_time = datetime.now(timezone) + time_parser_time = TimeParser.now(timezone) + assert time_parser_time.second == pytest.approx(datetime_time.second, abs=1) + if timezone is None: + assert time_parser_time.tzinfo == ZoneInfo("UTC") + else: + assert time_parser_time.tzinfo == timezone + def test_set_utc_if_timezone_is_missing_sets_timezone(self): - time_object = datetime.now() - assert time_object.tzinfo is None - time_object = TimeParser._set_utc_if_timezone_is_missing(time_object) - assert time_object.tzinfo is ZoneInfo("UTC") + datetime_time = datetime.now() + assert datetime_time.tzinfo is None + time_parser_time = TimeParser._set_utc_if_timezone_is_missing(datetime_time) + assert time_parser_time.tzinfo is ZoneInfo("UTC") + assert time_parser_time.second == pytest.approx(datetime_time.second, abs=1) @pytest.mark.parametrize( "timestamp, source_format, source_timezone, expected_timezone_name, expected",