Skip to content

Commit

Permalink
Merge branch 'main' into dev-refactor-generic-resolver-rule-validation
Browse files Browse the repository at this point in the history
  • Loading branch information
dtrai2 authored Nov 11, 2024
2 parents 39a0c2a + 81d6897 commit b36d11d
Show file tree
Hide file tree
Showing 30 changed files with 102 additions and 53 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,4 @@ examples/k8s/charts
*.so
target
wheelhouse
requirements.*
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,15 @@
### Features
### Improvements

* replace `BaseException` with `Exception` for custom errors
* refactor `generic_resolver` to validate rules once on startup instead of on every application of a rule

### Bugfix

- fix `confluent_kafka.store_offsets` failing if `last_valid_record` is `None`, which can happen if a
rebalancing occurs before the first message has been polled.
- fix pseudonymizer cache metrics not being updated

## 14.0.0
### Breaking

Expand Down
2 changes: 1 addition & 1 deletion logprep/abc/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ def _has_missing_values(self, event, rule, source_field_dict):
if missing_fields:
if rule.ignore_missing_fields:
return True
error = BaseException(f"{self.name}: no value for fields: {missing_fields}")
error = Exception(f"{self.name}: no value for fields: {missing_fields}")
self._handle_warning_error(event, rule, error)
return True
return False
Expand Down
3 changes: 3 additions & 0 deletions logprep/connector/confluent_kafka/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,9 @@ def batch_finished_callback(self) -> None:
"""
if self._enable_auto_offset_store:
return
# in case the ConfluentKafkaInput._revoke_callback is triggered before the first message was polled
if not self._last_valid_record:
return
try:
self._consumer.store_offsets(message=self._last_valid_record)
except KafkaException as error:
Expand Down
6 changes: 3 additions & 3 deletions logprep/connector/dummy/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
A dummy input that returns the documents it was initialized with.
If a "document" is derived from BaseException, that exception will be thrown instead of
If a "document" is derived from Exception, that exception will be thrown instead of
returning a document. The exception will be removed and subsequent calls may return documents or
throw other exceptions in the given order.
Expand Down Expand Up @@ -36,7 +36,7 @@ class DummyInput(Input):
class Config(Input.Config):
"""DummyInput specific configuration"""

documents: List[Union[dict, type, BaseException]]
documents: List[Union[dict, type, Exception]]
"""A list of documents that should be returned."""
repeat_documents: Optional[str] = field(
validator=validators.instance_of(bool), default=False
Expand All @@ -57,6 +57,6 @@ def _get_event(self, timeout: float) -> tuple:

document = self._documents.pop(0)

if (document.__class__ == type) and issubclass(document, BaseException):
if (document.__class__ == type) and issubclass(document, Exception):
raise document
return document, None
2 changes: 1 addition & 1 deletion logprep/connector/file/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
FileInput
==========
A generic line input that returns the documents it was initialized with.
If a "document" is derived from BaseException, that exception will be thrown instead of
If a "document" is derived from Exception, that exception will be thrown instead of
returning a document. The exception will be removed and subsequent calls may return documents or
throw other exceptions in the given order.
Expand Down
2 changes: 1 addition & 1 deletion logprep/connector/json/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
A json input that returns the documents it was initialized with.
If a "document" is derived from BaseException, that exception will be thrown instead of
If a "document" is derived from Exception, that exception will be thrown instead of
returning a document. The exception will be removed and subsequent calls may return documents or
throw other exceptions in the given order.
Expand Down
2 changes: 1 addition & 1 deletion logprep/connector/jsonl/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
A json line input that returns the documents it was initialized with.
If a "document" is derived from BaseException, that exception will be thrown instead of
If a "document" is derived from Exception, that exception will be thrown instead of
returning a document. The exception will be removed and subsequent calls may return documents or
throw other exceptions in the given order.
Expand Down
2 changes: 1 addition & 1 deletion logprep/filter/expression/filter_expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from typing import Any, List


class FilterExpressionError(BaseException):
class FilterExpressionError(Exception):
"""Base class for FilterExpression related exceptions."""


Expand Down
2 changes: 1 addition & 1 deletion logprep/filter/lucene_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@
logger = logging.getLogger("LuceneFilter")


class LuceneFilterError(BaseException):
class LuceneFilterError(Exception):
"""Base class for LuceneFilter related exceptions."""


Expand Down
4 changes: 2 additions & 2 deletions logprep/processor/amides/detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from logprep.processor.amides.features import CommaSeparation


class DetectionModelError(BaseException):
class DetectionModelError(Exception):
"""Base exception class for all RuleModel-related errors."""


Expand Down Expand Up @@ -98,7 +98,7 @@ def detect(self, sample: str) -> Tuple[bool, float]:
return False, round(confidence_value, 3)


class RuleAttributorError(BaseException):
class RuleAttributorError(Exception):
"""Base class for all RuleAttributor-related Errors."""


Expand Down
4 changes: 1 addition & 3 deletions logprep/processor/dissector/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,7 @@ def _get_mappings(self, event, rule) -> List[Tuple[Callable, dict, str, str, str
if loop_content is None:
if rule.ignore_missing_fields:
continue
error = BaseException(
f"dissector: mapping field '{source_field}' does not exist"
)
error = Exception(f"dissector: mapping field '{source_field}' does not exist")
self._handle_warning_error(event, rule, error)
if delimiter is not None and loop_content is not None:
content, _, loop_content = loop_content.partition(delimiter)
Expand Down
3 changes: 1 addition & 2 deletions logprep/processor/field_manager/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,8 +197,7 @@ def _get_field_values(event, source):

def _get_missing_fields_error(self, source_fields, field_values):
missing_fields = [key for key, value in zip(source_fields, field_values) if value is None]
error = BaseException(f"{self.name}: missing source_fields: {missing_fields}")
return error
return Exception(f"{self.name}: missing source_fields: {missing_fields}")

@staticmethod
def _get_flatten_source_fields(source_fields_values):
Expand Down
2 changes: 1 addition & 1 deletion logprep/processor/labeler/labeling_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from logprep.util.getter import GetterFactory


class LabelingSchemaError(BaseException):
class LabelingSchemaError(Exception):
"""Base class for LabelingSchema related exceptions."""


Expand Down
2 changes: 1 addition & 1 deletion logprep/processor/list_comparison/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
from logprep.util.helper import add_field_to, get_dotted_field_value


class ListComparisonError(BaseException):
class ListComparisonError(Exception):
"""Base class for ListComparison related exceptions."""

def __init__(self, name: str, message: str):
Expand Down
8 changes: 4 additions & 4 deletions logprep/processor/pseudonymizer/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,9 +344,9 @@ def _wrap_hash(self, hash_string: str) -> str:
def _update_cache_metrics(self):
cache_info_pseudonyms = self._get_pseudonym_dict_cached.cache_info()
cache_info_urls = self._pseudonymize_url_cached.cache_info()
self.metrics.new_results = cache_info_pseudonyms.misses + cache_info_urls.misses
self.metrics.cached_results = cache_info_pseudonyms.hits + cache_info_urls.hits
self.metrics.num_cache_entries = cache_info_pseudonyms.currsize + cache_info_urls.currsize
self.metrics.cache_load = (cache_info_pseudonyms.currsize + cache_info_urls.currsize) / (
self.metrics.new_results += cache_info_pseudonyms.misses + cache_info_urls.misses
self.metrics.cached_results += cache_info_pseudonyms.hits + cache_info_urls.hits
self.metrics.num_cache_entries += cache_info_pseudonyms.currsize + cache_info_urls.currsize
self.metrics.cache_load += (cache_info_pseudonyms.currsize + cache_info_urls.currsize) / (
cache_info_pseudonyms.maxsize + cache_info_urls.maxsize
)
2 changes: 1 addition & 1 deletion logprep/processor/template_replacer/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
from logprep.util.helper import add_field_to, get_dotted_field_value


class TemplateReplacerError(BaseException):
class TemplateReplacerError(Exception):
"""Base class for TemplateReplacer related exceptions."""

def __init__(self, name: str, message: str):
Expand Down
2 changes: 1 addition & 1 deletion logprep/util/auto_rule_tester/auto_rule_tester.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@


# pylint: disable=protected-access
class AutoRuleTesterException(BaseException):
class AutoRuleTesterException(Exception):
"""Base class for AutoRuleTester related exceptions."""

def __init__(self, message: str):
Expand Down
2 changes: 1 addition & 1 deletion logprep/util/grok_pattern_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
PATTERN_CONVERSION = [("[[:alnum:]]", r"\w")]


class GrokPatternLoaderError(BaseException):
class GrokPatternLoaderError(Exception):
"""Base class for GrokPatternLoader related exceptions."""

def __init__(self, message: str):
Expand Down
2 changes: 1 addition & 1 deletion logprep/util/pre_detector_rule_matching_tester.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@


# pylint: disable=protected-access
class MatchingRuleTesterException(BaseException):
class MatchingRuleTesterException(Exception):
"""Base class for MatchingRuleTester related exceptions."""

def __init__(self, message: str):
Expand Down
Empty file removed requirements.in
Empty file.
Empty file removed requirements.txt
Empty file.
2 changes: 1 addition & 1 deletion tests/unit/connector/test_confluent_kafka_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def test_create_fails_for_unknown_option(self):
def test_error_callback_logs_error(self):
self.object.metrics.number_of_errors = 0
with mock.patch("logging.Logger.error") as mock_error:
test_error = BaseException("test error")
test_error = Exception("test error")
self.object._error_callback(test_error)
mock_error.assert_called()
mock_error.assert_called_with(f"{self.object.describe()}: {test_error}")
Expand Down
13 changes: 11 additions & 2 deletions tests/unit/connector/test_confluent_kafka_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,15 @@ def test_batch_finished_callback_calls_store_offsets(self, _):
kafka_consumer.store_offsets.assert_called()
kafka_consumer.store_offsets.assert_called_with(message=message)

@mock.patch("logprep.connector.confluent_kafka.input.Consumer")
def test_batch_finished_callback_skips_store_offsets_without_valid_record(self, _):
    """Check that no offsets are stored while there is no last valid record.

    ``_last_valid_record`` is forced to ``None`` to simulate the callback
    running before the first message was polled; in that state
    ``batch_finished_callback`` must return without calling
    ``store_offsets`` on the consumer.

    The test carries its own unique name: reusing the name of another test
    in the same class would rebind it and the other test would never run.
    """
    input_config = deepcopy(self.CONFIG)
    kafka_input = Factory.create({"test": input_config})
    kafka_consumer = kafka_input._consumer
    # no message has been received yet
    kafka_input._last_valid_record = None
    kafka_input.batch_finished_callback()
    kafka_consumer.store_offsets.assert_not_called()

@mock.patch("logprep.connector.confluent_kafka.input.Consumer")
def test_batch_finished_callback_raises_input_warning_on_kafka_exception(self, _):
input_config = deepcopy(self.CONFIG)
Expand All @@ -119,7 +128,7 @@ def raise_generator(return_sequence):
return list(reversed(return_sequence)).pop()

kafka_consumer.store_offsets.side_effect = raise_generator(return_sequence)
kafka_input._last_valid_records = {0: "message"}
kafka_input._last_valid_record = {0: "message"}
with pytest.raises(InputWarning):
kafka_input.batch_finished_callback()

Expand Down Expand Up @@ -225,7 +234,7 @@ def test_get_next_raises_critical_input_parsing_error(self):

def test_commit_callback_raises_warning_error_and_counts_failures(self):
with pytest.raises(InputWarning, match="Could not commit offsets"):
self.object._commit_callback(BaseException, ["topic_partition"])
self.object._commit_callback(Exception, ["topic_partition"])
assert self.object._commit_failures == 1

def test_commit_callback_counts_commit_success(self):
Expand Down
6 changes: 3 additions & 3 deletions tests/unit/connector/test_dummy_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from tests.unit.connector.base import BaseInputTestCase


class DummyError(BaseException):
class DummyError(Exception):
pass


Expand Down Expand Up @@ -44,9 +44,9 @@ def test_raises_exceptions_instead_of_returning_them_in_document(self):

def test_raises_exceptions_instead_of_returning_them(self):
config = copy.deepcopy(self.CONFIG)
config["documents"] = [BaseException]
config["documents"] = [Exception]
self.object = Factory.create({"Test Instance Name": config})
with raises(BaseException):
with raises(Exception):
self.object.get_next(self.timeout)

def test_repeat_documents_repeats_documents(self):
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/connector/test_dummy_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def test_raises_exception_on_call_to_store(self):
config.update({"exceptions": ["FatalOutputError"]})
dummy_output = Factory.create({"test connector": config})

with raises(BaseException, match="FatalOutputError"):
with raises(Exception, match="FatalOutputError"):
dummy_output.store({"order": 0})

def test_raises_exception_on_call_to_store_custom(self):
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/connector/test_json_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from tests.unit.connector.base import BaseInputTestCase


class DummyError(BaseException):
class DummyError(Exception):
pass


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,14 @@ def test_apply_signature_engine(self):
def test_exception_if_raw_text_with_start_tag():
log_record = LogRecord(raw_text="Test log with start tag <+> must raise an exception")
sign_engine = SignatureEngine()
with pytest.raises(BaseException, match=r"Start-tag <\+> in raw log message"):
with pytest.raises(Exception, match=r"Start-tag <\+> in raw log message"):
sign_engine.run(log_record, LogSaltModeTestComposition.rules[0])

@staticmethod
def test_exception_if_raw_text_with_end_tag():
log_record = LogRecord(raw_text="Test log with end tag </+> must raise an exception")
sign_engine = SignatureEngine()
with pytest.raises(BaseException, match=r"End-tag </\+> in raw log message"):
with pytest.raises(Exception, match=r"End-tag </\+> in raw log message"):
sign_engine.run(log_record, LogSaltModeTestComposition.rules[0])

@staticmethod
Expand All @@ -64,7 +64,7 @@ def test_missing_end_tag_in_sig_text():
"Test log with a start tag <+>, but a missing end tag, " "must raise an exception"
)
stp = SignatureTagParser()
with pytest.raises(BaseException):
with pytest.raises(Exception):
stp.calculate_signature(sig_text)


Expand Down
Loading

0 comments on commit b36d11d

Please sign in to comment.