From 6556a04f5318134940ddd779700770e69fc66975 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Zimmermann?= <101292599+ekneg54@users.noreply.github.com> Date: Tue, 14 May 2024 13:48:47 +0200 Subject: [PATCH] remove logger from signatures (#589) * remove logger from component signatures * remove logger from factory * remove logger from factory.create * remove warning output from corpustester --------- Co-authored-by: dtrai2 --- CHANGELOG.md | 2 + .../architecture/diagramms/output.drawio.html | 2 +- .../processor_examples/calculator.ipynb | 436 +++++----- .../processor_examples/concatenator.ipynb | 460 +++++----- .../processor_examples/dissector.ipynb | 466 +++++----- .../processor_examples/field_manager.ipynb | 556 ++++++------ .../processor_examples/generic_adder.ipynb | 386 ++++----- .../geo_ip_enricher_custom_outputfields.ipynb | 524 ++++++------ .../processor_examples/grokker.ipynb | 426 +++++----- .../processor_examples/ip_informer.ipynb | 800 +++++++++--------- .../processor_examples/key_checker.ipynb | 658 +++++++------- .../processor_examples/requester.ipynb | 472 +++++------ .../processor_examples/string_splitter.ipynb | 514 +++++------ .../processor_examples/timestamp_differ.ipynb | 388 ++++----- .../processor_examples/timestamper.ipynb | 396 ++++----- logprep/abc/component.py | 5 +- logprep/abc/output.py | 4 +- logprep/abc/processor.py | 24 +- logprep/connector/confluent_kafka/input.py | 40 +- logprep/connector/confluent_kafka/output.py | 7 +- logprep/connector/dummy/output.py | 9 +- logprep/connector/elasticsearch/output.py | 9 +- logprep/connector/file/input.py | 4 +- logprep/connector/http/input.py | 17 +- logprep/connector/jsonl/output.py | 4 +- logprep/connector/opensearch/output.py | 2 +- logprep/connector/s3/output.py | 13 +- logprep/factory.py | 8 +- logprep/framework/pipeline.py | 6 +- logprep/framework/pipeline_manager.py | 12 +- logprep/processor/amides/processor.py | 7 +- logprep/processor/clusterer/processor.py | 6 +- .../domain_label_extractor/processor.py | 7 +- .../processor/domain_resolver/processor.py | 16 +- .../generic_adder/mysql_connector.py | 11 +- logprep/processor/generic_adder/processor.py | 9 +- .../processor/generic_resolver/processor.py | 9 +- logprep/processor/geoip_enricher/processor.py | 7 +- logprep/processor/grokker/processor.py | 7 +- .../processor/hyperscan_resolver/processor.py | 10 +- logprep/processor/labeler/processor.py | 10 +- .../processor/list_comparison/processor.py | 6 +- logprep/processor/normalizer/processor.py | 5 +- logprep/processor/pseudonymizer/processor.py | 5 +- .../selective_extractor/processor.py | 10 +- .../processor/template_replacer/processor.py | 4 +- .../auto_rule_corpus_tester.py | 37 +- .../util/auto_rule_tester/auto_rule_tester.py | 2 +- logprep/util/configuration.py | 8 +- logprep/util/defaults.py | 8 +- .../util/pre_detector_rule_matching_tester.py | 2 +- tests/unit/component/base.py | 2 +- tests/unit/connector/base.py | 40 +- .../connector/test_confluent_kafka_common.py | 2 +- .../connector/test_confluent_kafka_input.py | 14 +- .../connector/test_confluent_kafka_output.py | 4 +- tests/unit/connector/test_dummy_input.py | 2 +- tests/unit/connector/test_dummy_output.py | 8 +- tests/unit/connector/test_http_input.py | 20 +- tests/unit/connector/test_json_input.py | 2 +- tests/unit/connector/test_jsonl_input.py | 2 +- .../unit/connector/test_opensearch_output.py | 2 +- tests/unit/connector/test_real_kafka.py | 18 +- tests/unit/connector/test_s3_output.py | 14 +- .../exceptions/test_connector_exceptions.py | 51 +- 
.../framework/rule_tree/test_rule_tree.py | 3 +- tests/unit/framework/test_pipeline.py | 24 +- tests/unit/framework/test_pipeline_manager.py | 6 +- tests/unit/processor/base.py | 16 +- .../test_domain_label_extractor.py | 4 +- .../domain_resolver/test_domain_resolver.py | 12 +- .../generic_adder/test_generic_adder.py | 8 +- tests/unit/processor/grokker/test_grokker.py | 2 +- tests/unit/processor/labeler/test_labeler.py | 6 +- .../list_comparison/test_list_comparison.py | 2 +- .../processor/normalizer/test_normalizer.py | 2 +- .../pseudonymizer/test_pseudonymizer.py | 6 +- .../test_template_replacer.py | 12 +- tests/unit/processor/test_process.py | 12 +- tests/unit/test_factory.py | 28 +- .../unit/util/test_auto_rule_corpus_tester.py | 32 +- 81 files changed, 3536 insertions(+), 3656 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fa54e7987..499d63aeb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ ### Improvements +* remove logger from Components and Factory signatures + ## 11.3.0 ### Features diff --git a/doc/source/development/architecture/diagramms/output.drawio.html b/doc/source/development/architecture/diagramms/output.drawio.html index a1fd38152..5edcdf70a 100644 --- a/doc/source/development/architecture/diagramms/output.drawio.html +++ b/doc/source/development/architecture/diagramms/output.drawio.html @@ -5,7 +5,7 @@ output -
+ [regenerated draw.io diagram: large generated HTML <div> omitted]
\ No newline at end of file diff --git a/doc/source/development/notebooks/processor_examples/calculator.ipynb b/doc/source/development/notebooks/processor_examples/calculator.ipynb index 0b531df76..8822d2501 100644 --- a/doc/source/development/notebooks/processor_examples/calculator.ipynb +++ b/doc/source/development/notebooks/processor_examples/calculator.ipynb @@ -1,226 +1,226 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Calculator\n", - "\n", - "This presentations goal it to introduce the features of the `Calculator` and how to configure it." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### The challenge\n", - "\n", - "I want calculate with field values." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "from this:" - ] - }, - { - "cell_type": "code", - "execution_count": 71, - "metadata": {}, - "outputs": [], - "source": [ - "document = {\n", - " 'message': {\n", - " \"time_in_ms\": 0.333\n", - " }\n", - " }" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "to this:" - ] - }, - { - "cell_type": "code", - "execution_count": 72, - "metadata": {}, - "outputs": [], - "source": [ - "expected = {\n", - " 'message': {\n", - " \"time_in_ns\": 333000\n", - " }\n", - " }" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create rule and processor" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the rule:" - ] - }, - { - "cell_type": "code", - "execution_count": 73, - "metadata": {}, - "outputs": [ + "cells": [ { - "data": { - "text/plain": [ - "153" + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Calculator\n", + "\n", + "This presentations goal it to introduce the features of the `Calculator` and how to configure it." ] - }, - "execution_count": 73, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import sys\n", - "sys.path.append(\"../../../../../\")\n", - "import tempfile\n", - "from pathlib import Path\n", - "\n", - "rule_yaml = \"\"\"---\n", - "filter: \"message.time_in_ms\"\n", - "calculator:\n", - " target_field: message.time_in_ns\n", - " calc: trunc(${message.time_in_ms} * 10e5)\n", - " delete_source_fields: true\n", - "\"\"\"\n", - "\n", - "rule_path = Path(tempfile.gettempdir()) / \"calculator\"\n", - "rule_path.mkdir(exist_ok=True)\n", - "rule_file = rule_path / \"calculation.yml\"\n", - "rule_file.write_text(rule_yaml)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the processor config:" - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "metadata": {}, - "outputs": [], - "source": [ - "processor_config = {\n", - " \"mycalculator\":{ \n", - " \"type\": \"calculator\",\n", - " \"specific_rules\": [str(rule_path)],\n", - " \"generic_rules\": [\"/dev\"],\n", - " }\n", - " }" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the processor with the factory:" - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "metadata": {}, - "outputs": [ + }, { - "data": { - "text/plain": [ - "calculator" + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The challenge\n", + "\n", + "I want calculate with field values." 
] - }, - "execution_count": 75, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from unittest import mock\n", - "from logprep.factory import Factory\n", - "\n", - "mock_logger = mock.MagicMock()\n", - "calculator = Factory.create(processor_config, mock_logger)\n", - "calculator" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Process event" - ] - }, - { - "cell_type": "code", - "execution_count": 76, - "metadata": {}, - "outputs": [ + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "from this:" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [], + "source": [ + "document = {\n", + " 'message': {\n", + " \"time_in_ms\": 0.333\n", + " }\n", + " }" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "to this:" + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "before: {'message': {'time_in_ms': 0.333}}\n", - "after: {'message': {'time_in_ns': 333000}}\n", - "True\n" - ] + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [], + "source": [ + "expected = {\n", + " 'message': {\n", + " \"time_in_ns\": 333000\n", + " }\n", + " }" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create rule and processor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the rule:" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "153" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import sys\n", + "sys.path.append(\"../../../../../\")\n", + "import tempfile\n", + "from pathlib import Path\n", + "\n", + "rule_yaml = \"\"\"---\n", + "filter: \"message.time_in_ms\"\n", + "calculator:\n", + " target_field: message.time_in_ns\n", + " calc: trunc(${message.time_in_ms} * 10e5)\n", + " delete_source_fields: true\n", + "\"\"\"\n", + "\n", + "rule_path = Path(tempfile.gettempdir()) / \"calculator\"\n", + "rule_path.mkdir(exist_ok=True)\n", + "rule_file = rule_path / \"calculation.yml\"\n", + "rule_file.write_text(rule_yaml)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the processor config:" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [], + "source": [ + "processor_config = {\n", + " \"mycalculator\":{ \n", + " \"type\": \"calculator\",\n", + " \"specific_rules\": [str(rule_path)],\n", + " \"generic_rules\": [\"/dev\"],\n", + " }\n", + " }" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the processor with the factory:" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "calculator" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from unittest import mock\n", + "from logprep.factory import Factory\n", + "\n", + "mock_logger = mock.MagicMock()\n", + "calculator = Factory.create(processor_config)\n", + "calculator" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Process event" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "before: {'message': {'time_in_ms': 0.333}}\n", + "after: {'message': {'time_in_ns': 333000}}\n", + "True\n" + 
] + } + ], + "source": [ + "from copy import deepcopy\n", + "mydocument = deepcopy(document)\n", + "\n", + "\n", + "print(f\"before: {mydocument}\")\n", + "calculator.process(mydocument)\n", + "print(f\"after: {mydocument}\")\n", + "print(mydocument == expected)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.11.0 ('.venv': venv)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.0" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "586280540a85d3e21edc698fe7b86af2848b9b02644e6c22463da25c40a3f1be" + } } - ], - "source": [ - "from copy import deepcopy\n", - "mydocument = deepcopy(document)\n", - "\n", - "\n", - "print(f\"before: {mydocument}\")\n", - "calculator.process(mydocument)\n", - "print(f\"after: {mydocument}\")\n", - "print(mydocument == expected)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.11.0 ('.venv': venv)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.0" }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "586280540a85d3e21edc698fe7b86af2848b9b02644e6c22463da25c40a3f1be" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/doc/source/development/notebooks/processor_examples/concatenator.ipynb b/doc/source/development/notebooks/processor_examples/concatenator.ipynb index 31df48129..5e70fe707 100644 --- a/doc/source/development/notebooks/processor_examples/concatenator.ipynb +++ b/doc/source/development/notebooks/processor_examples/concatenator.ipynb @@ -1,238 +1,238 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Concatenator\n", - "\n", - "This presentations goal it to introduce the features of the `Concatenator` and how to configure it." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### The challenge\n", - "\n", - "I want to merge different fields from an event in one target field." 
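
Note: the calculator notebook above demonstrates the pattern this patch applies to every example notebook and to the components themselves: `Factory.create` is now called with the configuration dict alone. A minimal before/after sketch of the calling convention (the `/tmp/calculator` rule directory stands in for the tempdir path built in the notebook):

```python
from logprep.factory import Factory

processor_config = {
    "mycalculator": {
        "type": "calculator",
        "specific_rules": ["/tmp/calculator"],  # directory containing rule files
        "generic_rules": ["/dev"],
    }
}

# before this patch, a logger instance had to be threaded through the factory:
#   calculator = Factory.create(processor_config, logger)
# after this patch, the configuration dict is the only argument:
calculator = Factory.create(processor_config)
```
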
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "from this:" - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "metadata": {}, - "outputs": [], - "source": [ - "document = {\n", - " 'data_stream': {\n", - " 'dataset': 'windows', \n", - " 'namespace': 'devopslab', \n", - " 'type': 'logs'\n", - " }, \n", - " '_op_type': 'create'\n", - " }" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "to this:" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "metadata": {}, - "outputs": [], - "source": [ - "expected = {\n", - " 'data_stream': {\n", - " 'dataset': 'windows', \n", - " 'namespace': 'devopslab', \n", - " 'type': 'logs'\n", - " }, \n", - " '_op_type': 'create', \n", - " '_index': 'logs-windows-devopslab'\n", - " }" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create rule and processor" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the rule:" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": {}, - "outputs": [ + "cells": [ { - "data": { - "text/plain": [ - "230" + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Concatenator\n", + "\n", + "This presentations goal it to introduce the features of the `Concatenator` and how to configure it." ] - }, - "execution_count": 66, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import sys\n", - "sys.path.append(\"../../../../../\")\n", - "import tempfile\n", - "from pathlib import Path\n", - "\n", - "rule_yaml = \"\"\"---\n", - "filter: \"data_stream\"\n", - "concatenator:\n", - " source_fields:\n", - " - data_stream.type\n", - " - data_stream.dataset\n", - " - data_stream.namespace\n", - " target_field: _index\n", - " separator: \"-\"\n", - " overwrite_target: false\n", - " delete_source_fields: false\n", - "\"\"\"\n", - "\n", - "rule_path = Path(tempfile.gettempdir()) / \"concatenator\"\n", - "rule_path.mkdir(exist_ok=True)\n", - "rule_file = rule_path / \"data-stream.yml\"\n", - "rule_file.write_text(rule_yaml)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the processor config:" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "metadata": {}, - "outputs": [], - "source": [ - "processor_config = {\n", - " \"myconcatenator\":{ \n", - " \"type\": \"concatenator\",\n", - " \"specific_rules\": [str(rule_path)],\n", - " \"generic_rules\": [\"/dev\"],\n", - " }\n", - " }" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the processor with the factory:" - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "metadata": {}, - "outputs": [ + }, { - "data": { - "text/plain": [ - "concatenator" + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The challenge\n", + "\n", + "I want to merge different fields from an event in one target field." 
] - }, - "execution_count": 68, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from unittest import mock\n", - "from logprep.factory import Factory\n", - "\n", - "mock_logger = mock.MagicMock()\n", - "concatenator = Factory.create(processor_config, mock_logger)\n", - "concatenator" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Process event" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "metadata": {}, - "outputs": [ + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "from this:" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [], + "source": [ + "document = {\n", + " 'data_stream': {\n", + " 'dataset': 'windows', \n", + " 'namespace': 'devopslab', \n", + " 'type': 'logs'\n", + " }, \n", + " '_op_type': 'create'\n", + " }" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "to this:" + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': 'logs'}, '_op_type': 'create'}\n", - "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': 'logs'}, '_op_type': 'create', '_index': 'logs-windows-devopslab'}\n", - "True\n" - ] + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [], + "source": [ + "expected = {\n", + " 'data_stream': {\n", + " 'dataset': 'windows', \n", + " 'namespace': 'devopslab', \n", + " 'type': 'logs'\n", + " }, \n", + " '_op_type': 'create', \n", + " '_index': 'logs-windows-devopslab'\n", + " }" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create rule and processor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the rule:" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "230" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import sys\n", + "sys.path.append(\"../../../../../\")\n", + "import tempfile\n", + "from pathlib import Path\n", + "\n", + "rule_yaml = \"\"\"---\n", + "filter: \"data_stream\"\n", + "concatenator:\n", + " source_fields:\n", + " - data_stream.type\n", + " - data_stream.dataset\n", + " - data_stream.namespace\n", + " target_field: _index\n", + " separator: \"-\"\n", + " overwrite_target: false\n", + " delete_source_fields: false\n", + "\"\"\"\n", + "\n", + "rule_path = Path(tempfile.gettempdir()) / \"concatenator\"\n", + "rule_path.mkdir(exist_ok=True)\n", + "rule_file = rule_path / \"data-stream.yml\"\n", + "rule_file.write_text(rule_yaml)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the processor config:" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [], + "source": [ + "processor_config = {\n", + " \"myconcatenator\":{ \n", + " \"type\": \"concatenator\",\n", + " \"specific_rules\": [str(rule_path)],\n", + " \"generic_rules\": [\"/dev\"],\n", + " }\n", + " }" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the processor with the factory:" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "concatenator" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from unittest import mock\n", 
+ "from logprep.factory import Factory\n", + "\n", + "mock_logger = mock.MagicMock()\n", + "concatenator = Factory.create(processor_config)\n", + "concatenator" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Process event" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "before: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': 'logs'}, '_op_type': 'create'}\n", + "after: {'data_stream': {'dataset': 'windows', 'namespace': 'devopslab', 'type': 'logs'}, '_op_type': 'create', '_index': 'logs-windows-devopslab'}\n", + "True\n" + ] + } + ], + "source": [ + "from copy import deepcopy\n", + "mydocument = deepcopy(document)\n", + "\n", + "\n", + "print(f\"before: {mydocument}\")\n", + "concatenator.process(mydocument)\n", + "print(f\"after: {mydocument}\")\n", + "print(mydocument == expected)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.11.0 ('.venv': venv)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.0" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "586280540a85d3e21edc698fe7b86af2848b9b02644e6c22463da25c40a3f1be" + } } - ], - "source": [ - "from copy import deepcopy\n", - "mydocument = deepcopy(document)\n", - "\n", - "\n", - "print(f\"before: {mydocument}\")\n", - "concatenator.process(mydocument)\n", - "print(f\"after: {mydocument}\")\n", - "print(mydocument == expected)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.11.0 ('.venv': venv)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.0" }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "586280540a85d3e21edc698fe7b86af2848b9b02644e6c22463da25c40a3f1be" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/doc/source/development/notebooks/processor_examples/dissector.ipynb b/doc/source/development/notebooks/processor_examples/dissector.ipynb index 4c4249935..58fc70bda 100644 --- a/doc/source/development/notebooks/processor_examples/dissector.ipynb +++ b/doc/source/development/notebooks/processor_examples/dissector.ipynb @@ -1,237 +1,237 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Dissector\n", - "\n", - "This presentations goal it to introduce the features of the `Dissector` and how to configure it." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Dissector\n", - "\n", - "This presentations goal it to introduce the features of the `Dissector` and how to configure it." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### The challenge\n", - "\n", - "I want to dissect a field to different target fields." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "from this:" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "document = { \"message\": \"Oct 17 11:54:21 dev-machine hv_kvp_daemon[3416730]: sh: 1: /usr/libexec/hypervkvpd/hv_get_dns_info: not found\" }" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "to this:" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [], - "source": [ - "expected = {\n", - " \"message\": \"Oct 17 11:54:21 dev-machine hv_kvp_daemon[3416730]: sh: 1: /usr/libexec/hypervkvpd/hv_get_dns_info: not found\",\n", - " \"@timestamp\": \"Oct 17 11:54:21\",\n", - " \"hostname\": \"dev-machine\",\n", - " \"process\": {\n", - " \"name\": \"hv_kvp_daemon\",\n", - " \"pid\": 3416730\n", - " },\n", - " \"sh\": \"/usr/libexec/hypervkvpd/hv_get_dns_info: not found\"\n", - "}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create rule and processor" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the rule:" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "215" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import sys\n", - "sys.path.append(\"../../../../../\")\n", - "import tempfile\n", - "from pathlib import Path\n", - "\n", - "rule_yaml = \"\"\"---\n", - "filter: \"message\"\n", - "dissector:\n", - " mapping:\n", - " message: \"%{@timestamp} %{+@timestamp} %{+@timestamp} %{hostname} %{process.name}[%{process.pid}]: %{?shell}: %{}: %{&shell}\"\n", - " convert_datatype:\n", - " process.pid: int\n", - "\"\"\"\n", - "\n", - "rule_path = Path(tempfile.gettempdir()) / \"concatenator\"\n", - "rule_path.mkdir(exist_ok=True)\n", - "rule_file = rule_path / \"data-stream.yml\"\n", - "rule_file.write_text(rule_yaml)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the processor config:" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [], - "source": [ - "processor_config = {\n", - " \"thealmightydissector\":{ \n", - " \"type\": \"dissector\",\n", - " \"specific_rules\": [str(rule_path)],\n", - " \"generic_rules\": [\"/dev\"],\n", - " }\n", - " }" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the processor with the factory:" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dissector" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Dissector\n", + "\n", + "This presentations goal it to introduce the features of the `Dissector` and how to configure it." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Dissector\n", + "\n", + "This presentations goal it to introduce the features of the `Dissector` and how to configure it." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The challenge\n", + "\n", + "I want to dissect a field to different target fields." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "from this:" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "document = { \"message\": \"Oct 17 11:54:21 dev-machine hv_kvp_daemon[3416730]: sh: 1: /usr/libexec/hypervkvpd/hv_get_dns_info: not found\" }" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "to this:" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "expected = {\n", + " \"message\": \"Oct 17 11:54:21 dev-machine hv_kvp_daemon[3416730]: sh: 1: /usr/libexec/hypervkvpd/hv_get_dns_info: not found\",\n", + " \"@timestamp\": \"Oct 17 11:54:21\",\n", + " \"hostname\": \"dev-machine\",\n", + " \"process\": {\n", + " \"name\": \"hv_kvp_daemon\",\n", + " \"pid\": 3416730\n", + " },\n", + " \"sh\": \"/usr/libexec/hypervkvpd/hv_get_dns_info: not found\"\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create rule and processor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the rule:" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "215" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import sys\n", + "sys.path.append(\"../../../../../\")\n", + "import tempfile\n", + "from pathlib import Path\n", + "\n", + "rule_yaml = \"\"\"---\n", + "filter: \"message\"\n", + "dissector:\n", + " mapping:\n", + " message: \"%{@timestamp} %{+@timestamp} %{+@timestamp} %{hostname} %{process.name}[%{process.pid}]: %{?shell}: %{}: %{&shell}\"\n", + " convert_datatype:\n", + " process.pid: int\n", + "\"\"\"\n", + "\n", + "rule_path = Path(tempfile.gettempdir()) / \"concatenator\"\n", + "rule_path.mkdir(exist_ok=True)\n", + "rule_file = rule_path / \"data-stream.yml\"\n", + "rule_file.write_text(rule_yaml)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the processor config:" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "processor_config = {\n", + " \"thealmightydissector\":{ \n", + " \"type\": \"dissector\",\n", + " \"specific_rules\": [str(rule_path)],\n", + " \"generic_rules\": [\"/dev\"],\n", + " }\n", + " }" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the processor with the factory:" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dissector" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from unittest import mock\n", + "from logprep.factory import Factory\n", + "\n", + "mock_logger = mock.MagicMock()\n", + "dissector = Factory.create(processor_config)\n", + "dissector" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Process event" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "before: {'message': 'Oct 17 11:54:21 dev-machine hv_kvp_daemon[3416730]: sh: 1: /usr/libexec/hypervkvpd/hv_get_dns_info: not found'}\n", + "after: {'message': 'Oct 17 11:54:21 dev-machine hv_kvp_daemon[3416730]: sh: 1: /usr/libexec/hypervkvpd/hv_get_dns_info: not found', '@timestamp': 'Oct 17 11:54:21', 'hostname': 
'dev-machine', 'process': {'name': 'hv_kvp_daemon', 'pid': 3416730}, 'sh': '/usr/libexec/hypervkvpd/hv_get_dns_info: not found'}\n", + "True\n" + ] + } + ], + "source": [ + "from copy import deepcopy\n", + "mydocument = deepcopy(document)\n", + "\n", + "\n", + "print(f\"before: {mydocument}\")\n", + "dissector.process(mydocument)\n", + "print(f\"after: {mydocument}\")\n", + "print(mydocument == expected)" + ] } - ], - "source": [ - "from unittest import mock\n", - "from logprep.factory import Factory\n", - "\n", - "mock_logger = mock.MagicMock()\n", - "dissector = Factory.create(processor_config, mock_logger)\n", - "dissector" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Process event" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "before: {'message': 'Oct 17 11:54:21 dev-machine hv_kvp_daemon[3416730]: sh: 1: /usr/libexec/hypervkvpd/hv_get_dns_info: not found'}\n", - "after: {'message': 'Oct 17 11:54:21 dev-machine hv_kvp_daemon[3416730]: sh: 1: /usr/libexec/hypervkvpd/hv_get_dns_info: not found', '@timestamp': 'Oct 17 11:54:21', 'hostname': 'dev-machine', 'process': {'name': 'hv_kvp_daemon', 'pid': 3416730}, 'sh': '/usr/libexec/hypervkvpd/hv_get_dns_info: not found'}\n", - "True\n" - ] + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.11.0 ('.venv': venv)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "586280540a85d3e21edc698fe7b86af2848b9b02644e6c22463da25c40a3f1be" + } } - ], - "source": [ - "from copy import deepcopy\n", - "mydocument = deepcopy(document)\n", - "\n", - "\n", - "print(f\"before: {mydocument}\")\n", - "dissector.process(mydocument)\n", - "print(f\"after: {mydocument}\")\n", - "print(mydocument == expected)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.11.0 ('.venv': venv)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "586280540a85d3e21edc698fe7b86af2848b9b02644e6c22463da25c40a3f1be" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/doc/source/development/notebooks/processor_examples/field_manager.ipynb b/doc/source/development/notebooks/processor_examples/field_manager.ipynb index eafe01611..2d28de547 100644 --- a/doc/source/development/notebooks/processor_examples/field_manager.ipynb +++ b/doc/source/development/notebooks/processor_examples/field_manager.ipynb @@ -1,291 +1,291 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# FieldManager\n", - "\n", - "This presentations goal it to introduce the features of the `FieldManager` and how to configure it." 
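
Note: the dissector mapping above is easiest to read against the sample message. Judging from the notebook's before/after documents, `%{+field}` appends to an already captured field, `%{?name}` and `%{&name}` form a key/value pair, and the empty `%{}` discards a token. A hand-rolled sketch of the same carving, reconstructed purely from the example (not logprep's parser):

```python
message = (
    "Oct 17 11:54:21 dev-machine hv_kvp_daemon[3416730]: "
    "sh: 1: /usr/libexec/hypervkvpd/hv_get_dns_info: not found"
)

# %{@timestamp} %{+@timestamp} %{+@timestamp} -> first three tokens, appended
# %{hostname}                                 -> "dev-machine"
# %{process.name}[%{process.pid}]:            -> "hv_kvp_daemon", 3416730
# %{?shell}: %{}: %{&shell}                   -> key "sh", skipped "1", value = rest
tokens = message.split(" ", 5)
timestamp = " ".join(tokens[:3])           # "Oct 17 11:54:21"
hostname = tokens[3]                       # "dev-machine"
proc, rest = tokens[4], tokens[5]
name, pid = proc.rstrip(":]").split("[")   # "hv_kvp_daemon", "3416730"
key, _skipped, value = rest.split(": ", 2)

assert timestamp == "Oct 17 11:54:21" and hostname == "dev-machine"
assert (name, int(pid)) == ("hv_kvp_daemon", 3416730)
assert key == "sh" and value == "/usr/libexec/hypervkvpd/hv_get_dns_info: not found"
```
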
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### The challenges\n", - "\n", - "- I want to move or rename a field.\n", - "- I want to copy a field.\n", - "- I want to merge field values to a list.\n", - "- I want to merge lists from different fields to one list in a new or existing field\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "given preprocessed log entry:" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "document = {\n", - " \"client\": {\"ip\": [\"127.0.0.1\", \"fe89::\", \"192.168.5.1\"], \"nat\": {\"ip\": \"223.2.3.2\"}},\n", - " \"destination\": {\"ip\": \"8.8.8.8\"},\n", - " \"host\": {\"_hostname\": \"customer2\", \"ip\": [\"192.168.5.1\", \"180.22.66.3\"]},\n", - " \"observer\": {\"ip\": \"10.10.2.33\"},\n", - " \"server\": {\"ip\": \"10.10.2.33\", \"nat\": {\"ip\": \"180.22.66.1\"}},\n", - " \"source\": {\"ip\": \"10.10.2.33\"},\n", - " \"preexisting\": \"I exists already\"\n", - "}\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create rules and processor" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the rules:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ + "cells": [ { - "data": { - "text/plain": [ - "[filter=\"host._hostname\", FieldManagerRule.Config(description='', regex_fields=[], tests=[], tag_on_failure=['_field_manager_failure'], source_fields=['client.nat.ip', 'source.ip'], target_field='related.ip', delete_source_fields=True, overwrite_target=True, extend_target_list=True)]" + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# FieldManager\n", + "\n", + "This presentations goal it to introduce the features of the `FieldManager` and how to configure it." 
] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import sys\n", - "sys.path.append(\"../../../../../\")\n", - "\n", - "from logprep.processor.field_manager.rule import FieldManagerRule\n", - "rules_definitions = [\n", - " {\n", - " \"filter\": \"host._hostname\",\n", - " \"field_manager\": {\n", - " \"source_fields\": [\"client.nat.ip\", \"source.ip\"],\n", - " \"target_field\": \"related.ip\",\n", - " \"overwrite_target\": True,\n", - " \"delete_source_fields\": True,\n", - " \"extend_target_list\": True\n", - " },\n", - " }\n", - "]\n", - "rules = [FieldManagerRule._create_from_dict(rule_dict) for rule_dict in rules_definitions]\n", - "rules" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the processor config:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "processor_config = {\n", - " \"the_field_manager\": {\n", - " \"type\": \"field_manager\",\n", - " \"specific_rules\": [\"/dev\"],\n", - " \"generic_rules\": [\"/dev\"],\n", - " }\n", - "}\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the processor with the factory:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ + }, { - "data": { - "text/plain": [ - "field_manager" + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The challenges\n", + "\n", + "- I want to move or rename a field.\n", + "- I want to copy a field.\n", + "- I want to merge field values to a list.\n", + "- I want to merge lists from different fields to one list in a new or existing field\n" ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from logging import getLogger\n", - "from logprep.factory import Factory\n", - "\n", - "logger = getLogger()\n", - "\n", - "processor = Factory.create(processor_config, logger)\n", - "processor\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "load rules to processor" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ + }, { - "data": { - "text/plain": [ - "[filter=\"host._hostname\", FieldManagerRule.Config(description='', regex_fields=[], tests=[], tag_on_failure=['_field_manager_failure'], source_fields=['client.nat.ip', 'source.ip'], target_field='related.ip', delete_source_fields=True, overwrite_target=True, extend_target_list=True)]" + "cell_type": "markdown", + "metadata": {}, + "source": [ + "given preprocessed log entry:" ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "for rule in rules:\n", - " processor._specific_tree.add_rule(rule)\n", - " \n", - "processor._specific_rules" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Process event" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "from copy import deepcopy\n", - "\n", - "mydocument = deepcopy(document)\n", - "processor.process(mydocument)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Check Results" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ + }, { - "data": { - "text/plain": [ - "{'client': {'ip': ['127.0.0.1', 'fe89::', '192.168.5.1'],\n", - " 'nat': {'ip': '223.2.3.2'}},\n", - " 'destination': {'ip': '8.8.8.8'},\n", - " 'host': {'_hostname': 
'customer2', 'ip': ['192.168.5.1', '180.22.66.3']},\n", - " 'observer': {'ip': '10.10.2.33'},\n", - " 'server': {'ip': '10.10.2.33', 'nat': {'ip': '180.22.66.1'}},\n", - " 'source': {'ip': '10.10.2.33'},\n", - " 'preexisting': 'I exists already'}" + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "document = {\n", + " \"client\": {\"ip\": [\"127.0.0.1\", \"fe89::\", \"192.168.5.1\"], \"nat\": {\"ip\": \"223.2.3.2\"}},\n", + " \"destination\": {\"ip\": \"8.8.8.8\"},\n", + " \"host\": {\"_hostname\": \"customer2\", \"ip\": [\"192.168.5.1\", \"180.22.66.3\"]},\n", + " \"observer\": {\"ip\": \"10.10.2.33\"},\n", + " \"server\": {\"ip\": \"10.10.2.33\", \"nat\": {\"ip\": \"180.22.66.1\"}},\n", + " \"source\": {\"ip\": \"10.10.2.33\"},\n", + " \"preexisting\": \"I exists already\"\n", + "}\n" ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "document" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create rules and processor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the rules:" + ] + }, { - "data": { - "text/plain": [ - "{'client': {'ip': ['127.0.0.1', 'fe89::', '192.168.5.1']},\n", - " 'destination': {'ip': '8.8.8.8'},\n", - " 'host': {'_hostname': 'customer2', 'ip': ['192.168.5.1', '180.22.66.3']},\n", - " 'observer': {'ip': '10.10.2.33'},\n", - " 'server': {'ip': '10.10.2.33', 'nat': {'ip': '180.22.66.1'}},\n", - " 'preexisting': 'I exists already',\n", - " 'related': {'ip': ['10.10.2.33', '223.2.3.2']}}" + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[filter=\"host._hostname\", FieldManagerRule.Config(description='', regex_fields=[], tests=[], tag_on_failure=['_field_manager_failure'], source_fields=['client.nat.ip', 'source.ip'], target_field='related.ip', delete_source_fields=True, overwrite_target=True, extend_target_list=True)]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import sys\n", + "sys.path.append(\"../../../../../\")\n", + "\n", + "from logprep.processor.field_manager.rule import FieldManagerRule\n", + "rules_definitions = [\n", + " {\n", + " \"filter\": \"host._hostname\",\n", + " \"field_manager\": {\n", + " \"source_fields\": [\"client.nat.ip\", \"source.ip\"],\n", + " \"target_field\": \"related.ip\",\n", + " \"overwrite_target\": True,\n", + " \"delete_source_fields\": True,\n", + " \"extend_target_list\": True\n", + " },\n", + " }\n", + "]\n", + "rules = [FieldManagerRule._create_from_dict(rule_dict) for rule_dict in rules_definitions]\n", + "rules" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the processor config:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "processor_config = {\n", + " \"the_field_manager\": {\n", + " \"type\": \"field_manager\",\n", + " \"specific_rules\": [\"/dev\"],\n", + " \"generic_rules\": [\"/dev\"],\n", + " }\n", + "}\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the processor with the factory:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "field_manager" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "from logging import getLogger\n", + "from logprep.factory import Factory\n", + "\n", + "logger = getLogger()\n", + "\n", + "processor = Factory.create(processor_config)\n", + "processor\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "load rules to processor" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[filter=\"host._hostname\", FieldManagerRule.Config(description='', regex_fields=[], tests=[], tag_on_failure=['_field_manager_failure'], source_fields=['client.nat.ip', 'source.ip'], target_field='related.ip', delete_source_fields=True, overwrite_target=True, extend_target_list=True)]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "for rule in rules:\n", + " processor._specific_tree.add_rule(rule)\n", + " \n", + "processor._specific_rules" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Process event" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "from copy import deepcopy\n", + "\n", + "mydocument = deepcopy(document)\n", + "processor.process(mydocument)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Check Results" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'client': {'ip': ['127.0.0.1', 'fe89::', '192.168.5.1'],\n", + " 'nat': {'ip': '223.2.3.2'}},\n", + " 'destination': {'ip': '8.8.8.8'},\n", + " 'host': {'_hostname': 'customer2', 'ip': ['192.168.5.1', '180.22.66.3']},\n", + " 'observer': {'ip': '10.10.2.33'},\n", + " 'server': {'ip': '10.10.2.33', 'nat': {'ip': '180.22.66.1'}},\n", + " 'source': {'ip': '10.10.2.33'},\n", + " 'preexisting': 'I exists already'}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "document" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'client': {'ip': ['127.0.0.1', 'fe89::', '192.168.5.1']},\n", + " 'destination': {'ip': '8.8.8.8'},\n", + " 'host': {'_hostname': 'customer2', 'ip': ['192.168.5.1', '180.22.66.3']},\n", + " 'observer': {'ip': '10.10.2.33'},\n", + " 'server': {'ip': '10.10.2.33', 'nat': {'ip': '180.22.66.1'}},\n", + " 'preexisting': 'I exists already',\n", + " 'related': {'ip': ['10.10.2.33', '223.2.3.2']}}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mydocument" ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" } - ], - "source": [ - "mydocument" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.11.0 ('.venv': venv)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.0" + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.11.0 ('.venv': venv)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + 
"pygments_lexer": "ipython3", + "version": "3.11.0" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "586280540a85d3e21edc698fe7b86af2848b9b02644e6c22463da25c40a3f1be" + } + } }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "586280540a85d3e21edc698fe7b86af2848b9b02644e6c22463da25c40a3f1be" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/doc/source/development/notebooks/processor_examples/generic_adder.ipynb b/doc/source/development/notebooks/processor_examples/generic_adder.ipynb index 16a826a13..d85d93170 100644 --- a/doc/source/development/notebooks/processor_examples/generic_adder.ipynb +++ b/doc/source/development/notebooks/processor_examples/generic_adder.ipynb @@ -1,199 +1,199 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Generic Adder\n", - "\n", - "This presentations goal it to introduce the features of the `Generic Adder` and how to configure it." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### The challenge\n", - "\n", - "I want add fields or values depending on a matching filter." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "from this:" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [], - "source": [ - "document = {\n", - " 'message': {\n", - " \"time_in_ms\": \"bla\",\n", - " \"tags\": [\"hello\"]\n", - " }\n", - " }" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "to this:" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], - "source": [ - "expected = {\n", - " 'message': {\n", - " \"time_in_ms\": \"bla\",\n", - " \"tags\": [\"hello\", \"new\"]\n", - " }\n", - " }" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create rule and processor" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the rule:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the processor config:" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [], - "source": [ - "processor_config = {\n", - " \"almighty generic adder\":{ \n", - " \"type\": \"generic_adder\",\n", - " \"specific_rules\": [{\"filter\": \"*\", \"generic_adder\": {\"extend_target_list\": True, \"add\": {\"message.tags\": \"New\"}} }],\n", - " \"generic_rules\": [],\n", - " }\n", - " }" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the processor with the factory:" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ + "cells": [ { - "data": { - "text/plain": [ - "generic_adder" + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Generic Adder\n", + "\n", + "This presentations goal it to introduce the features of the `Generic Adder` and how to configure it." 
] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from unittest import mock\n", - "from logprep.factory import Factory\n", - "\n", - "mock_logger = mock.MagicMock()\n", - "calculator = Factory.create(processor_config, mock_logger)\n", - "calculator" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Process event" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The challenge\n", + "\n", + "I want add fields or values depending on a matching filter." + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "before: {'message': {'time_in_ms': 'bla', 'tags': ['hello']}}\n", - "after: {'message': {'time_in_ms': 'bla', 'tags': ['hello', 'New']}}\n", - "False\n" - ] + "cell_type": "markdown", + "metadata": {}, + "source": [ + "from this:" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "document = {\n", + " 'message': {\n", + " \"time_in_ms\": \"bla\",\n", + " \"tags\": [\"hello\"]\n", + " }\n", + " }" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "to this:" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "expected = {\n", + " 'message': {\n", + " \"time_in_ms\": \"bla\",\n", + " \"tags\": [\"hello\", \"new\"]\n", + " }\n", + " }" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create rule and processor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the rule:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the processor config:" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "processor_config = {\n", + " \"almighty generic adder\":{ \n", + " \"type\": \"generic_adder\",\n", + " \"specific_rules\": [{\"filter\": \"*\", \"generic_adder\": {\"extend_target_list\": True, \"add\": {\"message.tags\": \"New\"}} }],\n", + " \"generic_rules\": [],\n", + " }\n", + " }" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the processor with the factory:" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "generic_adder" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from unittest import mock\n", + "from logprep.factory import Factory\n", + "\n", + "mock_logger = mock.MagicMock()\n", + "calculator = Factory.create(processor_config)\n", + "calculator" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Process event" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "before: {'message': {'time_in_ms': 'bla', 'tags': ['hello']}}\n", + "after: {'message': {'time_in_ms': 'bla', 'tags': ['hello', 'New']}}\n", + "False\n" + ] + } + ], + "source": [ + "from copy import deepcopy\n", + "mydocument = deepcopy(document)\n", + "\n", + "\n", + "print(f\"before: {mydocument}\")\n", + "calculator.process(mydocument)\n", + "print(f\"after: {mydocument}\")\n", + 
"print(mydocument == expected)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.11.0 ('.venv': venv)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "586280540a85d3e21edc698fe7b86af2848b9b02644e6c22463da25c40a3f1be" + } } - ], - "source": [ - "from copy import deepcopy\n", - "mydocument = deepcopy(document)\n", - "\n", - "\n", - "print(f\"before: {mydocument}\")\n", - "calculator.process(mydocument)\n", - "print(f\"after: {mydocument}\")\n", - "print(mydocument == expected)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.11.0 ('.venv': venv)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.7" }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "586280540a85d3e21edc698fe7b86af2848b9b02644e6c22463da25c40a3f1be" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/doc/source/development/notebooks/processor_examples/geo_ip_enricher_custom_outputfields.ipynb b/doc/source/development/notebooks/processor_examples/geo_ip_enricher_custom_outputfields.ipynb index 15ba0f8ff..f5cebaa2d 100644 --- a/doc/source/development/notebooks/processor_examples/geo_ip_enricher_custom_outputfields.ipynb +++ b/doc/source/development/notebooks/processor_examples/geo_ip_enricher_custom_outputfields.ipynb @@ -1,270 +1,270 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Configuration of the GeoipEnricher\n", - "\n", - "This presentations goal is to introduce the configuration of the output subfields of the `GeoipEnricher`.\n", - "\n", - "Prerequisites: a local geo ip database is available" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### The challenge\n", - "\n", - "The given document" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "document = {\"client\": {\"ip\": \"8.8.8.8\"}}\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "will result in the default output" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "default_output = {\n", - " \"client\": {\"ip: 8.8.8.8\"},\n", - " \"geoip\": {\n", - " \"geometry\": {\"coordinates\": [-97.822, 37.751], \"type\": \"Point\"},\n", - " \"properties\": {\n", - " \"accuracy_radius\": 1000,\n", - " \"continent\": \"North America\",\n", - " \"continent_code\": \"NA\",\n", - " \"country\": \"United States\",\n", - " \"country_iso_code\": \"US\",\n", - " \"time_zone\": \"America/Chicago\",\n", - " },\n", - " \"type\": \"Feature\",\n", - " },\n", - "}\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "which instead should be configured to look like" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "expected_output = {\n", - " \"client\": {\n", - " \"geo\": {\n", 
- " \"accuracy\": 1000,\n", - " \"continent_code\": \"NA\",\n", - " \"continent_name\": \"North America\",\n", - " \"country_iso_code\": \"US\",\n", - " \"country_name\": \"United States\",\n", - " \"geometry_type\": \"Point\",\n", - " \"location\": [-97.822, 37.751],\n", - " \"timezone\": \"America/Chicago\",\n", - " \"type\": \"Feature\",\n", - " },\n", - " \"ip\": \"8.8.8.8\",\n", - " }\n", - "}\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create rule and processor\n", - "create the rule:" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ + "cells": [ { - "data": { - "text/plain": [ - "678" + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Configuration of the GeoipEnricher\n", + "\n", + "This presentations goal is to introduce the configuration of the output subfields of the `GeoipEnricher`.\n", + "\n", + "Prerequisites: a local geo ip database is available" ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import sys\n", - "sys.path.append(\"../../../../../\")\n", - "from pathlib import Path\n", - "import tempfile\n", - "\n", - "\n", - "rule_yaml = \"\"\"---\n", - "filter: \"client.ip\"\n", - "geoip_enricher:\n", - " source_fields: [\"client.ip\"]\n", - " customize_target_subfields: \n", - " type: client.geo.type\n", - " geometry.type: client.geo.geometry_type\n", - " geometry.coordinates: client.geo.location\n", - " properties.accuracy_radius: client.geo.accuracy\n", - " properties.continent: client.geo.continent_name\n", - " properties.continent_code: client.geo.continent_code\n", - " properties.country: client.geo.country_name\n", - " properties.city: client.geo.city_name\n", - " properties.postal_code: client.geo.postal_code\n", - " properties.subdivision: client.geo.subdivision\n", - " properties.time_zone: client.geo.timezone\n", - " properties.country_iso_code: client.geo.country_iso_code\n", - "\"\"\"\n", - "\n", - "rule_path = Path(tempfile.gettempdir()) / \"geoip\"\n", - "rule_path.mkdir(exist_ok=True)\n", - "rule_file = rule_path / \"data-stream.yml\"\n", - "rule_file.write_text(rule_yaml)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the processor config and replace the `db_path` with your local geo ip database:" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [], - "source": [ - "processor_config = {\n", - " \"geoip_enricher\": {\n", - " \"type\": \"geoip_enricher\",\n", - " \"specific_rules\": [str(rule_path)],\n", - " \"generic_rules\": [\"/dev\"],\n", - " \"db_path\": \"\"\n", - " }\n", - "}\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the processor with the factory:" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ + }, { - "ename": "InvalidConfigurationError", - "evalue": "db_path file 'tests/testdata/mock_external/MockGeoLite2-City.mmdb' does not exist", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mInvalidConfigurationError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[24], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mlogprep\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mfactory\u001b[39;00m \u001b[39mimport\u001b[39;00m Factory\n\u001b[1;32m 4\u001b[0m mock_logger 
\u001b[39m=\u001b[39m mock\u001b[39m.\u001b[39mMagicMock()\n\u001b[0;32m----> 5\u001b[0m geoip_enricher \u001b[39m=\u001b[39m Factory\u001b[39m.\u001b[39;49mcreate(processor_config, mock_logger)\n\u001b[1;32m 6\u001b[0m geoip_enricher\n", - "File \u001b[0;32m~/external_work/Logprep/doc/source/development/notebooks/processor_examples/../../../../../logprep/factory.py:36\u001b[0m, in \u001b[0;36mFactory.create\u001b[0;34m(cls, configuration, logger)\u001b[0m\n\u001b[1;32m 34\u001b[0m metric_labels \u001b[39m=\u001b[39m configuration[connector_name]\u001b[39m.\u001b[39mpop(\u001b[39m\"\u001b[39m\u001b[39mmetric_labels\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 35\u001b[0m connector \u001b[39m=\u001b[39m Configuration\u001b[39m.\u001b[39mget_class(connector_name, connector_configuration_dict)\n\u001b[0;32m---> 36\u001b[0m connector_configuration \u001b[39m=\u001b[39m Configuration\u001b[39m.\u001b[39;49mcreate(\n\u001b[1;32m 37\u001b[0m connector_name, connector_configuration_dict\n\u001b[1;32m 38\u001b[0m )\n\u001b[1;32m 39\u001b[0m connector_configuration\u001b[39m.\u001b[39mmetric_labels \u001b[39m=\u001b[39m copy\u001b[39m.\u001b[39mdeepcopy(metric_labels)\n\u001b[1;32m 40\u001b[0m \u001b[39mreturn\u001b[39;00m connector(connector_name, connector_configuration, logger)\n", - "File \u001b[0;32m~/external_work/Logprep/doc/source/development/notebooks/processor_examples/../../../../../logprep/configuration.py:34\u001b[0m, in \u001b[0;36mConfiguration.create\u001b[0;34m(cls, name, config_)\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[39m\"\"\"factory method to create component configuration\u001b[39;00m\n\u001b[1;32m 20\u001b[0m \n\u001b[1;32m 21\u001b[0m \u001b[39mParameters\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[39m the pipeline component configuration\u001b[39;00m\n\u001b[1;32m 32\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 33\u001b[0m class_ \u001b[39m=\u001b[39m \u001b[39mcls\u001b[39m\u001b[39m.\u001b[39mget_class(name, config_)\n\u001b[0;32m---> 34\u001b[0m \u001b[39mreturn\u001b[39;00m class_\u001b[39m.\u001b[39;49mConfig(\u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mconfig_)\n", - "File \u001b[0;32m:13\u001b[0m, in \u001b[0;36m__init__\u001b[0;34m(self, type, specific_rules, generic_rules, tree_config, db_path)\u001b[0m\n\u001b[1;32m 11\u001b[0m __attr_validator_generic_rules(\u001b[39mself\u001b[39m, __attr_generic_rules, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mgeneric_rules)\n\u001b[1;32m 12\u001b[0m __attr_validator_tree_config(\u001b[39mself\u001b[39m, __attr_tree_config, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtree_config)\n\u001b[0;32m---> 13\u001b[0m __attr_validator_db_path(\u001b[39mself\u001b[39;49m, __attr_db_path, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mdb_path)\n", - "File \u001b[0;32m~/external_work/Logprep/doc/source/development/notebooks/processor_examples/../../../../../logprep/util/validators.py:53\u001b[0m, in \u001b[0;36murl_validator\u001b[0;34m(_, attribute, value)\u001b[0m\n\u001b[1;32m 51\u001b[0m \u001b[39mraise\u001b[39;00m InvalidConfigurationError(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mattribute\u001b[39m.\u001b[39mname\u001b[39m}\u001b[39;00m\u001b[39m has no schema, net location and path\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 52\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m parsed_url\u001b[39m.\u001b[39mscheme \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m parsed_url\u001b[39m.\u001b[39mnetloc \u001b[39mand\u001b[39;00m 
parsed_url\u001b[39m.\u001b[39mpath:\n\u001b[0;32m---> 53\u001b[0m file_validator(_, attribute, value)\n\u001b[1;32m 54\u001b[0m \u001b[39mif\u001b[39;00m parsed_url\u001b[39m.\u001b[39mscheme \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mfile\u001b[39m\u001b[39m\"\u001b[39m:\n\u001b[1;32m 55\u001b[0m \u001b[39mif\u001b[39;00m parsed_url\u001b[39m.\u001b[39mparams \u001b[39mor\u001b[39;00m parsed_url\u001b[39m.\u001b[39mquery \u001b[39mor\u001b[39;00m parsed_url\u001b[39m.\u001b[39mfragment:\n", - "File \u001b[0;32m~/external_work/Logprep/doc/source/development/notebooks/processor_examples/../../../../../logprep/util/validators.py:23\u001b[0m, in \u001b[0;36mfile_validator\u001b[0;34m(_, attribute, value)\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[39mraise\u001b[39;00m InvalidConfigurationError(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mattribute\u001b[39m.\u001b[39mname\u001b[39m}\u001b[39;00m\u001b[39m is not a str\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 22\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m os\u001b[39m.\u001b[39mpath\u001b[39m.\u001b[39mexists(value):\n\u001b[0;32m---> 23\u001b[0m \u001b[39mraise\u001b[39;00m InvalidConfigurationError(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mattribute\u001b[39m.\u001b[39mname\u001b[39m}\u001b[39;00m\u001b[39m file \u001b[39m\u001b[39m'\u001b[39m\u001b[39m{\u001b[39;00mvalue\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m does not exist\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 24\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m os\u001b[39m.\u001b[39mpath\u001b[39m.\u001b[39misfile(value):\n\u001b[1;32m 25\u001b[0m \u001b[39mraise\u001b[39;00m InvalidConfigurationError(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mattribute\u001b[39m.\u001b[39mname\u001b[39m}\u001b[39;00m\u001b[39m \u001b[39m\u001b[39m'\u001b[39m\u001b[39m{\u001b[39;00mvalue\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m is not a file\u001b[39m\u001b[39m\"\u001b[39m)\n", - "\u001b[0;31mInvalidConfigurationError\u001b[0m: db_path file 'tests/testdata/mock_external/MockGeoLite2-City.mmdb' does not exist" - ] - } - ], - "source": [ - "from unittest import mock\n", - "from logprep.factory import Factory\n", - "\n", - "mock_logger = mock.MagicMock()\n", - "geoip_enricher = Factory.create(processor_config, mock_logger)\n", - "geoip_enricher\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Process event" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The challenge\n", + "\n", + "The given document" + ] + }, { - "data": { - "text/plain": [ - "'The output has the expected form: True'" + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "document = {\"client\": {\"ip\": \"8.8.8.8\"}}\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "will result in the default output" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "default_output = {\n", + " \"client\": {\"ip: 8.8.8.8\"},\n", + " \"geoip\": {\n", + " \"geometry\": {\"coordinates\": [-97.822, 37.751], \"type\": \"Point\"},\n", + " \"properties\": {\n", + " \"accuracy_radius\": 1000,\n", + " \"continent\": \"North America\",\n", + " \"continent_code\": \"NA\",\n", + " \"country\": \"United States\",\n", + " 
\"country_iso_code\": \"US\",\n", + " \"time_zone\": \"America/Chicago\",\n", + " },\n", + " \"type\": \"Feature\",\n", + " },\n", + "}\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "which instead should be configured to look like" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "expected_output = {\n", + " \"client\": {\n", + " \"geo\": {\n", + " \"accuracy\": 1000,\n", + " \"continent_code\": \"NA\",\n", + " \"continent_name\": \"North America\",\n", + " \"country_iso_code\": \"US\",\n", + " \"country_name\": \"United States\",\n", + " \"geometry_type\": \"Point\",\n", + " \"location\": [-97.822, 37.751],\n", + " \"timezone\": \"America/Chicago\",\n", + " \"type\": \"Feature\",\n", + " },\n", + " \"ip\": \"8.8.8.8\",\n", + " }\n", + "}\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create rule and processor\n", + "create the rule:" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "678" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import sys\n", + "sys.path.append(\"../../../../../\")\n", + "from pathlib import Path\n", + "import tempfile\n", + "\n", + "\n", + "rule_yaml = \"\"\"---\n", + "filter: \"client.ip\"\n", + "geoip_enricher:\n", + " source_fields: [\"client.ip\"]\n", + " customize_target_subfields: \n", + " type: client.geo.type\n", + " geometry.type: client.geo.geometry_type\n", + " geometry.coordinates: client.geo.location\n", + " properties.accuracy_radius: client.geo.accuracy\n", + " properties.continent: client.geo.continent_name\n", + " properties.continent_code: client.geo.continent_code\n", + " properties.country: client.geo.country_name\n", + " properties.city: client.geo.city_name\n", + " properties.postal_code: client.geo.postal_code\n", + " properties.subdivision: client.geo.subdivision\n", + " properties.time_zone: client.geo.timezone\n", + " properties.country_iso_code: client.geo.country_iso_code\n", + "\"\"\"\n", + "\n", + "rule_path = Path(tempfile.gettempdir()) / \"geoip\"\n", + "rule_path.mkdir(exist_ok=True)\n", + "rule_file = rule_path / \"data-stream.yml\"\n", + "rule_file.write_text(rule_yaml)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the processor config and replace the `db_path` with your local geo ip database:" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "processor_config = {\n", + " \"geoip_enricher\": {\n", + " \"type\": \"geoip_enricher\",\n", + " \"specific_rules\": [str(rule_path)],\n", + " \"generic_rules\": [\"/dev\"],\n", + " \"db_path\": \"\"\n", + " }\n", + "}\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the processor with the factory:" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "ename": "InvalidConfigurationError", + "evalue": "db_path file 'tests/testdata/mock_external/MockGeoLite2-City.mmdb' does not exist", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mInvalidConfigurationError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[24], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[39mfrom\u001b[39;00m 
\u001b[39mlogprep\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mfactory\u001b[39;00m \u001b[39mimport\u001b[39;00m Factory\n\u001b[1;32m 4\u001b[0m mock_logger \u001b[39m=\u001b[39m mock\u001b[39m.\u001b[39mMagicMock()\n\u001b[0;32m----> 5\u001b[0m geoip_enricher \u001b[39m=\u001b[39m Factory\u001b[39m.\u001b[39;49mcreate(processor_config, mock_logger)\n\u001b[1;32m 6\u001b[0m geoip_enricher\n", + "File \u001b[0;32m~/external_work/Logprep/doc/source/development/notebooks/processor_examples/../../../../../logprep/factory.py:36\u001b[0m, in \u001b[0;36mFactory.create\u001b[0;34m(cls, configuration, logger)\u001b[0m\n\u001b[1;32m 34\u001b[0m metric_labels \u001b[39m=\u001b[39m configuration[connector_name]\u001b[39m.\u001b[39mpop(\u001b[39m\"\u001b[39m\u001b[39mmetric_labels\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 35\u001b[0m connector \u001b[39m=\u001b[39m Configuration\u001b[39m.\u001b[39mget_class(connector_name, connector_configuration_dict)\n\u001b[0;32m---> 36\u001b[0m connector_configuration \u001b[39m=\u001b[39m Configuration\u001b[39m.\u001b[39;49mcreate(\n\u001b[1;32m 37\u001b[0m connector_name, connector_configuration_dict\n\u001b[1;32m 38\u001b[0m )\n\u001b[1;32m 39\u001b[0m connector_configuration\u001b[39m.\u001b[39mmetric_labels \u001b[39m=\u001b[39m copy\u001b[39m.\u001b[39mdeepcopy(metric_labels)\n\u001b[1;32m 40\u001b[0m \u001b[39mreturn\u001b[39;00m connector(connector_name, connector_configuration, logger)\n", + "File \u001b[0;32m~/external_work/Logprep/doc/source/development/notebooks/processor_examples/../../../../../logprep/configuration.py:34\u001b[0m, in \u001b[0;36mConfiguration.create\u001b[0;34m(cls, name, config_)\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[39m\"\"\"factory method to create component configuration\u001b[39;00m\n\u001b[1;32m 20\u001b[0m \n\u001b[1;32m 21\u001b[0m \u001b[39mParameters\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[39m the pipeline component configuration\u001b[39;00m\n\u001b[1;32m 32\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 33\u001b[0m class_ \u001b[39m=\u001b[39m \u001b[39mcls\u001b[39m\u001b[39m.\u001b[39mget_class(name, config_)\n\u001b[0;32m---> 34\u001b[0m \u001b[39mreturn\u001b[39;00m class_\u001b[39m.\u001b[39;49mConfig(\u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mconfig_)\n", + "File \u001b[0;32m:13\u001b[0m, in \u001b[0;36m__init__\u001b[0;34m(self, type, specific_rules, generic_rules, tree_config, db_path)\u001b[0m\n\u001b[1;32m 11\u001b[0m __attr_validator_generic_rules(\u001b[39mself\u001b[39m, __attr_generic_rules, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mgeneric_rules)\n\u001b[1;32m 12\u001b[0m __attr_validator_tree_config(\u001b[39mself\u001b[39m, __attr_tree_config, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtree_config)\n\u001b[0;32m---> 13\u001b[0m __attr_validator_db_path(\u001b[39mself\u001b[39;49m, __attr_db_path, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mdb_path)\n", + "File \u001b[0;32m~/external_work/Logprep/doc/source/development/notebooks/processor_examples/../../../../../logprep/util/validators.py:53\u001b[0m, in \u001b[0;36murl_validator\u001b[0;34m(_, attribute, value)\u001b[0m\n\u001b[1;32m 51\u001b[0m \u001b[39mraise\u001b[39;00m InvalidConfigurationError(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mattribute\u001b[39m.\u001b[39mname\u001b[39m}\u001b[39;00m\u001b[39m has no schema, net location and path\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 52\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m 
parsed_url\u001b[39m.\u001b[39mscheme \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m parsed_url\u001b[39m.\u001b[39mnetloc \u001b[39mand\u001b[39;00m parsed_url\u001b[39m.\u001b[39mpath:\n\u001b[0;32m---> 53\u001b[0m file_validator(_, attribute, value)\n\u001b[1;32m 54\u001b[0m \u001b[39mif\u001b[39;00m parsed_url\u001b[39m.\u001b[39mscheme \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mfile\u001b[39m\u001b[39m\"\u001b[39m:\n\u001b[1;32m 55\u001b[0m \u001b[39mif\u001b[39;00m parsed_url\u001b[39m.\u001b[39mparams \u001b[39mor\u001b[39;00m parsed_url\u001b[39m.\u001b[39mquery \u001b[39mor\u001b[39;00m parsed_url\u001b[39m.\u001b[39mfragment:\n", + "File \u001b[0;32m~/external_work/Logprep/doc/source/development/notebooks/processor_examples/../../../../../logprep/util/validators.py:23\u001b[0m, in \u001b[0;36mfile_validator\u001b[0;34m(_, attribute, value)\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[39mraise\u001b[39;00m InvalidConfigurationError(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mattribute\u001b[39m.\u001b[39mname\u001b[39m}\u001b[39;00m\u001b[39m is not a str\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 22\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m os\u001b[39m.\u001b[39mpath\u001b[39m.\u001b[39mexists(value):\n\u001b[0;32m---> 23\u001b[0m \u001b[39mraise\u001b[39;00m InvalidConfigurationError(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mattribute\u001b[39m.\u001b[39mname\u001b[39m}\u001b[39;00m\u001b[39m file \u001b[39m\u001b[39m'\u001b[39m\u001b[39m{\u001b[39;00mvalue\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m does not exist\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 24\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m os\u001b[39m.\u001b[39mpath\u001b[39m.\u001b[39misfile(value):\n\u001b[1;32m 25\u001b[0m \u001b[39mraise\u001b[39;00m InvalidConfigurationError(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mattribute\u001b[39m.\u001b[39mname\u001b[39m}\u001b[39;00m\u001b[39m \u001b[39m\u001b[39m'\u001b[39m\u001b[39m{\u001b[39;00mvalue\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m is not a file\u001b[39m\u001b[39m\"\u001b[39m)\n", + "\u001b[0;31mInvalidConfigurationError\u001b[0m: db_path file 'tests/testdata/mock_external/MockGeoLite2-City.mmdb' does not exist" + ] + } + ], + "source": [ + "from unittest import mock\n", + "from logprep.factory import Factory\n", + "\n", + "mock_logger = mock.MagicMock()\n", + "geoip_enricher = Factory.create(processor_config)\n", + "geoip_enricher\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Process event" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'The output has the expected form: True'" + ] + }, + "execution_count": 98, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from copy import deepcopy\n", + "\n", + "mydocument = deepcopy(document)\n", + "geoip_enricher.process(mydocument)\n", + "assert mydocument == expected_output\n", + "f\"The output has the expected form: {mydocument == expected_output}\"" ] - }, - "execution_count": 98, - "metadata": {}, - "output_type": "execute_result" } - ], - "source": [ - "from copy import deepcopy\n", - "\n", - "mydocument = deepcopy(document)\n", - "geoip_enricher.process(mydocument)\n", - "assert mydocument == expected_output\n", - "f\"The output has the expected form: {mydocument == expected_output}\"" - ] - } - ], - 
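The cell above carries the central change of this patch: `Factory.create` now receives only the configuration dict, without a logger. A minimal sketch of the call before and after the change, assuming the `processor_config` defined earlier in this notebook:

```python
from logprep.factory import Factory

# Before this patch the factory call carried a logger:
#     geoip_enricher = Factory.create(processor_config, mock_logger)
# After this patch the logger argument is gone; components manage
# their own logging, so only the configuration dict is passed:
geoip_enricher = Factory.create(processor_config)
```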
"metadata": { - "kernelspec": { - "display_name": "Python 3.11.0 ('.venv': venv)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.0" + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.11.0 ('.venv': venv)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.0" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "586280540a85d3e21edc698fe7b86af2848b9b02644e6c22463da25c40a3f1be" + } + } }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "586280540a85d3e21edc698fe7b86af2848b9b02644e6c22463da25c40a3f1be" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/doc/source/development/notebooks/processor_examples/grokker.ipynb b/doc/source/development/notebooks/processor_examples/grokker.ipynb index 06bd69e8b..6f75a9fa1 100644 --- a/doc/source/development/notebooks/processor_examples/grokker.ipynb +++ b/doc/source/development/notebooks/processor_examples/grokker.ipynb @@ -1,218 +1,218 @@ { - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Grokker\n", - "\n", - "This presentations goal it to introduce the features of the `Grokker` and how to configure it." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### The challenge\n", - "\n", - "I want to dissect a field to different target fields by logstash based grok patterns." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "from this:" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "document = {\"message\": \"2020-07-16T19:20:30.45+01:00 DEBUG This is a sample log\"}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "to this:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "expected = {\n", - " \"message\": \"2020-07-16T19:20:30.45+01:00 DEBUG This is a sample log\",\n", - " \"@timestamp\": \"2020-07-16T19:20:30.45+01:00\",\n", - " \"logLevel\": \"DEBUG\",\n", - " \"logMessage\": \"This is a sample log\",\n", - "}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create rule and processor" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the rule:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "135" + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Grokker\n", + "\n", + "This presentations goal it to introduce the features of the `Grokker` and how to configure it." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The challenge\n", + "\n", + "I want to dissect a field to different target fields by logstash based grok patterns." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "from this:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "document = {\"message\": \"2020-07-16T19:20:30.45+01:00 DEBUG This is a sample log\"}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "to this:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "expected = {\n", + " \"message\": \"2020-07-16T19:20:30.45+01:00 DEBUG This is a sample log\",\n", + " \"@timestamp\": \"2020-07-16T19:20:30.45+01:00\",\n", + " \"logLevel\": \"DEBUG\",\n", + " \"logMessage\": \"This is a sample log\",\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create rule and processor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the rule:" ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "135" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import sys\n", + "sys.path.append(\"../../../../../\")\n", + "import tempfile\n", + "from pathlib import Path\n", + "\n", + "rule_yaml = \"\"\"---\n", + "filter: \"message\"\n", + "grokker:\n", + " mapping:\n", + " message: \"%{TIMESTAMP_ISO8601:@timestamp} %{LOGLEVEL:logLevel} %{GREEDYDATA:logMessage}\"\n", + "\"\"\"\n", + "\n", + "rule_path = Path(tempfile.gettempdir()) / \"grokker\"\n", + "rule_path.mkdir(exist_ok=True)\n", + "rule_file = rule_path / \"grokker.yml\"\n", + "rule_file.write_text(rule_yaml)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the processor config:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "processor_config = {\n", + " \"mygrokker\":{ \n", + " \"type\": \"grokker\",\n", + " \"specific_rules\": [str(rule_path)],\n", + " \"generic_rules\": [\"/dev\"],\n", + " }\n", + " }" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the processor with the factory:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from unittest import mock\n", + "from logprep.factory import Factory\n", + "\n", + "mock_logger = mock.MagicMock()\n", + "grokker = Factory.create(processor_config)\n", + "grokker.setup()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Process event" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "before: {'message': '2020-07-16T19:20:30.45+01:00 DEBUG This is a sample log'}\n", + "after: {'message': '2020-07-16T19:20:30.45+01:00 DEBUG This is a sample log', '@timestamp': '2020-07-16T19:20:30.45+01:00', 'logLevel': 'DEBUG', 'logMessage': 'This is a sample log'}\n", + "True\n" + ] + } + ], + "source": [ + "from copy import deepcopy\n", + "mydocument = deepcopy(document)\n", + "\n", + "\n", + "print(f\"before: {mydocument}\")\n", + "grokker.process(mydocument)\n", + "print(f\"after: {mydocument}\")\n", + "print(mydocument == expected)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] } - ], - "source": [ - "import sys\n", - "sys.path.append(\"../../../../../\")\n", - "import 
tempfile\n", - "from pathlib import Path\n", - "\n", - "rule_yaml = \"\"\"---\n", - "filter: \"message\"\n", - "grokker:\n", - " mapping:\n", - " message: \"%{TIMESTAMP_ISO8601:@timestamp} %{LOGLEVEL:logLevel} %{GREEDYDATA:logMessage}\"\n", - "\"\"\"\n", - "\n", - "rule_path = Path(tempfile.gettempdir()) / \"grokker\"\n", - "rule_path.mkdir(exist_ok=True)\n", - "rule_file = rule_path / \"grokker.yml\"\n", - "rule_file.write_text(rule_yaml)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the processor config:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "processor_config = {\n", - " \"mygrokker\":{ \n", - " \"type\": \"grokker\",\n", - " \"specific_rules\": [str(rule_path)],\n", - " \"generic_rules\": [\"/dev\"],\n", - " }\n", - " }" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the processor with the factory:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "from unittest import mock\n", - "from logprep.factory import Factory\n", - "\n", - "mock_logger = mock.MagicMock()\n", - "grokker = Factory.create(processor_config, mock_logger)\n", - "grokker.setup()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Process event" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "before: {'message': '2020-07-16T19:20:30.45+01:00 DEBUG This is a sample log'}\n", - "after: {'message': '2020-07-16T19:20:30.45+01:00 DEBUG This is a sample log', '@timestamp': '2020-07-16T19:20:30.45+01:00', 'logLevel': 'DEBUG', 'logMessage': 'This is a sample log'}\n", - "True\n" - ] + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.11.0 ('.venv': venv)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "586280540a85d3e21edc698fe7b86af2848b9b02644e6c22463da25c40a3f1be" + } } - ], - "source": [ - "from copy import deepcopy\n", - "mydocument = deepcopy(document)\n", - "\n", - "\n", - "print(f\"before: {mydocument}\")\n", - "grokker.process(mydocument)\n", - "print(f\"after: {mydocument}\")\n", - "print(mydocument == expected)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.11.0 ('.venv': venv)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "586280540a85d3e21edc698fe7b86af2848b9b02644e6c22463da25c40a3f1be" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/doc/source/development/notebooks/processor_examples/ip_informer.ipynb b/doc/source/development/notebooks/processor_examples/ip_informer.ipynb index 6d80f922a..d1c827944 100644 --- 
a/doc/source/development/notebooks/processor_examples/ip_informer.ipynb +++ b/doc/source/development/notebooks/processor_examples/ip_informer.ipynb @@ -1,406 +1,406 @@ { - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# IpInformer\n", - "\n", - "This presentations goal it to introduce the features of the `IpInformer` and how to configure it." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### The challenge\n", - "\n", - "I want to enrich an event with additional information of ip_addresses" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "from this:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "document = {\n", - " 'ip_addresses': [\n", - " \"127.0.0.1\",\n", - " \"::1\",\n", - " \"192.168.178.54\",\n", - " \"10.10.0.2\",\n", - " \"fe80::b056:32ff:fe70:1f61\"\n", - " ]\n", - " }" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "to this:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "expected = {\n", - " \"ip_addresses\": {\n", - " \"127.0.0.1\": {\n", - " \"compressed\": \"127.0.0.1\",\n", - " \"exploded\": \"127.0.0.1\",\n", - " \"is_global\": False,\n", - " \"is_link_local\": False,\n", - " \"is_loopback\": True,\n", - " \"is_multicast\": False,\n", - " \"is_private\": True,\n", - " \"is_reserved\": False,\n", - " \"is_unspecified\": False,\n", - " \"max_prefixlen\": 32,\n", - " \"reverse_pointer\": \"1.0.0.127.in-addr.arpa\",\n", - " \"version\": 4\n", - " },\n", - " \"::1\": {\n", - " \"compressed\": \"::1\",\n", - " \"exploded\": \"0000:0000:0000:0000:0000:0000:0000:0001\",\n", - " \"ipv4_mapped\": None,\n", - " \"is_global\": False,\n", - " \"is_link_local\": False,\n", - " \"is_loopback\": True,\n", - " \"is_multicast\": False,\n", - " \"is_private\": True,\n", - " \"is_reserved\": True,\n", - " \"is_site_local\": False,\n", - " \"is_unspecified\": False,\n", - " \"max_prefixlen\": 128,\n", - " \"reverse_pointer\": \"1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa\",\n", - " \"scope_id\": None,\n", - " \"sixtofour\": None,\n", - " \"teredo\": None,\n", - " \"version\": 6\n", - " },\n", - " \"192.168.178.54\": {\n", - " \"compressed\": \"192.168.178.54\",\n", - " \"exploded\": \"192.168.178.54\",\n", - " \"is_global\": False,\n", - " \"is_link_local\": False,\n", - " \"is_loopback\": False,\n", - " \"is_multicast\": False,\n", - " \"is_private\": True,\n", - " \"is_reserved\": False,\n", - " \"is_unspecified\": False,\n", - " \"max_prefixlen\": 32,\n", - " \"reverse_pointer\": \"54.178.168.192.in-addr.arpa\",\n", - " \"version\": 4\n", - " },\n", - " \"10.10.0.2\": {\n", - " \"compressed\": \"10.10.0.2\",\n", - " \"exploded\": \"10.10.0.2\",\n", - " \"is_global\": False,\n", - " \"is_link_local\": False,\n", - " \"is_loopback\": False,\n", - " \"is_multicast\": False,\n", - " \"is_private\": True,\n", - " \"is_reserved\": False,\n", - " \"is_unspecified\": False,\n", - " \"max_prefixlen\": 32,\n", - " \"reverse_pointer\": \"2.0.10.10.in-addr.arpa\",\n", - " \"version\": 4\n", - " },\n", - " \"fe80::b056:32ff:fe70:1f61\": {\n", - " \"compressed\": \"fe80::b056:32ff:fe70:1f61\",\n", - " \"exploded\": \"fe80:0000:0000:0000:b056:32ff:fe70:1f61\",\n", - " \"ipv4_mapped\": None,\n", - " \"is_global\": False,\n", - " \"is_link_local\": True,\n", - " \"is_loopback\": False,\n", - " 
\"is_multicast\": False,\n", - " \"is_private\": True,\n", - " \"is_reserved\": False,\n", - " \"is_site_local\": False,\n", - " \"is_unspecified\": False,\n", - " \"max_prefixlen\": 128,\n", - " \"reverse_pointer\": \"1.6.f.1.0.7.e.f.f.f.2.3.6.5.0.b.0.0.0.0.0.0.0.0.0.0.0.0.0.8.e.f.ip6.arpa\",\n", - " \"scope_id\": None,\n", - " \"sixtofour\": None,\n", - " \"teredo\": None,\n", - " \"version\": 6\n", - " }\n", - " }\n", - "}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create rule and processor" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the rule:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "sys.path.append(\"../../../../../\")\n", - "from logprep.processor.ip_informer.rule import IpInformerRule\n", - "\n", - "rule_definition = {\n", - " \"filter\": \"ip_addresses\",\n", - " \"ip_informer\": {\n", - " \"source_fields\": [\"ip_addresses\"],\n", - " \"target_field\": \"ip_addresses\",\n", - " \"overwrite_target\": True\n", - " }\n", - "}\n", - "\n", - "rule = IpInformerRule._create_from_dict(rule_definition)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the processor config:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "processor_config = {\n", - " \"the_ip_informer_name\":{ \n", - " \"type\": \"ip_informer\",\n", - " \"specific_rules\": [],\n", - " \"generic_rules\": [],\n", - " }\n", - " }" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the processor with the factory:" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ + "cells": [ { - "data": { - "text/plain": [ - "ip_informer" + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# IpInformer\n", + "\n", + "This presentations goal it to introduce the features of the `IpInformer` and how to configure it." 
] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from logging import getLogger\n", - "from logprep.factory import Factory\n", - "\n", - "logger = getLogger()\n", - "ip_informer = Factory.create(processor_config, logger)\n", - "ip_informer" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "load the rule to the processor:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "ip_informer._specific_tree.add_rule(rule)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Process event" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The challenge\n", + "\n", + "I want to enrich an event with additional information of ip_addresses" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "from this:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "document = {\n", + " 'ip_addresses': [\n", + " \"127.0.0.1\",\n", + " \"::1\",\n", + " \"192.168.178.54\",\n", + " \"10.10.0.2\",\n", + " \"fe80::b056:32ff:fe70:1f61\"\n", + " ]\n", + " }" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "to this:" + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "before: {\n", - " \"ip_addresses\": [\n", - " \"127.0.0.1\",\n", - " \"::1\",\n", - " \"192.168.178.54\",\n", - " \"10.10.0.2\",\n", - " \"fe80::b056:32ff:fe70:1f61\"\n", - " ]\n", - "}\n", - "after: {\n", - " \"ip_addresses\": {\n", - " \"127.0.0.1\": {\n", - " \"compressed\": \"127.0.0.1\",\n", - " \"exploded\": \"127.0.0.1\",\n", - " \"is_global\": false,\n", - " \"is_link_local\": false,\n", - " \"is_loopback\": true,\n", - " \"is_multicast\": false,\n", - " \"is_private\": true,\n", - " \"is_reserved\": false,\n", - " \"is_unspecified\": false,\n", - " \"max_prefixlen\": 32,\n", - " \"reverse_pointer\": \"1.0.0.127.in-addr.arpa\",\n", - " \"version\": 4\n", - " },\n", - " \"::1\": {\n", - " \"compressed\": \"::1\",\n", - " \"exploded\": \"0000:0000:0000:0000:0000:0000:0000:0001\",\n", - " \"ipv4_mapped\": null,\n", - " \"is_global\": false,\n", - " \"is_link_local\": false,\n", - " \"is_loopback\": true,\n", - " \"is_multicast\": false,\n", - " \"is_private\": true,\n", - " \"is_reserved\": true,\n", - " \"is_site_local\": false,\n", - " \"is_unspecified\": false,\n", - " \"max_prefixlen\": 128,\n", - " \"reverse_pointer\": \"1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa\",\n", - " \"scope_id\": null,\n", - " \"sixtofour\": null,\n", - " \"teredo\": null,\n", - " \"version\": 6\n", - " },\n", - " \"192.168.178.54\": {\n", - " \"compressed\": \"192.168.178.54\",\n", - " \"exploded\": \"192.168.178.54\",\n", - " \"is_global\": false,\n", - " \"is_link_local\": false,\n", - " \"is_loopback\": false,\n", - " \"is_multicast\": false,\n", - " \"is_private\": true,\n", - " \"is_reserved\": false,\n", - " \"is_unspecified\": false,\n", - " \"max_prefixlen\": 32,\n", - " \"reverse_pointer\": \"54.178.168.192.in-addr.arpa\",\n", - " \"version\": 4\n", - " },\n", - " \"10.10.0.2\": {\n", - " \"compressed\": \"10.10.0.2\",\n", - " \"exploded\": \"10.10.0.2\",\n", - " \"is_global\": false,\n", - " \"is_link_local\": false,\n", - " \"is_loopback\": false,\n", - " \"is_multicast\": 
false,\n", - " \"is_private\": true,\n", - " \"is_reserved\": false,\n", - " \"is_unspecified\": false,\n", - " \"max_prefixlen\": 32,\n", - " \"reverse_pointer\": \"2.0.10.10.in-addr.arpa\",\n", - " \"version\": 4\n", - " },\n", - " \"fe80::b056:32ff:fe70:1f61\": {\n", - " \"compressed\": \"fe80::b056:32ff:fe70:1f61\",\n", - " \"exploded\": \"fe80:0000:0000:0000:b056:32ff:fe70:1f61\",\n", - " \"ipv4_mapped\": null,\n", - " \"is_global\": false,\n", - " \"is_link_local\": true,\n", - " \"is_loopback\": false,\n", - " \"is_multicast\": false,\n", - " \"is_private\": true,\n", - " \"is_reserved\": false,\n", - " \"is_site_local\": false,\n", - " \"is_unspecified\": false,\n", - " \"max_prefixlen\": 128,\n", - " \"reverse_pointer\": \"1.6.f.1.0.7.e.f.f.f.2.3.6.5.0.b.0.0.0.0.0.0.0.0.0.0.0.0.0.8.e.f.ip6.arpa\",\n", - " \"scope_id\": null,\n", - " \"sixtofour\": null,\n", - " \"teredo\": null,\n", - " \"version\": 6\n", - " }\n", - " }\n", - "}\n", - "True\n" - ] + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "expected = {\n", + " \"ip_addresses\": {\n", + " \"127.0.0.1\": {\n", + " \"compressed\": \"127.0.0.1\",\n", + " \"exploded\": \"127.0.0.1\",\n", + " \"is_global\": False,\n", + " \"is_link_local\": False,\n", + " \"is_loopback\": True,\n", + " \"is_multicast\": False,\n", + " \"is_private\": True,\n", + " \"is_reserved\": False,\n", + " \"is_unspecified\": False,\n", + " \"max_prefixlen\": 32,\n", + " \"reverse_pointer\": \"1.0.0.127.in-addr.arpa\",\n", + " \"version\": 4\n", + " },\n", + " \"::1\": {\n", + " \"compressed\": \"::1\",\n", + " \"exploded\": \"0000:0000:0000:0000:0000:0000:0000:0001\",\n", + " \"ipv4_mapped\": None,\n", + " \"is_global\": False,\n", + " \"is_link_local\": False,\n", + " \"is_loopback\": True,\n", + " \"is_multicast\": False,\n", + " \"is_private\": True,\n", + " \"is_reserved\": True,\n", + " \"is_site_local\": False,\n", + " \"is_unspecified\": False,\n", + " \"max_prefixlen\": 128,\n", + " \"reverse_pointer\": \"1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa\",\n", + " \"scope_id\": None,\n", + " \"sixtofour\": None,\n", + " \"teredo\": None,\n", + " \"version\": 6\n", + " },\n", + " \"192.168.178.54\": {\n", + " \"compressed\": \"192.168.178.54\",\n", + " \"exploded\": \"192.168.178.54\",\n", + " \"is_global\": False,\n", + " \"is_link_local\": False,\n", + " \"is_loopback\": False,\n", + " \"is_multicast\": False,\n", + " \"is_private\": True,\n", + " \"is_reserved\": False,\n", + " \"is_unspecified\": False,\n", + " \"max_prefixlen\": 32,\n", + " \"reverse_pointer\": \"54.178.168.192.in-addr.arpa\",\n", + " \"version\": 4\n", + " },\n", + " \"10.10.0.2\": {\n", + " \"compressed\": \"10.10.0.2\",\n", + " \"exploded\": \"10.10.0.2\",\n", + " \"is_global\": False,\n", + " \"is_link_local\": False,\n", + " \"is_loopback\": False,\n", + " \"is_multicast\": False,\n", + " \"is_private\": True,\n", + " \"is_reserved\": False,\n", + " \"is_unspecified\": False,\n", + " \"max_prefixlen\": 32,\n", + " \"reverse_pointer\": \"2.0.10.10.in-addr.arpa\",\n", + " \"version\": 4\n", + " },\n", + " \"fe80::b056:32ff:fe70:1f61\": {\n", + " \"compressed\": \"fe80::b056:32ff:fe70:1f61\",\n", + " \"exploded\": \"fe80:0000:0000:0000:b056:32ff:fe70:1f61\",\n", + " \"ipv4_mapped\": None,\n", + " \"is_global\": False,\n", + " \"is_link_local\": True,\n", + " \"is_loopback\": False,\n", + " \"is_multicast\": False,\n", + " \"is_private\": True,\n", + " \"is_reserved\": False,\n", + " \"is_site_local\": 
False,\n", + " \"is_unspecified\": False,\n", + " \"max_prefixlen\": 128,\n", + " \"reverse_pointer\": \"1.6.f.1.0.7.e.f.f.f.2.3.6.5.0.b.0.0.0.0.0.0.0.0.0.0.0.0.0.8.e.f.ip6.arpa\",\n", + " \"scope_id\": None,\n", + " \"sixtofour\": None,\n", + " \"teredo\": None,\n", + " \"version\": 6\n", + " }\n", + " }\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create rule and processor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the rule:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append(\"../../../../../\")\n", + "from logprep.processor.ip_informer.rule import IpInformerRule\n", + "\n", + "rule_definition = {\n", + " \"filter\": \"ip_addresses\",\n", + " \"ip_informer\": {\n", + " \"source_fields\": [\"ip_addresses\"],\n", + " \"target_field\": \"ip_addresses\",\n", + " \"overwrite_target\": True\n", + " }\n", + "}\n", + "\n", + "rule = IpInformerRule._create_from_dict(rule_definition)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the processor config:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "processor_config = {\n", + " \"the_ip_informer_name\":{ \n", + " \"type\": \"ip_informer\",\n", + " \"specific_rules\": [],\n", + " \"generic_rules\": [],\n", + " }\n", + " }" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the processor with the factory:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "ip_informer" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from logging import getLogger\n", + "from logprep.factory import Factory\n", + "\n", + "logger = getLogger()\n", + "ip_informer = Factory.create(processor_config)\n", + "ip_informer" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "load the rule to the processor:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "ip_informer._specific_tree.add_rule(rule)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Process event" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "before: {\n", + " \"ip_addresses\": [\n", + " \"127.0.0.1\",\n", + " \"::1\",\n", + " \"192.168.178.54\",\n", + " \"10.10.0.2\",\n", + " \"fe80::b056:32ff:fe70:1f61\"\n", + " ]\n", + "}\n", + "after: {\n", + " \"ip_addresses\": {\n", + " \"127.0.0.1\": {\n", + " \"compressed\": \"127.0.0.1\",\n", + " \"exploded\": \"127.0.0.1\",\n", + " \"is_global\": false,\n", + " \"is_link_local\": false,\n", + " \"is_loopback\": true,\n", + " \"is_multicast\": false,\n", + " \"is_private\": true,\n", + " \"is_reserved\": false,\n", + " \"is_unspecified\": false,\n", + " \"max_prefixlen\": 32,\n", + " \"reverse_pointer\": \"1.0.0.127.in-addr.arpa\",\n", + " \"version\": 4\n", + " },\n", + " \"::1\": {\n", + " \"compressed\": \"::1\",\n", + " \"exploded\": \"0000:0000:0000:0000:0000:0000:0000:0001\",\n", + " \"ipv4_mapped\": null,\n", + " \"is_global\": false,\n", + " \"is_link_local\": false,\n", + " \"is_loopback\": true,\n", + " \"is_multicast\": false,\n", + " \"is_private\": true,\n", + " 
\"is_reserved\": true,\n", + " \"is_site_local\": false,\n", + " \"is_unspecified\": false,\n", + " \"max_prefixlen\": 128,\n", + " \"reverse_pointer\": \"1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa\",\n", + " \"scope_id\": null,\n", + " \"sixtofour\": null,\n", + " \"teredo\": null,\n", + " \"version\": 6\n", + " },\n", + " \"192.168.178.54\": {\n", + " \"compressed\": \"192.168.178.54\",\n", + " \"exploded\": \"192.168.178.54\",\n", + " \"is_global\": false,\n", + " \"is_link_local\": false,\n", + " \"is_loopback\": false,\n", + " \"is_multicast\": false,\n", + " \"is_private\": true,\n", + " \"is_reserved\": false,\n", + " \"is_unspecified\": false,\n", + " \"max_prefixlen\": 32,\n", + " \"reverse_pointer\": \"54.178.168.192.in-addr.arpa\",\n", + " \"version\": 4\n", + " },\n", + " \"10.10.0.2\": {\n", + " \"compressed\": \"10.10.0.2\",\n", + " \"exploded\": \"10.10.0.2\",\n", + " \"is_global\": false,\n", + " \"is_link_local\": false,\n", + " \"is_loopback\": false,\n", + " \"is_multicast\": false,\n", + " \"is_private\": true,\n", + " \"is_reserved\": false,\n", + " \"is_unspecified\": false,\n", + " \"max_prefixlen\": 32,\n", + " \"reverse_pointer\": \"2.0.10.10.in-addr.arpa\",\n", + " \"version\": 4\n", + " },\n", + " \"fe80::b056:32ff:fe70:1f61\": {\n", + " \"compressed\": \"fe80::b056:32ff:fe70:1f61\",\n", + " \"exploded\": \"fe80:0000:0000:0000:b056:32ff:fe70:1f61\",\n", + " \"ipv4_mapped\": null,\n", + " \"is_global\": false,\n", + " \"is_link_local\": true,\n", + " \"is_loopback\": false,\n", + " \"is_multicast\": false,\n", + " \"is_private\": true,\n", + " \"is_reserved\": false,\n", + " \"is_site_local\": false,\n", + " \"is_unspecified\": false,\n", + " \"max_prefixlen\": 128,\n", + " \"reverse_pointer\": \"1.6.f.1.0.7.e.f.f.f.2.3.6.5.0.b.0.0.0.0.0.0.0.0.0.0.0.0.0.8.e.f.ip6.arpa\",\n", + " \"scope_id\": null,\n", + " \"sixtofour\": null,\n", + " \"teredo\": null,\n", + " \"version\": 6\n", + " }\n", + " }\n", + "}\n", + "True\n" + ] + } + ], + "source": [ + "import json\n", + "from copy import deepcopy\n", + "mydocument = deepcopy(document)\n", + "\n", + "\n", + "print(f\"before: {json.dumps(mydocument, indent=2)}\")\n", + "ip_informer.process(mydocument)\n", + "print(f\"after: {json.dumps(mydocument, indent=2)}\")\n", + "print(mydocument == expected)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.11.0 ('.venv': venv)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "586280540a85d3e21edc698fe7b86af2848b9b02644e6c22463da25c40a3f1be" + } } - ], - "source": [ - "import json\n", - "from copy import deepcopy\n", - "mydocument = deepcopy(document)\n", - "\n", - "\n", - "print(f\"before: {json.dumps(mydocument, indent=2)}\")\n", - "ip_informer.process(mydocument)\n", - "print(f\"after: {json.dumps(mydocument, indent=2)}\")\n", - "print(mydocument == expected)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.11.0 ('.venv': venv)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - 
"pygments_lexer": "ipython3", - "version": "3.9.16" }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "586280540a85d3e21edc698fe7b86af2848b9b02644e6c22463da25c40a3f1be" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/doc/source/development/notebooks/processor_examples/key_checker.ipynb b/doc/source/development/notebooks/processor_examples/key_checker.ipynb index 3e805e13b..8a915732a 100644 --- a/doc/source/development/notebooks/processor_examples/key_checker.ipynb +++ b/doc/source/development/notebooks/processor_examples/key_checker.ipynb @@ -1,339 +1,339 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# KeyChecker\n", - "\n", - "This presentations goal it to introduce the features of the `KeyChecker` and how to configure it." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### The challenge\n", - "\n", - "I want to ensure, that the log format does not change unnoticed.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "given log entry:" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "document = {\n", - " \"_systemd_owner_uid\": \"1000\",\n", - " \"_systemd_session\": \"198\",\n", - " \"_hostname\": \"dev-machine\",\n", - " \"_audit_session\": \"198\",\n", - " \"_boot_id\": \"3eef443102284373bb33022da6c23d2b\",\n", - " \"_systemd_unit\": \"session-198.scope\",\n", - " \"_transport\": \"syslog\",\n", - " \"_pid\": \"712694\",\n", - " \"_cmdline\": \"/usr/bin/sudo journalctl --no-pager -o json -f\",\n", - " \"_cap_effective\": \"1ffffffffff\",\n", - " \"__monotonic_timestamp\": \"263250766668\",\n", - " \"_selinux_context\": \"unconfined\\n\",\n", - " \"__realtime_timestamp\": \"1667914601897529\",\n", - " \"_gid\": \"0\",\n", - " \"_uid\": \"1000\",\n", - " \"_systemd_cgroup\": \"/user.slice/user-1000.slice/session-198.scope\",\n", - " \"_comm\": \"sudo\",\n", - " \"_audit_loginuid\": \"1000\",\n", - " \"_systemd_slice\": \"user-1000.slice\",\n", - " \"_machine_id\": \"edafb3b3b3ed4d7a8878309023f456fe\",\n", - " \"syslog_timestamp\": \"nov 8 13:36:41 \",\n", - " \"message\": \"pam_unix(sudo:session): session opened for user root(uid=0) by vagrant(uid=1000)\",\n", - " \"_systemd_user_slice\": \"-.slice\",\n", - " \"syslog_identifier\": \"sudo\",\n", - " \"_systemd_invocation_id\": \"19bb831be8c04629b4df55edf5b3bdcb\",\n", - " \"syslog_facility\": \"10\",\n", - " \"__cursor\": \"s=99e63e2c458b47fcbad587fb0e74be0d;i=21fd41;b=3eef443102284373bb33022da6c23d2b;m=3d4af7eb4c;t=5ecf5a15c9e39;x=36322c30d547bfb8\",\n", - " \"priority\": \"6\",\n", - " \"_exe\": \"/usr/bin/sudo\",\n", - " \"_source_realtime_timestamp\": \"1667914601879236\",\n", - "}\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "to this:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "expected = {\n", - " \"_systemd_owner_uid\": \"1000\",\n", - " \"_systemd_session\": \"198\",\n", - " \"_hostname\": \"dev-machine\",\n", - " \"_audit_session\": \"198\",\n", - " \"_boot_id\": \"3eef443102284373bb33022da6c23d2b\",\n", - " \"_systemd_unit\": \"session-198.scope\",\n", - " \"_transport\": \"syslog\",\n", - " \"_pid\": \"712694\",\n", - " \"_cmdline\": \"/usr/bin/sudo journalctl --no-pager -o json -f\",\n", - " \"_cap_effective\": \"1ffffffffff\",\n", - " \"__monotonic_timestamp\": 
\"263250766668\",\n", - " \"_selinux_context\": \"unconfined\\n\",\n", - " \"__realtime_timestamp\": \"1667914601897529\",\n", - " \"_gid\": \"0\",\n", - " \"_uid\": \"1000\",\n", - " \"_systemd_cgroup\": \"/user.slice/user-1000.slice/session-198.scope\",\n", - " \"_comm\": \"sudo\",\n", - " \"_audit_loginuid\": \"1000\",\n", - " \"_systemd_slice\": \"user-1000.slice\",\n", - " \"_machine_id\": \"edafb3b3b3ed4d7a8878309023f456fe\",\n", - " \"syslog_timestamp\": \"nov 8 13:36:41 \",\n", - " \"message\": \"pam_unix(sudo:session): session opened for user root(uid=0) by vagrant(uid=1000)\",\n", - " \"_systemd_user_slice\": \"-.slice\",\n", - " \"syslog_identifier\": \"sudo\",\n", - " \"_systemd_invocation_id\": \"19bb831be8c04629b4df55edf5b3bdcb\",\n", - " \"syslog_facility\": \"10\",\n", - " \"__cursor\": \"s=99e63e2c458b47fcbad587fb0e74be0d;i=21fd41;b=3eef443102284373bb33022da6c23d2b;m=3d4af7eb4c;t=5ecf5a15c9e39;x=36322c30d547bfb8\",\n", - " \"priority\": \"6\",\n", - " \"_exe\": \"/usr/bin/sudo\",\n", - " \"_source_realtime_timestamp\": \"1667914601879236\",\n", - "}\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create rule and processor" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the rule:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ + "cells": [ { - "data": { - "text/plain": [ - "675" + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# KeyChecker\n", + "\n", + "This presentations goal it to introduce the features of the `KeyChecker` and how to configure it." ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from pathlib import Path\n", - "import sys\n", - "sys.path.append(\"../../../../../\")\n", - "import tempfile\n", - "\n", - "\n", - "rule_yaml = \"\"\"---\n", - "filter: \"message\"\n", - "key_checker:\n", - " source_fields:\n", - " - _systemd_owner_uid\n", - " - _systemd_session\n", - " - _hostname \n", - " - _audit_session\n", - " - _boot_id\n", - " - _systemd_unit\n", - " - _transport\n", - " - _pid\n", - " - _cmdline\n", - " - _cap_effective\n", - " - __monotonic_timestamp\n", - " - _selinux_context\n", - " - __realtime_timestamp\n", - " - _gid\n", - " - _uid\n", - " - _systemd_cgroup\n", - " - _comm\n", - " - _audit_loginuid\n", - " - _systemd_slice\n", - " - _machine_id\n", - " - syslog_timestamp\n", - " - message\n", - " - _systemd_user_slice\n", - " - syslog_identifier\n", - " - _systemd_invocation_id\n", - " - syslog_facility\n", - " - __cursor\n", - " - priority\n", - " - _exe\n", - " - _source_realtime_timestamp\n", - " target_field: missing_fields\n", - "\"\"\"\n", - "\n", - "rule_path = Path(tempfile.gettempdir()) / \"concatenator\"\n", - "rule_path.mkdir(exist_ok=True)\n", - "rule_file = rule_path / \"data-stream.yml\"\n", - "rule_file.write_text(rule_yaml)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the processor config:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "processor_config = {\n", - " \"almighty_keychecker\": {\n", - " \"type\": \"key_checker\",\n", - " \"specific_rules\": [str(rule_path)],\n", - " \"generic_rules\": [\"/dev\"],\n", - " }\n", - "}\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the processor with the factory:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ + }, { - 
"ename": "ModuleNotFoundError", - "evalue": "No module named 'logprep'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[5], line 4\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39msys\u001b[39;00m\n\u001b[1;32m 3\u001b[0m sys\u001b[39m.\u001b[39mpath\u001b[39m.\u001b[39mappend(\u001b[39m\"\u001b[39m\u001b[39m..\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m----> 4\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mlogprep\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mfactory\u001b[39;00m \u001b[39mimport\u001b[39;00m Factory\n\u001b[1;32m 6\u001b[0m mock_logger \u001b[39m=\u001b[39m mock\u001b[39m.\u001b[39mMagicMock()\n\u001b[1;32m 7\u001b[0m keychecker \u001b[39m=\u001b[39m Factory\u001b[39m.\u001b[39mcreate(processor_config, mock_logger)\n", - "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'logprep'" - ] - } - ], - "source": [ - "from unittest import mock\n", - "import sys\n", - "from logprep.factory import Factory\n", - "\n", - "mock_logger = mock.MagicMock()\n", - "keychecker = Factory.create(processor_config, mock_logger)\n", - "keychecker\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Process event\n", - "\n", - "#### Case 1 - nothing changed" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from copy import deepcopy\n", - "\n", - "mydocument = deepcopy(document)\n", - "keychecker.process(mydocument)\n", - "assert mydocument == expected\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Case 2 - missing field" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The challenge\n", + "\n", + "I want to ensure, that the log format does not change unnoticed.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "given log entry:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "document = {\n", + " \"_systemd_owner_uid\": \"1000\",\n", + " \"_systemd_session\": \"198\",\n", + " \"_hostname\": \"dev-machine\",\n", + " \"_audit_session\": \"198\",\n", + " \"_boot_id\": \"3eef443102284373bb33022da6c23d2b\",\n", + " \"_systemd_unit\": \"session-198.scope\",\n", + " \"_transport\": \"syslog\",\n", + " \"_pid\": \"712694\",\n", + " \"_cmdline\": \"/usr/bin/sudo journalctl --no-pager -o json -f\",\n", + " \"_cap_effective\": \"1ffffffffff\",\n", + " \"__monotonic_timestamp\": \"263250766668\",\n", + " \"_selinux_context\": \"unconfined\\n\",\n", + " \"__realtime_timestamp\": \"1667914601897529\",\n", + " \"_gid\": \"0\",\n", + " \"_uid\": \"1000\",\n", + " \"_systemd_cgroup\": \"/user.slice/user-1000.slice/session-198.scope\",\n", + " \"_comm\": \"sudo\",\n", + " \"_audit_loginuid\": \"1000\",\n", + " \"_systemd_slice\": \"user-1000.slice\",\n", + " \"_machine_id\": \"edafb3b3b3ed4d7a8878309023f456fe\",\n", + " \"syslog_timestamp\": \"nov 8 13:36:41 \",\n", + " \"message\": \"pam_unix(sudo:session): session opened for user root(uid=0) by vagrant(uid=1000)\",\n", + " \"_systemd_user_slice\": \"-.slice\",\n", + " \"syslog_identifier\": \"sudo\",\n", + " \"_systemd_invocation_id\": \"19bb831be8c04629b4df55edf5b3bdcb\",\n", + " 
\"syslog_facility\": \"10\",\n", + " \"__cursor\": \"s=99e63e2c458b47fcbad587fb0e74be0d;i=21fd41;b=3eef443102284373bb33022da6c23d2b;m=3d4af7eb4c;t=5ecf5a15c9e39;x=36322c30d547bfb8\",\n", + " \"priority\": \"6\",\n", + " \"_exe\": \"/usr/bin/sudo\",\n", + " \"_source_realtime_timestamp\": \"1667914601879236\",\n", + "}\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "to this:" + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "False\n" - ] + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "expected = {\n", + " \"_systemd_owner_uid\": \"1000\",\n", + " \"_systemd_session\": \"198\",\n", + " \"_hostname\": \"dev-machine\",\n", + " \"_audit_session\": \"198\",\n", + " \"_boot_id\": \"3eef443102284373bb33022da6c23d2b\",\n", + " \"_systemd_unit\": \"session-198.scope\",\n", + " \"_transport\": \"syslog\",\n", + " \"_pid\": \"712694\",\n", + " \"_cmdline\": \"/usr/bin/sudo journalctl --no-pager -o json -f\",\n", + " \"_cap_effective\": \"1ffffffffff\",\n", + " \"__monotonic_timestamp\": \"263250766668\",\n", + " \"_selinux_context\": \"unconfined\\n\",\n", + " \"__realtime_timestamp\": \"1667914601897529\",\n", + " \"_gid\": \"0\",\n", + " \"_uid\": \"1000\",\n", + " \"_systemd_cgroup\": \"/user.slice/user-1000.slice/session-198.scope\",\n", + " \"_comm\": \"sudo\",\n", + " \"_audit_loginuid\": \"1000\",\n", + " \"_systemd_slice\": \"user-1000.slice\",\n", + " \"_machine_id\": \"edafb3b3b3ed4d7a8878309023f456fe\",\n", + " \"syslog_timestamp\": \"nov 8 13:36:41 \",\n", + " \"message\": \"pam_unix(sudo:session): session opened for user root(uid=0) by vagrant(uid=1000)\",\n", + " \"_systemd_user_slice\": \"-.slice\",\n", + " \"syslog_identifier\": \"sudo\",\n", + " \"_systemd_invocation_id\": \"19bb831be8c04629b4df55edf5b3bdcb\",\n", + " \"syslog_facility\": \"10\",\n", + " \"__cursor\": \"s=99e63e2c458b47fcbad587fb0e74be0d;i=21fd41;b=3eef443102284373bb33022da6c23d2b;m=3d4af7eb4c;t=5ecf5a15c9e39;x=36322c30d547bfb8\",\n", + " \"priority\": \"6\",\n", + " \"_exe\": \"/usr/bin/sudo\",\n", + " \"_source_realtime_timestamp\": \"1667914601879236\",\n", + "}\n" + ] }, { - "data": { - "text/plain": [ - "['_hostname', 'syslog_timestamp']" + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create rule and processor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the rule:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "675" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pathlib import Path\n", + "import sys\n", + "sys.path.append(\"../../../../../\")\n", + "import tempfile\n", + "\n", + "\n", + "rule_yaml = \"\"\"---\n", + "filter: \"message\"\n", + "key_checker:\n", + " source_fields:\n", + " - _systemd_owner_uid\n", + " - _systemd_session\n", + " - _hostname \n", + " - _audit_session\n", + " - _boot_id\n", + " - _systemd_unit\n", + " - _transport\n", + " - _pid\n", + " - _cmdline\n", + " - _cap_effective\n", + " - __monotonic_timestamp\n", + " - _selinux_context\n", + " - __realtime_timestamp\n", + " - _gid\n", + " - _uid\n", + " - _systemd_cgroup\n", + " - _comm\n", + " - _audit_loginuid\n", + " - _systemd_slice\n", + " - _machine_id\n", + " - syslog_timestamp\n", + " - message\n", + " - _systemd_user_slice\n", + " - syslog_identifier\n", + " - _systemd_invocation_id\n", + " - syslog_facility\n", + " - 
__cursor\n", + " - priority\n", + " - _exe\n", + " - _source_realtime_timestamp\n", + " target_field: missing_fields\n", + "\"\"\"\n", + "\n", + "rule_path = Path(tempfile.gettempdir()) / \"concatenator\"\n", + "rule_path.mkdir(exist_ok=True)\n", + "rule_file = rule_path / \"data-stream.yml\"\n", + "rule_file.write_text(rule_yaml)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the processor config:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "processor_config = {\n", + " \"almighty_keychecker\": {\n", + " \"type\": \"key_checker\",\n", + " \"specific_rules\": [str(rule_path)],\n", + " \"generic_rules\": [\"/dev\"],\n", + " }\n", + "}\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the processor with the factory:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'logprep'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[5], line 4\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39msys\u001b[39;00m\n\u001b[1;32m 3\u001b[0m sys\u001b[39m.\u001b[39mpath\u001b[39m.\u001b[39mappend(\u001b[39m\"\u001b[39m\u001b[39m..\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m----> 4\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mlogprep\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mfactory\u001b[39;00m \u001b[39mimport\u001b[39;00m Factory\n\u001b[1;32m 6\u001b[0m mock_logger \u001b[39m=\u001b[39m mock\u001b[39m.\u001b[39mMagicMock()\n\u001b[1;32m 7\u001b[0m keychecker \u001b[39m=\u001b[39m Factory\u001b[39m.\u001b[39mcreate(processor_config, mock_logger)\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'logprep'" + ] + } + ], + "source": [ + "from unittest import mock\n", + "import sys\n", + "from logprep.factory import Factory\n", + "\n", + "mock_logger = mock.MagicMock()\n", + "keychecker = Factory.create(processor_config)\n", + "keychecker\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Process event\n", + "\n", + "#### Case 1 - nothing changed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from copy import deepcopy\n", + "\n", + "mydocument = deepcopy(document)\n", + "keychecker.process(mydocument)\n", + "assert mydocument == expected\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Case 2 - missing field" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "False\n" + ] + }, + { + "data": { + "text/plain": [ + "['_hostname', 'syslog_timestamp']" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mydocument.pop(\"_hostname\")\n", + "mydocument.pop(\"syslog_timestamp\")\n", + "keychecker.process(mydocument)\n", + "print(mydocument == expected)\n", + "mydocument[\"missing_fields\"]" ] - }, - "execution_count": 58, - "metadata": {}, - "output_type": "execute_result" } - ], - "source": [ - "mydocument.pop(\"_hostname\")\n", - "mydocument.pop(\"syslog_timestamp\")\n", - "keychecker.process(mydocument)\n", - 
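# A minimal sketch of the behaviour the key_checker rule above configures,
# assuming flat event keys as in the journald example; an illustration, not
# logprep's implementation. Keys listed in source_fields that are absent
# from the event are collected into target_field.
def check_keys(event: dict, source_fields: list, target_field: str) -> None:
    missing = [field for field in source_fields if field not in event]
    if missing:
        event[target_field] = missing

# After popping "_hostname" and "syslog_timestamp" as in Case 2, this yields
# event["missing_fields"] == ["_hostname", "syslog_timestamp"].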
"print(mydocument == expected)\n", - "mydocument[\"missing_fields\"]" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.10.6 64-bit", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.6" + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.10.6 64-bit", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1" + } + } }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/doc/source/development/notebooks/processor_examples/requester.ipynb b/doc/source/development/notebooks/processor_examples/requester.ipynb index 8e6bec3ed..ff5549d70 100644 --- a/doc/source/development/notebooks/processor_examples/requester.ipynb +++ b/doc/source/development/notebooks/processor_examples/requester.ipynb @@ -1,246 +1,246 @@ { - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Requester\n", - "\n", - "This presentations goal is it to introduce the features of the `Requester` and how to configure it." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### The challenge\n", - "\n", - "* I want to trigger external systems with or without field values from the currently processed event in the requests payload\n", - "* I want to enrich events by external systems" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "from this:" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [], - "source": [ - "document = {\n", - " 'message': {\n", - " \"hostname\": \"H34222S3\"\n", - " }\n", - " }" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "to this:" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [], - "source": [ - "expected = {\n", - " 'message': {\n", - " 'hostname': 'H34222S3',\n", - " 'location': {\n", - " 'building': 'H5', \n", - " 'floor': '3', \n", - " 'room': '123'\n", - " }\n", - " }\n", - "}\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create rule and processor" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the rule:" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [ + "cells": [ { - "data": { - "text/plain": [ - "153" + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Requester\n", + "\n", + "This presentations goal is it to introduce the features of the `Requester` and how to configure it." 
] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import sys\n", - "sys.path.append(\"../../../../../\")\n", - "import tempfile\n", - "from pathlib import Path\n", - "\n", - "rule_yaml = \"\"\"---\n", - "filter: \"message.hostname\"\n", - "requester:\n", - " target_field_mapping:\n", - " location: message.location\n", - " method: GET\n", - " url: http://localhost:32000/requester_api_example.json\n", - " \n", - "\"\"\"\n", - "\n", - "rule_path = Path(tempfile.gettempdir()) / \"requester\"\n", - "rule_path.mkdir(exist_ok=True)\n", - "rule_file = rule_path / \"requester.yml\"\n", - "rule_file.write_text(rule_yaml)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the processor config:" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [], - "source": [ - "processor_config = {\n", - " \"cmdbrequests\":{ \n", - " \"type\": \"requester\",\n", - " \"specific_rules\": [str(rule_path)],\n", - " \"generic_rules\": [],\n", - " }\n", - " }" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the processor with the factory:" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [ + }, { - "data": { - "text/plain": [ - "requester" + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The challenge\n", + "\n", + "* I want to trigger external systems with or without field values from the currently processed event in the requests payload\n", + "* I want to enrich events by external systems" ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from unittest import mock\n", - "from logprep.factory import Factory\n", - "\n", - "mock_logger = mock.MagicMock()\n", - "requester = Factory.create(processor_config, mock_logger)\n", - "requester" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Process event" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [ + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "from this:" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "document = {\n", + " 'message': {\n", + " \"hostname\": \"H34222S3\"\n", + " }\n", + " }" + ] + }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "2022-12-13 02:24:31,539 urllib3.connectionpool DEBUG : Starting new HTTP connection (1): localhost:32000\n", - "127.0.0.1 - - [13/Dec/2022 02:24:31] \"GET /api.json HTTP/1.1\" 200 -\n", - "2022-12-13 02:24:31,542 urllib3.connectionpool DEBUG : http://localhost:32000 \"GET /api.json HTTP/1.1\" 200 97\n" - ] + "cell_type": "markdown", + "metadata": {}, + "source": [ + "to this:" + ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "before: {'message': {'hostname': 'H34222S3'}}\n", - "after: {'message': {'hostname': 'H34222S3', 'location': {'building': 'H5', 'floor': '3', 'room': '123'}}}\n", - "True\n" - ] + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "expected = {\n", + " 'message': {\n", + " 'hostname': 'H34222S3',\n", + " 'location': {\n", + " 'building': 'H5', \n", + " 'floor': '3', \n", + " 'room': '123'\n", + " }\n", + " }\n", + "}\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create rule and processor" + ] + }, + { + "cell_type": "markdown", 
+ "metadata": {}, + "source": [ + "create the rule:" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "153" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import sys\n", + "sys.path.append(\"../../../../../\")\n", + "import tempfile\n", + "from pathlib import Path\n", + "\n", + "rule_yaml = \"\"\"---\n", + "filter: \"message.hostname\"\n", + "requester:\n", + " target_field_mapping:\n", + " location: message.location\n", + " method: GET\n", + " url: http://localhost:32000/requester_api_example.json\n", + " \n", + "\"\"\"\n", + "\n", + "rule_path = Path(tempfile.gettempdir()) / \"requester\"\n", + "rule_path.mkdir(exist_ok=True)\n", + "rule_file = rule_path / \"requester.yml\"\n", + "rule_file.write_text(rule_yaml)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the processor config:" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "processor_config = {\n", + " \"cmdbrequests\":{ \n", + " \"type\": \"requester\",\n", + " \"specific_rules\": [str(rule_path)],\n", + " \"generic_rules\": [],\n", + " }\n", + " }" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the processor with the factory:" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "requester" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from unittest import mock\n", + "from logprep.factory import Factory\n", + "\n", + "mock_logger = mock.MagicMock()\n", + "requester = Factory.create(processor_config)\n", + "requester" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Process event" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2022-12-13 02:24:31,539 urllib3.connectionpool DEBUG : Starting new HTTP connection (1): localhost:32000\n", + "127.0.0.1 - - [13/Dec/2022 02:24:31] \"GET /api.json HTTP/1.1\" 200 -\n", + "2022-12-13 02:24:31,542 urllib3.connectionpool DEBUG : http://localhost:32000 \"GET /api.json HTTP/1.1\" 200 97\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "before: {'message': {'hostname': 'H34222S3'}}\n", + "after: {'message': {'hostname': 'H34222S3', 'location': {'building': 'H5', 'floor': '3', 'room': '123'}}}\n", + "True\n" + ] + } + ], + "source": [ + "from copy import deepcopy\n", + "from tests.acceptance.util import TestServer\n", + "mydocument = deepcopy(document)\n", + "\n", + "with TestServer.run_in_thread():\n", + " print(f\"before: {mydocument}\")\n", + " requester.process(mydocument)\n", + " print(f\"after: {mydocument}\")\n", + " print(mydocument == expected)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.11.0 ('.venv': venv)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.15" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "586280540a85d3e21edc698fe7b86af2848b9b02644e6c22463da25c40a3f1be" + } } - ], - "source": [ - "from 
copy import deepcopy\n", - "from tests.acceptance.util import TestServer\n", - "mydocument = deepcopy(document)\n", - "\n", - "with TestServer.run_in_thread():\n", - " print(f\"before: {mydocument}\")\n", - " requester.process(mydocument)\n", - " print(f\"after: {mydocument}\")\n", - " print(mydocument == expected)\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.11.0 ('.venv': venv)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.15" }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "586280540a85d3e21edc698fe7b86af2848b9b02644e6c22463da25c40a3f1be" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/doc/source/development/notebooks/processor_examples/string_splitter.ipynb b/doc/source/development/notebooks/processor_examples/string_splitter.ipynb index 7c51a86a0..a29da9874 100644 --- a/doc/source/development/notebooks/processor_examples/string_splitter.ipynb +++ b/doc/source/development/notebooks/processor_examples/string_splitter.ipynb @@ -1,269 +1,269 @@ { - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# StringSplitter\n", - "\n", - "This presentation's goal it to introduce the features of the `StringSplitter` and how to configure it." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### The challenges\n", - "\n", - "- I want to split strings of varying length contained in a source field\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "given preprocessed log entry:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "document = {\n", - " \"ip_addresses\": \"192.168.5.1, 10.10.2.1, fe80::, 127.0.0.1\"\n", - "}\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create rules and processor" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the rules:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ + "cells": [ { - "ename": "ModuleNotFoundError", - "evalue": "No module named 'logprep.processor.string_splitter'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[10], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39msys\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[39m# sys.path.append(\"../../../../../\")\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mlogprep\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mprocessor\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mstring_splitter\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mrule\u001b[39;00m \u001b[39mimport\u001b[39;00m StringSplitterRule\n\u001b[1;32m 5\u001b[0m rules_definitions \u001b[39m=\u001b[39m [\n\u001b[1;32m 6\u001b[0m {\n\u001b[1;32m 7\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mfilter\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39m\"\u001b[39m\u001b[39mip_addresses\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[0;32m 
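# A hedged sketch of what the Requester rule above effects, written against
# the plain `requests` library rather than logprep internals: perform the
# configured GET request, then map the "location" field of the JSON response
# onto message.location as declared in target_field_mapping.
import requests

def enrich(event: dict, url: str) -> None:
    response = requests.get(url, timeout=5)  # method and url come from the rule
    event["message"]["location"] = response.json()["location"]

# enrich(mydocument, "http://localhost:32000/requester_api_example.json")
# would produce the expected nested location object, given the test server.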
(...)\u001b[0m\n\u001b[1;32m 12\u001b[0m }\n\u001b[1;32m 13\u001b[0m ]\n\u001b[1;32m 14\u001b[0m rules \u001b[39m=\u001b[39m [StringSplitterRule\u001b[39m.\u001b[39m_create_from_dict(rule_dict) \u001b[39mfor\u001b[39;00m rule_dict \u001b[39min\u001b[39;00m rules_definitions]\n", - "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'logprep.processor.string_splitter'" - ] - } - ], - "source": [ - "import sys\n", - "sys.path.append(\"../../../../../\")\n", - "\n", - "from logprep.processor.string_splitter.rule import StringSplitterRule\n", - "rules_definitions = [\n", - " {\n", - " \"filter\": \"ip_addresses\",\n", - " \"string_splitter\": {\n", - " \"source_fields\": [\"ip_addresses\"],\n", - " \"target_field\": \"ip_address_list\"\n", - " },\n", - " }\n", - "]\n", - "rules = [StringSplitterRule._create_from_dict(rule_dict) for rule_dict in rules_definitions]\n", - "rules" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the processor config:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "processor_config = {\n", - " \"allmighty_string_splitter\": {\n", - " \"type\": \"string_splitter\",\n", - " \"specific_rules\": [\"/dev\"],\n", - " \"generic_rules\": [\"/dev\"],\n", - " }\n", - "}\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the processor with the factory:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# StringSplitter\n", + "\n", + "This presentation's goal it to introduce the features of the `StringSplitter` and how to configure it." + ] + }, { - "data": { - "text/plain": [ - "string_splitter" + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The challenges\n", + "\n", + "- I want to split strings of varying length contained in a source field\n" ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from logging import getLogger\n", - "from logprep.factory import Factory\n", - "\n", - "logger = getLogger()\n", - "\n", - "processor = Factory.create(processor_config, logger)\n", - "processor\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "load rules to processor" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ + }, { - "data": { - "text/plain": [ - "[filter=\"ip_addresses\", StringSplitterRule.Config(description='', regex_fields=[], tests=[], tag_on_failure=['_string_splitter_failure'], source_fields=['ip_addresses'], target_field='ip_addresses', delete_source_fields=False, overwrite_target=True, extend_target_list=False, delimeter=' ')]" + "cell_type": "markdown", + "metadata": {}, + "source": [ + "given preprocessed log entry:" ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "for rule in rules:\n", - " processor._specific_tree.add_rule(rule)\n", - " \n", - "processor._specific_rules" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Process event" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from copy import deepcopy\n", - "\n", - "mydocument = deepcopy(document)\n", - "processor.process(mydocument)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Check 
Results" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ + }, { - "data": { - "text/plain": [ - "{'ip_addresses': '192.168.5.1, 10.10.2.1, fe80::, 127.0.0.1'}" + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "document = {\n", + " \"ip_addresses\": \"192.168.5.1, 10.10.2.1, fe80::, 127.0.0.1\"\n", + "}\n" ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "document" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create rules and processor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the rules:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'logprep.processor.string_splitter'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[10], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39msys\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[39m# sys.path.append(\"../../../../../\")\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mlogprep\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mprocessor\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mstring_splitter\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mrule\u001b[39;00m \u001b[39mimport\u001b[39;00m StringSplitterRule\n\u001b[1;32m 5\u001b[0m rules_definitions \u001b[39m=\u001b[39m [\n\u001b[1;32m 6\u001b[0m {\n\u001b[1;32m 7\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mfilter\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39m\"\u001b[39m\u001b[39mip_addresses\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 12\u001b[0m }\n\u001b[1;32m 13\u001b[0m ]\n\u001b[1;32m 14\u001b[0m rules \u001b[39m=\u001b[39m [StringSplitterRule\u001b[39m.\u001b[39m_create_from_dict(rule_dict) \u001b[39mfor\u001b[39;00m rule_dict \u001b[39min\u001b[39;00m rules_definitions]\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'logprep.processor.string_splitter'" + ] + } + ], + "source": [ + "import sys\n", + "sys.path.append(\"../../../../../\")\n", + "\n", + "from logprep.processor.string_splitter.rule import StringSplitterRule\n", + "rules_definitions = [\n", + " {\n", + " \"filter\": \"ip_addresses\",\n", + " \"string_splitter\": {\n", + " \"source_fields\": [\"ip_addresses\"],\n", + " \"target_field\": \"ip_address_list\"\n", + " },\n", + " }\n", + "]\n", + "rules = [StringSplitterRule._create_from_dict(rule_dict) for rule_dict in rules_definitions]\n", + "rules" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the processor config:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "processor_config = {\n", + " \"allmighty_string_splitter\": {\n", + " \"type\": \"string_splitter\",\n", + " \"specific_rules\": [\"/dev\"],\n", + " \"generic_rules\": [\"/dev\"],\n", + " }\n", + "}\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the processor with the factory:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, 
+ "outputs": [ + { + "data": { + "text/plain": [ + "string_splitter" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from logging import getLogger\n", + "from logprep.factory import Factory\n", + "\n", + "logger = getLogger()\n", + "\n", + "processor = Factory.create(processor_config)\n", + "processor\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "load rules to processor" + ] + }, { - "data": { - "text/plain": [ - "{'ip_addresses': ['192.168.5.1,', '10.10.2.1,', 'fe80::,', '127.0.0.1']}" + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[filter=\"ip_addresses\", StringSplitterRule.Config(description='', regex_fields=[], tests=[], tag_on_failure=['_string_splitter_failure'], source_fields=['ip_addresses'], target_field='ip_addresses', delete_source_fields=False, overwrite_target=True, extend_target_list=False, delimeter=' ')]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "for rule in rules:\n", + " processor._specific_tree.add_rule(rule)\n", + " \n", + "processor._specific_rules" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Process event" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from copy import deepcopy\n", + "\n", + "mydocument = deepcopy(document)\n", + "processor.process(mydocument)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Check Results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'ip_addresses': '192.168.5.1, 10.10.2.1, fe80::, 127.0.0.1'}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "document" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'ip_addresses': ['192.168.5.1,', '10.10.2.1,', 'fe80::,', '127.0.0.1']}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mydocument" ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" } - ], - "source": [ - "mydocument" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.11.0 ('.venv': venv)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.1" + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.11.0 ('.venv': venv)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "586280540a85d3e21edc698fe7b86af2848b9b02644e6c22463da25c40a3f1be" + } + } }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "586280540a85d3e21edc698fe7b86af2848b9b02644e6c22463da25c40a3f1be" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "nbformat": 4, + 
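# A minimal sketch of the string_splitter behaviour demonstrated above, not
# logprep's implementation. With the default whitespace delimiter the commas
# stay attached to each part, which is exactly the ip_addresses result shown
# in the cells above.
def split_field(event: dict, source_field: str, target_field: str, delimiter: str = " ") -> None:
    event[target_field] = event[source_field].split(delimiter)

doc = {"ip_addresses": "192.168.5.1, 10.10.2.1, fe80::, 127.0.0.1"}
split_field(doc, "ip_addresses", "ip_addresses")
print(doc["ip_addresses"])  # ['192.168.5.1,', '10.10.2.1,', 'fe80::,', '127.0.0.1']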
"nbformat_minor": 2 +} \ No newline at end of file diff --git a/doc/source/development/notebooks/processor_examples/timestamp_differ.ipynb b/doc/source/development/notebooks/processor_examples/timestamp_differ.ipynb index 33ebf6cd6..35b7745d6 100644 --- a/doc/source/development/notebooks/processor_examples/timestamp_differ.ipynb +++ b/doc/source/development/notebooks/processor_examples/timestamp_differ.ipynb @@ -1,195 +1,195 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# TimestampDiffer\n", - "\n", - "This presentations goal it to introduce the features of the `timestamp_differ` and how to configure it." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## The challenge\n", - "\n", - "I want calculate the time delta between two timestamps with different timestamp formats." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "from this:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "document = {\n", - " \"times\": {\n", - " \"ingest\": \"06-12-2022T10:00:00\",\n", - " \"processed\": \"2022-12-06 10:00:05\", \n", - " },\n", - " \"more\": \"event data\"\n", - "}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "to this:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "expected = {\n", - " \"times\": {\n", - " \"ingest\": \"06-12-2022T10:00:00\",\n", - " \"processed\": \"2022-12-06 10:00:05\", \n", - " \"processing_time\": \"5000.0\",\n", - " },\n", - " \"more\": \"event data\"\n", - "}\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create rule and processor" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the rule:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "sys.path.append(\"../../../../../\")\n", - "import tempfile\n", - "from pathlib import Path\n", - "\n", - "rule_yaml = \"\"\"---\n", - "filter: 'times.ingest AND times.processed'\n", - "timestamp_differ:\n", - " diff: ${times.processed:%Y-%m-%d %H:%M:%S} - ${times.ingest:%d-%m-%YT%H:%M:%S}\n", - " target_field: times.processing_time\n", - " output_format: milliseconds\n", - "description: '...'\n", - "\"\"\"\n", - "\n", - "rule_path = Path(tempfile.gettempdir()) / \"timestamp_differ\"\n", - "rule_path.mkdir(exist_ok=True)\n", - "rule_file = rule_path / \"timestamp_differ.yml\"\n", - "rule_file.write_text(rule_yaml)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the processor config:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "processor_config = {\n", - " \"my_timestampdiffer\":{ \n", - " \"type\": \"timestamp_differ\",\n", - " \"specific_rules\": [str(rule_path)],\n", - " \"generic_rules\": [\"/dev\"],\n", - " }\n", - " }" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the processor with the factory:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from unittest import mock\n", - "from logprep.factory import Factory\n", - "\n", - "mock_logger = mock.MagicMock()\n", - "processor = Factory.create(processor_config, mock_logger)\n", - "processor" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Process event" 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from copy import deepcopy\n", - "mydocument = deepcopy(document)\n", - "\n", - "\n", - "print(f\"before: {mydocument}\")\n", - "processor.process(mydocument)\n", - "print(f\"after: {mydocument}\")\n", - "print(mydocument == expected)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.2" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# TimestampDiffer\n", + "\n", + "This presentations goal it to introduce the features of the `timestamp_differ` and how to configure it." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## The challenge\n", + "\n", + "I want calculate the time delta between two timestamps with different timestamp formats." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "from this:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "document = {\n", + " \"times\": {\n", + " \"ingest\": \"06-12-2022T10:00:00\",\n", + " \"processed\": \"2022-12-06 10:00:05\", \n", + " },\n", + " \"more\": \"event data\"\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "to this:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "expected = {\n", + " \"times\": {\n", + " \"ingest\": \"06-12-2022T10:00:00\",\n", + " \"processed\": \"2022-12-06 10:00:05\", \n", + " \"processing_time\": \"5000.0\",\n", + " },\n", + " \"more\": \"event data\"\n", + "}\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create rule and processor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the rule:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append(\"../../../../../\")\n", + "import tempfile\n", + "from pathlib import Path\n", + "\n", + "rule_yaml = \"\"\"---\n", + "filter: 'times.ingest AND times.processed'\n", + "timestamp_differ:\n", + " diff: ${times.processed:%Y-%m-%d %H:%M:%S} - ${times.ingest:%d-%m-%YT%H:%M:%S}\n", + " target_field: times.processing_time\n", + " output_format: milliseconds\n", + "description: '...'\n", + "\"\"\"\n", + "\n", + "rule_path = Path(tempfile.gettempdir()) / \"timestamp_differ\"\n", + "rule_path.mkdir(exist_ok=True)\n", + "rule_file = rule_path / \"timestamp_differ.yml\"\n", + "rule_file.write_text(rule_yaml)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the processor config:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "processor_config = {\n", + " \"my_timestampdiffer\":{ \n", + " \"type\": \"timestamp_differ\",\n", + " \"specific_rules\": [str(rule_path)],\n", + " \"generic_rules\": [\"/dev\"],\n", + " }\n", + " }" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the processor with the factory:" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from unittest import mock\n", + "from logprep.factory import Factory\n", + "\n", + "mock_logger = mock.MagicMock()\n", + "processor = Factory.create(processor_config)\n", + "processor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Process event" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from copy import deepcopy\n", + "mydocument = deepcopy(document)\n", + "\n", + "\n", + "print(f\"before: {mydocument}\")\n", + "processor.process(mydocument)\n", + "print(f\"after: {mydocument}\")\n", + "print(mydocument == expected)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.2" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/doc/source/development/notebooks/processor_examples/timestamper.ipynb b/doc/source/development/notebooks/processor_examples/timestamper.ipynb index c0173503c..4ba960dfd 100644 --- a/doc/source/development/notebooks/processor_examples/timestamper.ipynb +++ b/doc/source/development/notebooks/processor_examples/timestamper.ipynb @@ -1,199 +1,199 @@ { - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Timestamper\n", - "\n", - "This presentations goal it to introduce the features of the `timestamper` and how to configure it." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## The challenge\n", - "\n", - "I want normalize different time formats to one output format and timezone." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "from this:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "document = {\n", - " \"winlog\": {\n", - " \"api\": \"wineventlog\",\n", - " \"event_id\": 123456789,\n", - " \"event_data\": {\"some_timestamp_utc\": \"1642160449\"},\n", - " }\n", - "}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "to this:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "expected = {\n", - " \"@timestamp\": \"2022-01-14T12:40:49+01:00\",\n", - " \"winlog\": {\n", - " \"api\": \"wineventlog\",\n", - " \"event_id\": 123456789,\n", - " \"event_data\": {\"some_timestamp_utc\": \"1642160449\"},\n", - " },\n", - "}\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create rule and processor" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the rule:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "sys.path.append(\"../../../../../\")\n", - "import tempfile\n", - "from pathlib import Path\n", - "\n", - "rule_yaml = \"\"\"---\n", - " filter: \"winlog.event_id: 123456789\"\n", - " timestamper: \n", - " source_fields: [\"winlog.event_data.some_timestamp_utc\"]\n", - " target_field: \"@timestamp\"\n", - " source_format: UNIX\n", - " source_timezone: UTC\n", - " target_timezone: Europe/Berlin\n", - " description: example timestamper rule\n", - "\"\"\"\n", - "\n", - "rule_path = Path(tempfile.gettempdir()) / \"timestamper\"\n", - "rule_path.mkdir(exist_ok=True)\n", - "rule_file = rule_path / \"timestamper.yml\"\n", - "rule_file.write_text(rule_yaml)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the processor config:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "processor_config = {\n", - " \"my_timestamper\":{ \n", - " \"type\": \"timestamper\",\n", - " \"specific_rules\": [str(rule_path)],\n", - " \"generic_rules\": [\"/dev\"],\n", - " }\n", - " }" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "create the processor with the factory:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from unittest import mock\n", - "from logprep.factory import Factory\n", - "\n", - "mock_logger = mock.MagicMock()\n", - "processor = Factory.create(processor_config, mock_logger)\n", - "processor" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Process event" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from copy import deepcopy\n", - "mydocument = deepcopy(document)\n", - "\n", - "\n", - "print(f\"before: {mydocument}\")\n", - "processor.process(mydocument)\n", - "print(f\"after: {mydocument}\")\n", - "print(mydocument == expected)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.2" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 
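# The conversion the timestamper rule above performs, as a hedged
# standard-library sketch: interpret the UNIX timestamp as UTC and render it
# in Europe/Berlin as ISO 8601 (zoneinfo assumes Python 3.9+).
from datetime import datetime, timezone
from zoneinfo import ZoneInfo

ts = datetime.fromtimestamp(int("1642160449"), tz=timezone.utc)
print(ts.astimezone(ZoneInfo("Europe/Berlin")).isoformat())  # 2022-01-14T12:40:49+01:00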
-} + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Timestamper\n", + "\n", + "This presentations goal it to introduce the features of the `timestamper` and how to configure it." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## The challenge\n", + "\n", + "I want normalize different time formats to one output format and timezone." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "from this:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "document = {\n", + " \"winlog\": {\n", + " \"api\": \"wineventlog\",\n", + " \"event_id\": 123456789,\n", + " \"event_data\": {\"some_timestamp_utc\": \"1642160449\"},\n", + " }\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "to this:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "expected = {\n", + " \"@timestamp\": \"2022-01-14T12:40:49+01:00\",\n", + " \"winlog\": {\n", + " \"api\": \"wineventlog\",\n", + " \"event_id\": 123456789,\n", + " \"event_data\": {\"some_timestamp_utc\": \"1642160449\"},\n", + " },\n", + "}\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create rule and processor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the rule:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append(\"../../../../../\")\n", + "import tempfile\n", + "from pathlib import Path\n", + "\n", + "rule_yaml = \"\"\"---\n", + " filter: \"winlog.event_id: 123456789\"\n", + " timestamper: \n", + " source_fields: [\"winlog.event_data.some_timestamp_utc\"]\n", + " target_field: \"@timestamp\"\n", + " source_format: UNIX\n", + " source_timezone: UTC\n", + " target_timezone: Europe/Berlin\n", + " description: example timestamper rule\n", + "\"\"\"\n", + "\n", + "rule_path = Path(tempfile.gettempdir()) / \"timestamper\"\n", + "rule_path.mkdir(exist_ok=True)\n", + "rule_file = rule_path / \"timestamper.yml\"\n", + "rule_file.write_text(rule_yaml)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the processor config:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "processor_config = {\n", + " \"my_timestamper\":{ \n", + " \"type\": \"timestamper\",\n", + " \"specific_rules\": [str(rule_path)],\n", + " \"generic_rules\": [\"/dev\"],\n", + " }\n", + " }" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "create the processor with the factory:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from unittest import mock\n", + "from logprep.factory import Factory\n", + "\n", + "mock_logger = mock.MagicMock()\n", + "processor = Factory.create(processor_config)\n", + "processor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Process event" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from copy import deepcopy\n", + "mydocument = deepcopy(document)\n", + "\n", + "\n", + "print(f\"before: {mydocument}\")\n", + "processor.process(mydocument)\n", + "print(f\"after: {mydocument}\")\n", + "print(mydocument == expected)" + ] + } + ], + 
"metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.2" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/logprep/abc/component.py b/logprep/abc/component.py index 36ea747b8..7a0678d7c 100644 --- a/logprep/abc/component.py +++ b/logprep/abc/component.py @@ -2,7 +2,6 @@ from abc import ABC from functools import cached_property -from logging import Logger from typing import Callable import msgspec @@ -43,7 +42,6 @@ def __attrs_post_init__(self): name: str _scheduler = Scheduler() - _logger: Logger _config: Config _decoder: msgspec.json.Decoder = msgspec.json.Decoder() _encoder: msgspec.json.Encoder = msgspec.json.Encoder() @@ -53,8 +51,7 @@ def metric_labels(self) -> dict: """Labels for the metrics""" return {"component": self._config.type, "name": self.name, "description": "", "type": ""} - def __init__(self, name: str, configuration: "Component.Config", logger: Logger): - self._logger = logger + def __init__(self, name: str, configuration: "Component.Config"): self._config = configuration self.name = name diff --git a/logprep/abc/output.py b/logprep/abc/output.py index f91c63c01..fd9d128c6 100644 --- a/logprep/abc/output.py +++ b/logprep/abc/output.py @@ -73,8 +73,8 @@ def metric_labels(self) -> dict: "name": self.name, } - def __init__(self, name: str, configuration: "Connector.Config", logger: Logger): - super().__init__(name, configuration, logger) + def __init__(self, name: str, configuration: "Connector.Config"): + super().__init__(name, configuration) self.input_connector = None @abstractmethod diff --git a/logprep/abc/processor.py b/logprep/abc/processor.py index 9508dce40..cf6e37445 100644 --- a/logprep/abc/processor.py +++ b/logprep/abc/processor.py @@ -1,7 +1,7 @@ """Abstract module for processors""" +import logging from abc import abstractmethod -from logging import DEBUG, Logger from pathlib import Path from typing import TYPE_CHECKING, List, Optional @@ -27,6 +27,8 @@ if TYPE_CHECKING: from logprep.processor.base.rule import Rule # pragma: no cover +logger = logging.getLogger("Processor") + class Processor(Component): """Abstract Processor Class to define the Interface""" @@ -83,8 +85,8 @@ class Config(Component.Config): _generic_tree: RuleTree _strategy = None - def __init__(self, name: str, configuration: "Processor.Config", logger: Logger): - super().__init__(name, configuration, logger) + def __init__(self, name: str, configuration: "Processor.Config"): + super().__init__(name, configuration) self._specific_tree = RuleTree( processor_name=self.name, processor_config=self._config, @@ -151,7 +153,7 @@ def process(self, event: dict): A dictionary representing a log event. 
""" - self._logger.debug(f"{self.describe()} processing event {event}") + logger.debug(f"{self.describe()} processing event {event}") self._process_rule_tree(event, self._specific_tree) self._process_rule_tree(event, self._generic_tree) @@ -247,16 +249,16 @@ def load_rules(self, specific_rules_targets: List[str], generic_rules_targets: L for specific_rules_target in specific_rules_targets: rules = self.rule_class.create_rules_from_target(specific_rules_target, self.name) for rule in rules: - self._specific_tree.add_rule(rule, self._logger) + self._specific_tree.add_rule(rule) for generic_rules_target in generic_rules_targets: rules = self.rule_class.create_rules_from_target(generic_rules_target, self.name) for rule in rules: - self._generic_tree.add_rule(rule, self._logger) - if self._logger.isEnabledFor(DEBUG): # pragma: no cover + self._generic_tree.add_rule(rule) + if logger.isEnabledFor(logging.DEBUG): # pragma: no cover number_specific_rules = self._specific_tree.number_of_rules - self._logger.debug(f"{self.describe()} loaded {number_specific_rules} specific rules") + logger.debug(f"{self.describe()} loaded {number_specific_rules} specific rules") number_generic_rules = self._generic_tree.number_of_rules - self._logger.debug(f"{self.describe()} loaded {number_generic_rules} generic rules") + logger.debug(f"{self.describe()} loaded {number_generic_rules} generic rules") @staticmethod def _field_exists(event: dict, dotted_field: str) -> bool: @@ -278,9 +280,9 @@ def _handle_warning_error(self, event, rule, error, failure_tags=None): else: add_and_overwrite(event, "tags", sorted(list({*tags, *failure_tags}))) if isinstance(error, ProcessingWarning): - self._logger.warning(str(error)) + logger.warning(str(error)) else: - self._logger.warning(str(ProcessingWarning(str(error), rule, event))) + logger.warning(str(ProcessingWarning(str(error), rule, event))) def _has_missing_values(self, event, rule, source_field_dict): missing_fields = list( diff --git a/logprep/connector/confluent_kafka/input.py b/logprep/connector/confluent_kafka/input.py index 8cae34d0a..c0d28f78b 100644 --- a/logprep/connector/confluent_kafka/input.py +++ b/logprep/connector/confluent_kafka/input.py @@ -28,8 +28,8 @@ auto.offset.reset: "earliest" """ # pylint: enable=line-too-long +import logging from functools import cached_property, partial -from logging import Logger from socket import getfqdn from typing import Callable, Optional, Tuple, Union @@ -75,6 +75,8 @@ DEFAULT_RETURN = 0 +logger = logging.getLogger("KafkaInput") + class ConfluentKafkaInput(Input): """A kafka input connector.""" @@ -237,8 +239,8 @@ class Config(Input.Config): __slots__ = ["_last_valid_records"] - def __init__(self, name: str, configuration: "Connector.Config", logger: Logger) -> None: - super().__init__(name, configuration, logger) + def __init__(self, name: str, configuration: "Connector.Config") -> None: + super().__init__(name, configuration) self._last_valid_records = {} @cached_property @@ -251,7 +253,7 @@ def _consumer(self) -> Consumer: confluent_kafka consumer object """ injected_config = { - "logger": self._logger, + "logger": logger, "on_commit": self._commit_callback, "stats_cb": self._stats_callback, "error_cb": self._error_callback, @@ -278,7 +280,7 @@ def _error_callback(self, error: KafkaException) -> None: the error that occurred """ self.metrics.number_of_errors += 1 - self._logger.error(f"{self.describe()}: {error}") + logger.error(f"{self.describe()}: {error}") def _stats_callback(self, stats: str) -> None: """Callback for 
statistics data. This callback is triggered by poll() @@ -451,10 +453,11 @@ def _handle_offsets(self, offset_handler: Callable) -> None: def _assign_callback(self, consumer, topic_partitions): for topic_partition in topic_partitions: offset, partition = topic_partition.offset, topic_partition.partition - self._logger.info( - f"{consumer.memberid()} was assigned to " - f"topic: {topic_partition.topic} | " - f"partition {partition}" + logger.info( + "%s was assigned to topic: %s | partition %s", + consumer.memberid(), + topic_partition.topic, + partition, ) if offset in SPECIAL_OFFSETS: offset = 0 @@ -466,10 +469,11 @@ def _assign_callback(self, consumer, topic_partitions): def _revoke_callback(self, consumer, topic_partitions): for topic_partition in topic_partitions: self.metrics.number_of_warnings += 1 - self._logger.warning( - f"{consumer.memberid()} to be revoked from " - f"topic: {topic_partition.topic} | " - f"partition {topic_partition.partition}" + logger.warning( + "%s to be revoked from topic: %s | partition %s", + consumer.memberid(), + topic_partition.topic, + topic_partition.partition, ) self.output_connector._write_backlog() self.batch_finished_callback() @@ -477,11 +481,11 @@ def _revoke_callback(self, consumer, topic_partitions): def _lost_callback(self, consumer, topic_partitions): for topic_partition in topic_partitions: self.metrics.number_of_warnings += 1 - self._logger.warning( - f"{consumer.memberid()} has lost " - f"topic: {topic_partition.topic} | " - f"partition {topic_partition.partition}" - "- try to reassign" + logger.warning( + "%s has lost topic: %s | partition %s - try to reassign", + consumer.memberid(), + topic_partition.topic, + topic_partition.partition, ) topic_partition.offset = OFFSET_STORED self._consumer.assign(topic_partitions) diff --git a/logprep/connector/confluent_kafka/output.py b/logprep/connector/confluent_kafka/output.py index daf03b71a..f0495c842 100644 --- a/logprep/connector/confluent_kafka/output.py +++ b/logprep/connector/confluent_kafka/output.py @@ -25,6 +25,7 @@ """ import json +import logging from datetime import datetime from functools import cached_property, partial from socket import getfqdn @@ -48,6 +49,8 @@ DEFAULT_RETURN = 0 +logger = logging.getLogger("KafkaOutput") + class ConfluentKafkaOutput(Output): """A kafka connector that serves as output connector.""" @@ -175,7 +178,7 @@ class Config(Output.Config): @cached_property def _producer(self): injected_config = { - "logger": self._logger, + "logger": logger, "stats_cb": self._stats_callback, "error_cb": self._error_callback, } @@ -194,7 +197,7 @@ def _error_callback(self, error: KafkaException): the error that occurred """ self.metrics.number_of_errors += 1 - self._logger.error(f"{self.describe()}: {error}") + logger.error(f"{self.describe()}: {error}") # pylint: disable=logging-fstring-interpolation def _stats_callback(self, stats: str) -> None: """Callback for statistics data. 
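The callback rewrites above also switch from f-strings to printf-style logging arguments. The difference only matters when the level is disabled; a small sketch (logger name illustrative):

    import logging

    logging.basicConfig(level=logging.WARNING)
    logger = logging.getLogger("KafkaInput")

    member_id, topic, partition = "consumer-1", "events", 0

    # eager: the message string is built even though DEBUG is disabled
    logger.debug(f"{member_id} was assigned to topic: {topic} | partition {partition}")

    # lazy: interpolation is skipped entirely when the record is dropped
    logger.debug("%s was assigned to topic: %s | partition %s", member_id, topic, partition)
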
This callback is triggered by poll()
diff --git a/logprep/connector/dummy/output.py b/logprep/connector/dummy/output.py
index 171a892fe..f6abf3d73 100644
--- a/logprep/connector/dummy/output.py
+++ b/logprep/connector/dummy/output.py
@@ -62,13 +62,8 @@ class Config(Output.Config):
         "_exceptions",
     ]
 
-    def __init__(
-        self,
-        name: str,
-        configuration: "Connector.Config",
-        logger: Logger,
-    ):
-        super().__init__(name, configuration, logger)
+    def __init__(self, name: str, configuration: "Connector.Config"):
+        super().__init__(name, configuration)
         self.events = []
         self.failed_events = []
         self.setup_called_count = 0
diff --git a/logprep/connector/elasticsearch/output.py b/logprep/connector/elasticsearch/output.py
index 1bcaf652d..c18ba530e 100644
--- a/logprep/connector/elasticsearch/output.py
+++ b/logprep/connector/elasticsearch/output.py
@@ -35,7 +35,6 @@
 import re
 import ssl
 from functools import cached_property
-from logging import Logger
 from typing import List, Optional, Pattern, Tuple, Union
 
 import elasticsearch as search
@@ -50,6 +49,8 @@
 from logprep.util.helper import get_dict_size_in_byte
 from logprep.util.time import TimeParser
 
+logger = logging.getLogger("ElasticsearchOutput")
+
 
 class ElasticsearchOutput(Output):
     """An Elasticsearch output connector."""
@@ -111,8 +112,8 @@ class Config(Output.Config):
 
     _size_error_pattern: Pattern[str]
 
-    def __init__(self, name: str, configuration: "ElasticsearchOutput.Config", logger: Logger):
-        super().__init__(name, configuration, logger)
+    def __init__(self, name: str, configuration: "ElasticsearchOutput.Config"):
+        super().__init__(name, configuration)
         self._message_backlog = []
         self._size_error_pattern = re.compile(
             r".*coordinating_operation_bytes=(?P<size>\d+), "
@@ -477,7 +478,7 @@ def _build_messages_for_large_error_documents(
                 f"Discarded message that is larger than the allowed size limit "
                 f"({size / 10 ** 6} MB/{self._config.maximum_message_size_mb} MB)"
             )
-            self._logger.warning(error_message)
+            logger.warning(error_message)
             error_document = {
                 "processed_snipped": f'{self._encoder.encode(message).decode("utf-8")[:1000]} ...',
diff --git a/logprep/connector/file/input.py b/logprep/connector/file/input.py
index a1ad41232..88f38094b 100644
--- a/logprep/connector/file/input.py
+++ b/logprep/connector/file/input.py
@@ -229,8 +229,8 @@ class Config(Input.Config):
     interval: int = field(default=1, validator=validators.instance_of((int, float)))
     """Defines the refresh interval, how often the file is checked for changes"""
 
-    def __init__(self, name: str, configuration: "FileInput.Config", logger: Logger):
-        super().__init__(name, configuration, logger)
+    def __init__(self, name: str, configuration: "FileInput.Config"):
+        super().__init__(name, configuration)
         self.stop_flag = threading.Event()
 
     def _calc_file_fingerprint(self, file_pointer: TextIO, fingerprint_length: int = None) -> tuple:
diff --git a/logprep/connector/http/input.py b/logprep/connector/http/input.py
index 9e9a45f2b..969f59e60 100644
--- a/logprep/connector/http/input.py
+++ b/logprep/connector/http/input.py
@@ -77,13 +77,13 @@
 * Responds with 405
 """
 
+import logging
 import multiprocessing as mp
 import queue
 import re
 import zlib
 from abc import ABC
 from base64 import b64encode
-from logging import Logger
 from typing import Callable, Mapping, Tuple, Union
 
 import falcon.asgi
@@ -101,6 +101,8 @@
 from logprep.util import http
 from logprep.util.credentials import CredentialsFactory
 
+logger = logging.getLogger("HTTPInput")
+
 
 def basic_auth(func: Callable):
     """Decorator to check basic
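All connector constructors above now share the same two-argument signature. A sketch of the updated subclassing contract, with `BaseOutput` and `DemoOutput` as illustrative stand-ins for logprep's base classes:

    class BaseOutput:
        """Illustrative stand-in for logprep.abc.output.Output."""

        def __init__(self, name: str, configuration: dict) -> None:
            self.name = name
            self._config = configuration


    class DemoOutput(BaseOutput):
        def __init__(self, name: str, configuration: dict) -> None:
            # the logger parameter is gone from the whole chain; only name and
            # configuration travel through super().__init__
            super().__init__(name, configuration)
            self.events = []
            self.failed_events = []
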
authentication. @@ -411,8 +413,8 @@ class Config(Input.Config): "jsonl": JSONLHttpEndpoint, } - def __init__(self, name: str, configuration: "HttpConnector.Config", logger: Logger) -> None: - super().__init__(name, configuration, logger) + def __init__(self, name: str, configuration: "HttpConnector.Config") -> None: + super().__init__(name, configuration) port = self._config.uvicorn_config["port"] host = self._config.uvicorn_config["host"] ssl_options = any( @@ -433,10 +435,11 @@ def setup(self): raise FatalInputError( self, "Necessary instance attribute `pipeline_index` could not be found." ) - self._logger.debug( - f"HttpInput Connector started on target {self.target} and " - f"queue {id(self.messages)} " - f"with queue_size: {self.messages._maxsize}" # pylint: disable=protected-access + logger.debug( + "HttpInput Connector started on target %s and queue %s with queue_size: %s", + self.target, + id(self.messages), + self.messages._maxsize, # pylint: disable=protected-access ) # Start HTTP Input only when in first process if self.pipeline_index != 1: diff --git a/logprep/connector/jsonl/output.py b/logprep/connector/jsonl/output.py index d574e6561..03b400f3a 100644 --- a/logprep/connector/jsonl/output.py +++ b/logprep/connector/jsonl/output.py @@ -57,8 +57,8 @@ class Config(Output.Config): "failed_events", ] - def __init__(self, name: str, configuration: "Output.Config", logger: Logger): - super().__init__(name, configuration, logger) + def __init__(self, name: str, configuration: "Output.Config"): + super().__init__(name, configuration) self.events = [] self.failed_events = [] diff --git a/logprep/connector/opensearch/output.py b/logprep/connector/opensearch/output.py index 76f26c67a..3d1568401 100644 --- a/logprep/connector/opensearch/output.py +++ b/logprep/connector/opensearch/output.py @@ -41,7 +41,7 @@ from logprep.abc.output import Output from logprep.connector.elasticsearch.output import ElasticsearchOutput -logging.getLogger("opensearch").setLevel(logging.WARNING) +logger = logging.getLogger("OpenSearchOutput") class MSGPECSerializer(JSONSerializer): diff --git a/logprep/connector/s3/output.py b/logprep/connector/s3/output.py index f42051567..82a24514a 100644 --- a/logprep/connector/s3/output.py +++ b/logprep/connector/s3/output.py @@ -38,10 +38,10 @@ """ +import logging import re from collections import defaultdict from functools import cached_property -from logging import Logger from time import time from typing import Any, DefaultDict, Optional from uuid import uuid4 @@ -82,6 +82,9 @@ def _inner(self: "S3Output", *args) -> Any: return _inner +logger = logging.getLogger("S3Output") + + class S3Output(Output): """An s3 output connector.""" @@ -160,8 +163,8 @@ class Metrics(Output.Metrics): _base_prefix: str - def __init__(self, name: str, configuration: "S3Output.Config", logger: Logger): - super().__init__(name, configuration, logger) + def __init__(self, name: str, configuration: "S3Output.Config"): + super().__init__(name, configuration) self._message_backlog = defaultdict(list) self._base_prefix = f"{self._config.base_prefix}/" if self._config.base_prefix else "" @@ -306,7 +309,7 @@ def _write_backlog(self) -> None: if not self._message_backlog: return - self._logger.info("Writing %s documents to s3", self._backlog_size) + logger.info("Writing %s documents to s3", self._backlog_size) for prefix_mb, document_batch in self._message_backlog.items(): self._write_document_batch(document_batch, f"{prefix_mb}/{time()}-{uuid4()}") self._message_backlog.clear() @@ -319,7 +322,7 @@ 
def _write_backlog(self) -> None: @_handle_s3_error def _write_document_batch(self, document_batch: dict, identifier: str) -> None: - self._logger.debug(f'Writing "{identifier}" to s3 bucket "{self._config.bucket}"') + logger.debug('Writing "%s" to s3 bucket "%s"', identifier, self._config.bucket) s3_obj = self._s3_resource.Object(self._config.bucket, identifier) s3_obj.put(Body=self._encoder.encode(document_batch), ContentType="application/json") self.metrics.number_of_successful_writes += len(document_batch) diff --git a/logprep/factory.py b/logprep/factory.py index 84d388b99..8140ae946 100644 --- a/logprep/factory.py +++ b/logprep/factory.py @@ -11,18 +11,14 @@ ) if TYPE_CHECKING: # pragma: no cover - from logging import Logger - from logprep.abc.component import Component class Factory: """Create components for logprep.""" - _logger: "Logger" = logging.getLogger("Factory") - @classmethod - def create(cls, configuration: dict, logger: "Logger") -> "Component": + def create(cls, configuration: dict) -> "Component": """Create component.""" if configuration == {} or configuration is None: raise InvalidConfigurationError("The component definition is empty.") @@ -48,4 +44,4 @@ def create(cls, configuration: dict, logger: "Logger") -> "Component": component_name, component_configuration_dict ) component_configuration.metric_labels = copy.deepcopy(metric_labels) - return component(component_name, component_configuration, logger) + return component(component_name, component_configuration) diff --git a/logprep/framework/pipeline.py b/logprep/framework/pipeline.py index 2c001c584..a723acd2e 100644 --- a/logprep/framework/pipeline.py +++ b/logprep/framework/pipeline.py @@ -142,7 +142,7 @@ def _output(self) -> dict[str, Output]: outputs = {} for output_name in output_names: output_config = output_configs.get(output_name) - outputs |= {output_name: Factory.create({output_name: output_config}, self.logger)} + outputs |= {output_name: Factory.create({output_name: output_config})} return outputs @cached_property @@ -154,7 +154,7 @@ def _input(self) -> Input: input_connector_config[connector_name].update( {"version_information": self._event_version_information} ) - return Factory.create(input_connector_config, self.logger) + return Factory.create(input_connector_config) def __init__( self, config: Configuration, pipeline_index: int = None, lock: Lock = None @@ -194,7 +194,7 @@ def _setup(self): self.logger.info("Finished building pipeline") def _create_processor(self, entry: dict) -> "Processor": - processor = Factory.create(entry, self.logger) + processor = Factory.create(entry) processor.setup() self.logger.debug(f"Created '{processor}' processor") return processor diff --git a/logprep/framework/pipeline_manager.py b/logprep/framework/pipeline_manager.py index c672c4630..30120b14d 100644 --- a/logprep/framework/pipeline_manager.py +++ b/logprep/framework/pipeline_manager.py @@ -16,6 +16,8 @@ from logprep.metrics.metrics import CounterMetric from logprep.util.configuration import Configuration +logger = logging.getLogger("Manager") + class PipelineManager: """Manage pipelines via multi-processing.""" @@ -50,7 +52,6 @@ class Metrics(Component.Metrics): def __init__(self, configuration: Configuration): self.metrics = self.Metrics(labels={"component": "manager"}) - self._logger = logging.getLogger("Manager") if multiprocessing.current_process().name == "MainProcess": self._set_http_input_queue(configuration) self._pipelines: list[multiprocessing.Process] = [] @@ -121,9 +122,10 @@ def 
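With the classmethod reduced to the configuration mapping, callers build components like this; the component name and the `dummy_output` type are illustrative, not prescribed by the patch:

    from logprep.factory import Factory

    # a single-entry mapping of component name to component configuration
    component = Factory.create({"my_output": {"type": "dummy_output"}})
    component.setup()
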
restart_failed_pipeline(self): self.prometheus_exporter.mark_process_dead(failed_pipeline.pid) self._pipelines.insert(index, self._create_pipeline(pipeline_index)) exit_code = failed_pipeline.exitcode - self._logger.warning( - f"Restarting failed pipeline on index {pipeline_index} " - f"with exit code: {exit_code}" + logger.warning( + "Restarting failed pipeline on index %s " "with exit code: %s", + pipeline_index, + exit_code, ) def stop(self): @@ -143,7 +145,7 @@ def restart(self): def _create_pipeline(self, index) -> multiprocessing.Process: pipeline = Pipeline(pipeline_index=index, config=self._configuration, lock=self._lock) - self._logger.info("Created new pipeline") + logger.info("Created new pipeline") process = multiprocessing.Process(target=pipeline.run, daemon=True) process.stop = pipeline.stop process.start() diff --git a/logprep/processor/amides/processor.py b/logprep/processor/amides/processor.py index 63ed54c3d..f0ea10cde 100644 --- a/logprep/processor/amides/processor.py +++ b/logprep/processor/amides/processor.py @@ -84,6 +84,7 @@ .. automodule:: logprep.processor.amides.rule """ +import logging from functools import cached_property, lru_cache from multiprocessing import current_process from pathlib import Path @@ -101,6 +102,8 @@ from logprep.util.getter import GetterFactory from logprep.util.helper import get_dotted_field_value +logger = logging.getLogger("Amides") + class Amides(Processor): """Proof-of-concept implementation of the Adaptive Misuse Detection System (AMIDES).""" @@ -212,13 +215,13 @@ def setup(self): def _load_and_unpack_models(self): if not Path(self._config.models_path).exists(): - self._logger.debug("Getting AMIDES models archive...") + logger.debug("Getting AMIDES models archive...") models_archive = Path(f"{current_process().name}-{self.name}.zip") models_archive.touch() models_archive.write_bytes( GetterFactory.from_string(str(self._config.models_path)).get_raw() ) - self._logger.debug("Finished getting AMIDES models archive...") + logger.debug("Finished getting AMIDES models archive...") self._config.models_path = str(models_archive.absolute()) with ZipFile(self._config.models_path, mode="r") as zip_file: diff --git a/logprep/processor/clusterer/processor.py b/logprep/processor/clusterer/processor.py index 53841e8a8..62c7f6fe3 100644 --- a/logprep/processor/clusterer/processor.py +++ b/logprep/processor/clusterer/processor.py @@ -44,8 +44,8 @@ from typing import List from attr import define, field, validators -from logprep.abc.processor import Processor +from logprep.abc.processor import Processor from logprep.processor.base.rule import Rule from logprep.processor.clusterer.rule import ClustererRule from logprep.processor.clusterer.signature_calculation.signature_phase import ( @@ -73,8 +73,8 @@ class Config(Processor.Config): rule_class = ClustererRule - def __init__(self, name: str, configuration: Processor.Config, logger: Logger): - super().__init__(name=name, configuration=configuration, logger=logger) + def __init__(self, name: str, configuration: Processor.Config): + super().__init__(name, configuration) self.matching_rules = [] self.sps = SignaturePhaseStreaming() self.has_custom_tests = True diff --git a/logprep/processor/domain_label_extractor/processor.py b/logprep/processor/domain_label_extractor/processor.py index 6c52ff96a..ea8e6f12f 100644 --- a/logprep/processor/domain_label_extractor/processor.py +++ b/logprep/processor/domain_label_extractor/processor.py @@ -35,6 +35,7 @@ """ import ipaddress +import logging import os import 
tempfile from functools import cached_property @@ -52,6 +53,8 @@ from logprep.util.helper import add_field_to, get_dotted_field_value from logprep.util.validators import list_of_urls_validator +logger = logging.getLogger("DomainLabelExtractor") + class DomainLabelExtractor(Processor): """Splits a domain into it's parts/labels.""" @@ -88,7 +91,7 @@ def setup(self): super().setup() if self._config.tld_lists: downloaded_tld_lists_paths = [] - self._logger.debug("start tldlists download...") + logger.debug("start tldlists download...") for index, tld_list in enumerate(self._config.tld_lists): logprep_tmp_dir = Path(tempfile.gettempdir()) / "logprep" os.makedirs(logprep_tmp_dir, exist_ok=True) @@ -99,7 +102,7 @@ def setup(self): list_path.write_bytes(GetterFactory.from_string(tld_list).get_raw()) downloaded_tld_lists_paths.append(f"file://{str(list_path.absolute())}") self._config.tld_lists = downloaded_tld_lists_paths - self._logger.debug("finished tldlists download...") + logger.debug("finished tldlists download...") def _apply_rules(self, event, rule: DomainLabelExtractorRule): """ diff --git a/logprep/processor/domain_resolver/processor.py b/logprep/processor/domain_resolver/processor.py index d85a941b6..24d73e210 100644 --- a/logprep/processor/domain_resolver/processor.py +++ b/logprep/processor/domain_resolver/processor.py @@ -33,6 +33,7 @@ """ import datetime +import logging import os import socket import tempfile @@ -56,6 +57,8 @@ from logprep.util.helper import get_dotted_field_value from logprep.util.validators import list_of_urls_validator +logger = logging.getLogger("DomainResolver") + class DomainResolver(Processor): """Resolve domains.""" @@ -137,13 +140,8 @@ class Metrics(Processor.Metrics): rule_class = DomainResolverRule - def __init__( - self, - name: str, - configuration: Processor.Config, - logger: Logger, - ): - super().__init__(name=name, configuration=configuration, logger=logger) + def __init__(self, name: str, configuration: Processor.Config): + super().__init__(name, configuration) self._domain_ip_map = {} @cached_property @@ -169,7 +167,7 @@ def setup(self): super().setup() if self._config.tld_lists: downloaded_tld_lists_paths = [] - self._logger.debug("start tldlists download...") + logger.debug("start tldlists download...") for index, tld_list in enumerate(self._config.tld_lists): logprep_tmp_dir = Path(tempfile.gettempdir()) / "logprep" os.makedirs(logprep_tmp_dir, exist_ok=True) @@ -180,7 +178,7 @@ def setup(self): list_path.write_bytes(GetterFactory.from_string(tld_list).get_raw()) downloaded_tld_lists_paths.append(f"file://{str(list_path.absolute())}") self._config.tld_lists = downloaded_tld_lists_paths - self._logger.debug("finished tldlists download...") + logger.debug("finished tldlists download...") def _apply_rules(self, event, rule): source_field = rule.source_fields[0] diff --git a/logprep/processor/generic_adder/mysql_connector.py b/logprep/processor/generic_adder/mysql_connector.py index 700154332..c212ea988 100644 --- a/logprep/processor/generic_adder/mysql_connector.py +++ b/logprep/processor/generic_adder/mysql_connector.py @@ -1,12 +1,14 @@ """This module is used to connect to a MySQL database and to retrieve data from a SQL table.""" +import logging import time -from logging import Logger from typing import Optional import mysql import mysql.connector as db +logger = logging.getLogger("MySQLConnector") + class MySQLConnector: """Used to connect to a MySQL database and to retrieve data from a table if it has changed.""" @@ -31,11 +33,9 @@ class 
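The tld-list handling above (and the geoip variant further down) follows one download-once-to-tmp pattern. A generic sketch, assuming plain urllib in place of logprep's GetterFactory; all names are illustrative:

    import os
    import tempfile
    import urllib.request
    from pathlib import Path


    def download_once(url: str, file_name: str) -> str:
        """Fetch a resource into <tmpdir>/logprep once and return a file:// reference."""
        logprep_tmp_dir = Path(tempfile.gettempdir()) / "logprep"
        os.makedirs(logprep_tmp_dir, exist_ok=True)
        target = logprep_tmp_dir / file_name
        if not target.exists():
            # stand-in for GetterFactory.from_string(url).get_raw()
            with urllib.request.urlopen(url) as response:
                target.write_bytes(response.read())
        return f"file://{target.absolute()}"
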
MySQLConnector: _last_table_checksum: Optional[int] """Checksum of the database table that was obtained on the last update check""" - _logger: Logger - _cursor: mysql.connector.connection.CursorBase - def __init__(self, sql_config: dict, logger: Logger): + def __init__(self, sql_config: dict): """Initialize the MySQLConnector. Parameters @@ -51,7 +51,6 @@ def __init__(self, sql_config: dict, logger: Logger): True if the SQL table has changed, False otherwise. """ - self._logger = logger self.connection = None self.cursor = None @@ -183,5 +182,5 @@ def get_data(self) -> dict: return table except db.Error as error: - self._logger.warning(f"Error retrieving entry from database: {error}") + logger.warning(f"Error retrieving entry from database: {error}") return {} diff --git a/logprep/processor/generic_adder/processor.py b/logprep/processor/generic_adder/processor.py index 2e47652bb..1da758c70 100644 --- a/logprep/processor/generic_adder/processor.py +++ b/logprep/processor/generic_adder/processor.py @@ -39,7 +39,6 @@ import os import re import time -from logging import Logger from typing import Optional from attr import define, field, validators @@ -136,7 +135,7 @@ class Config(Processor.Config): _db_file_path: Optional[str] """Path to file containing table from SQL database""" - def __init__(self, name: str, configuration: Processor.Config, logger: Logger): + def __init__(self, name: str, configuration: Processor.Config): """Initialize a generic adder instance. Performs a basic processor initialization. Furthermore, a SQL database and a SQL table are being initialized if a SQL configuration exists. @@ -146,10 +145,8 @@ def __init__(self, name: str, configuration: Processor.Config, logger: Logger): Name for the generic adder. configuration : Processor.Config Configuration for SQL adding and rule loading. - logger : logging.Logger - Logger to use. """ - super().__init__(name, configuration, logger) + super().__init__(name, configuration) self._db_table = None sql_config = configuration.sql_config @@ -157,7 +154,7 @@ def __init__(self, name: str, configuration: Processor.Config, logger: Logger): self._initialize_sql(sql_config) def _initialize_sql(self, sql_config): - self._db_connector = MySQLConnector(sql_config, self._logger) if sql_config else None + self._db_connector = MySQLConnector(sql_config) if sql_config else None if self._db_connector: self._file_lock_path = sql_config.get("file_lock_path", "sql_update.lock") self._db_file_path = sql_config.get("db_file_path", "sql_db_table.json") diff --git a/logprep/processor/generic_resolver/processor.py b/logprep/processor/generic_resolver/processor.py index c1cd32dd0..40c7706fd 100644 --- a/logprep/processor/generic_resolver/processor.py +++ b/logprep/processor/generic_resolver/processor.py @@ -51,13 +51,8 @@ class GenericResolver(Processor): rule_class = GenericResolverRule - def __init__( - self, - name: str, - configuration: Processor.Config, - logger: Logger, - ): - super().__init__(name=name, configuration=configuration, logger=logger) + def __init__(self, name: str, configuration: Processor.Config): + super().__init__(name, configuration) self._replacements_from_file = {} def _apply_rules(self, event, rule): diff --git a/logprep/processor/geoip_enricher/processor.py b/logprep/processor/geoip_enricher/processor.py index b18639fae..5043b57c5 100644 --- a/logprep/processor/geoip_enricher/processor.py +++ b/logprep/processor/geoip_enricher/processor.py @@ -26,6 +26,7 @@ .. 
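The MySQLConnector is now constructed from the SQL configuration alone. An illustrative instantiation; the configuration keys here are assumptions, not a documented schema:

    from logprep.processor.generic_adder.mysql_connector import MySQLConnector

    # illustrative values; actual keys come from the generic_adder's sql_config section
    sql_config = {
        "host": "127.0.0.1",
        "user": "logprep",
        "password": "secret",
        "database": "rules",
        "table": "enrichment",
    }

    connector = MySQLConnector(sql_config)  # no logger argument anymore
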
automodule:: logprep.processor.geoip_enricher.rule """ +import logging import os import tempfile from functools import cached_property @@ -43,6 +44,8 @@ from logprep.util.getter import GetterFactory from logprep.util.helper import add_field_to, get_dotted_field_value +logger = logging.getLogger("GeoipEnricher") + class GeoipEnricher(Processor): """Resolve values in documents by referencing a mapping list.""" @@ -71,7 +74,7 @@ def setup(self): super().setup() db_path = Path(self._config.db_path) if not db_path.exists(): - self._logger.debug("start geoip database download...") + logger.debug("start geoip database download...") logprep_tmp_dir = Path(tempfile.gettempdir()) / "logprep" os.makedirs(logprep_tmp_dir, exist_ok=True) db_path_file = logprep_tmp_dir / f"{self.name}.mmdb" @@ -81,7 +84,7 @@ def setup(self): db_path_file.write_bytes( GetterFactory.from_string(str(self._config.db_path)).get_raw() ) - self._logger.debug("finished geoip database download.") + logger.debug("finished geoip database download.") self._config.db_path = str(db_path_file.absolute()) def _try_getting_geoip_data(self, ip_string): diff --git a/logprep/processor/grokker/processor.py b/logprep/processor/grokker/processor.py index c910dedae..d65df3fd6 100644 --- a/logprep/processor/grokker/processor.py +++ b/logprep/processor/grokker/processor.py @@ -31,6 +31,7 @@ .. automodule:: logprep.processor.grokker.rule """ +import logging import re from pathlib import Path from zipfile import ZipFile @@ -47,6 +48,8 @@ from logprep.util.getter import GetterFactory from logprep.util.helper import add_field_to, get_dotted_field_value +logger = logging.getLogger("Grokker") + class Grokker(Processor): """A processor that dissects a message by grok patterns""" @@ -119,10 +122,10 @@ def setup(self): def _download_zip_file(self, source_file: str, target_dir: Path): if not target_dir.exists(): - self._logger.debug("start grok pattern download...") + logger.debug("start grok pattern download...") archive = Path(f"{target_dir}.zip") archive.touch() archive.write_bytes(GetterFactory.from_string(source_file).get_raw()) - self._logger.debug("finished grok pattern download.") + logger.debug("finished grok pattern download.") with ZipFile(str(archive), mode="r") as zip_file: zip_file.extractall(target_dir) diff --git a/logprep/processor/hyperscan_resolver/processor.py b/logprep/processor/hyperscan_resolver/processor.py index 04137dda3..aa5b4383d 100644 --- a/logprep/processor/hyperscan_resolver/processor.py +++ b/logprep/processor/hyperscan_resolver/processor.py @@ -32,7 +32,6 @@ """ import errno -from logging import Logger from os import makedirs, path from typing import Any, Dict, Tuple @@ -92,13 +91,8 @@ class Config(Processor.Config): rule_class = HyperscanResolverRule - def __init__( - self, - name: str, - configuration: Processor.Config, - logger: Logger, - ): - super().__init__(name=name, configuration=configuration, logger=logger) + def __init__(self, name: str, configuration: Processor.Config): + super().__init__(name=name, configuration=configuration) self._hyperscan_databases = {} hyperscan_db_path = configuration.hyperscan_db_path diff --git a/logprep/processor/labeler/processor.py b/logprep/processor/labeler/processor.py index fe27fbca8..865d5cc81 100644 --- a/logprep/processor/labeler/processor.py +++ b/logprep/processor/labeler/processor.py @@ -26,7 +26,6 @@ .. 
automodule:: logprep.processor.labeler.rule """ -from logging import Logger from typing import Optional from attr import define, field, validators @@ -60,14 +59,9 @@ class Config(Processor.Config): rule_class = LabelerRule - def __init__( - self, - name: str, - configuration: Processor.Config, - logger: Logger, - ): + def __init__(self, name: str, configuration: Processor.Config): self._schema = LabelingSchema.create_from_file(configuration.schema) - super().__init__(name, configuration=configuration, logger=logger) + super().__init__(name, configuration=configuration) def setup(self): super().setup() diff --git a/logprep/processor/list_comparison/processor.py b/logprep/processor/list_comparison/processor.py index 4a23655fc..7833fbde3 100644 --- a/logprep/processor/list_comparison/processor.py +++ b/logprep/processor/list_comparison/processor.py @@ -28,8 +28,6 @@ .. automodule:: logprep.processor.list_comparison.rule """ -from logging import Logger - from attr import define, field, validators from logprep.abc.processor import Processor @@ -61,8 +59,8 @@ class Config(Processor.Config): rule_class = ListComparisonRule - def __init__(self, name: str, configuration: "Processor.Config", logger: Logger): - super().__init__(name, configuration, logger) + def __init__(self, name: str, configuration: "Processor.Config"): + super().__init__(name, configuration) self.setup() def setup(self): diff --git a/logprep/processor/normalizer/processor.py b/logprep/processor/normalizer/processor.py index 7ed5dc7a6..44e0fa6b8 100644 --- a/logprep/processor/normalizer/processor.py +++ b/logprep/processor/normalizer/processor.py @@ -39,7 +39,6 @@ import os import re from functools import reduce -from logging import Logger from pathlib import Path from time import time from typing import List, Optional, Tuple, Union @@ -123,7 +122,7 @@ class Config(Processor.Config): _encoder = msgspec.json.Encoder() _decoder = msgspec.json.Decoder() - def __init__(self, name: str, configuration: Processor.Config, logger: Logger): + def __init__(self, name: str, configuration: Processor.Config): self._event = None self._conflicting_fields = [] @@ -147,7 +146,7 @@ def __init__(self, name: str, configuration: Processor.Config, logger: Logger): if self._html_replace_fields: getter = GetterFactory.from_string(self._html_replace_fields) self._html_replace_fields = getter.get_yaml() - super().__init__(name=name, configuration=configuration, logger=logger) + super().__init__(name=name, configuration=configuration) # pylint: enable=arguments-differ diff --git a/logprep/processor/pseudonymizer/processor.py b/logprep/processor/pseudonymizer/processor.py index 2ca907fad..7398fed89 100644 --- a/logprep/processor/pseudonymizer/processor.py +++ b/logprep/processor/pseudonymizer/processor.py @@ -45,7 +45,6 @@ import re from functools import cached_property, lru_cache from itertools import chain -from logging import Logger from typing import Optional, Pattern from urllib.parse import parse_qs, urlencode, urlparse @@ -217,8 +216,8 @@ def _get_pseudonym_dict_cached(self): def _pseudonymize_url_cached(self): return lru_cache(maxsize=self._config.max_cached_pseudonymized_urls)(self._pseudonymize_url) - def __init__(self, name: str, configuration: Processor.Config, logger: Logger): - super().__init__(name=name, configuration=configuration, logger=logger) + def __init__(self, name: str, configuration: Processor.Config): + super().__init__(name=name, configuration=configuration) self.pseudonyms = [] def setup(self): diff --git 
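Independent of the signature change, the pseudonymizer hunk above shows a pattern worth noting: a bound method wrapped in `lru_cache` via `cached_property`, so each instance gets its own cache with a configurable size. A generic sketch with invented class and method names:

    from functools import cached_property, lru_cache


    class UrlMasker:
        def __init__(self, max_cached: int = 1000) -> None:
            self._max_cached = max_cached

        @cached_property
        def _mask_cached(self):
            # wrap the bound method once per instance; a plain @lru_cache on the
            # method would share one cache (and its maxsize) across all instances
            return lru_cache(maxsize=self._max_cached)(self._mask)

        def _mask(self, url: str) -> str:
            return f"masked({url})"


    masker = UrlMasker()
    assert masker._mask_cached("https://example.com") == "masked(https://example.com)"
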
a/logprep/processor/selective_extractor/processor.py b/logprep/processor/selective_extractor/processor.py index 05d503f17..3441a25da 100644 --- a/logprep/processor/selective_extractor/processor.py +++ b/logprep/processor/selective_extractor/processor.py @@ -29,7 +29,6 @@ .. automodule:: logprep.processor.selective_extractor.rule """ -from logging import Logger from typing import List, Tuple from logprep.processor.field_manager.processor import FieldManager @@ -47,13 +46,8 @@ class SelectiveExtractor(FieldManager): rule_class = SelectiveExtractorRule - def __init__( - self, - name: str, - configuration: FieldManager.Config, - logger: Logger, - ): - super().__init__(name=name, configuration=configuration, logger=logger) + def __init__(self, name: str, configuration: FieldManager.Config): + super().__init__(name=name, configuration=configuration) self._extra_data = [] def process(self, event: dict) -> List[Tuple[List, str, str]]: diff --git a/logprep/processor/template_replacer/processor.py b/logprep/processor/template_replacer/processor.py index 64f073619..0909207d4 100644 --- a/logprep/processor/template_replacer/processor.py +++ b/logprep/processor/template_replacer/processor.py @@ -89,8 +89,8 @@ class Config(Processor.Config): rule_class = TemplateReplacerRule - def __init__(self, name: str, configuration: Processor.Config, logger: Logger): - super().__init__(name=name, configuration=configuration, logger=logger) + def __init__(self, name: str, configuration: Processor.Config): + super().__init__(name=name, configuration=configuration) pattern = configuration.pattern template_path = configuration.template self._target_field = pattern["target_field"] diff --git a/logprep/util/auto_rule_tester/auto_rule_corpus_tester.py b/logprep/util/auto_rule_tester/auto_rule_corpus_tester.py index b7d8be6a2..dbdbaf09e 100644 --- a/logprep/util/auto_rule_tester/auto_rule_corpus_tester.py +++ b/logprep/util/auto_rule_tester/auto_rule_corpus_tester.py @@ -91,7 +91,6 @@ import tempfile from functools import cached_property from json import JSONDecodeError -from logging.config import dictConfig from pathlib import Path from pprint import pprint from typing import List @@ -105,6 +104,8 @@ from logprep.util.helper import get_dotted_field_value from logprep.util.json_handling import parse_json +logger = logging.getLogger("corpustester") + def align_extra_output_formats(extra_outputs): """ @@ -147,14 +148,6 @@ class TestCase: generated_extra_output: dict = field(validator=validators.instance_of(list), default=[]) failed: bool = field(validator=validators.instance_of(bool), default=False) report: List = Factory(list) - warnings: str = field(default="") - - def __init__(self, config_paths: tuple[str], input_test_data_path: str): - self._original_config_paths = config_paths - self._input_test_data_path = input_test_data_path - self.log_capture_string = io.StringIO() - self.logger = logging.getLogger("corpustester") - self.logger.handlers[0].setStream(self.log_capture_string) @cached_property def _tmp_dir(self): @@ -193,9 +186,13 @@ def _pipeline(self): } patched_config.pipeline = config.pipeline pipeline = Pipeline(config=patched_config) - pipeline.logger = self.logger return pipeline + def __init__(self, config_paths: tuple[str], input_test_data_path: str): + self._original_config_paths = config_paths + self._input_test_data_path = input_test_data_path + self.log_capture_string = sys.stdout + def run(self): """ Starts the test routine by reading all input files, patching the logprep pipline, executing @@ -220,7 
+217,6 @@ def _run_pipeline_per_test_case(self): for test_case_id, test_case in self._test_cases.items(): _ = [processor.setup() for processor in self._pipeline._pipeline] parsed_event, extra_outputs = self._pipeline.process_pipeline() - test_case.warnings = self._retrieve_log_capture() extra_outputs = align_extra_output_formats(extra_outputs) test_case.generated_output = parsed_event test_case.generated_extra_output = extra_outputs @@ -228,13 +224,6 @@ def _run_pipeline_per_test_case(self): self._compare_extra_data_output(test_case_id, extra_outputs) self._print_pass_fail_statements(test_case_id) - def _retrieve_log_capture(self): - log_capture = self.log_capture_string.getvalue() - # set new log_capture to clear previous entries - self.log_capture_string = io.StringIO() - self.logger.handlers[0].setStream(self.log_capture_string) - return log_capture - def _compare_logprep_outputs(self, test_case_id, logprep_output): test_case = self._test_cases.get(test_case_id, {}) if test_case.expected_output: @@ -341,9 +330,6 @@ def _print_pass_fail_statements(self, test_case_id): status = f"{Style.BRIGHT}{Fore.RESET} SKIPPED - (no expected output given)" elif len(test_case.report) > 0: status = f"{Style.BRIGHT}{Fore.RED} FAILED" - elif test_case.warnings: - status = f"{Style.BRIGHT}{Fore.YELLOW} PASSED - (with warnings)" - print(f"{Fore.BLUE} Test Case: {Fore.CYAN}{test_case_id} {status}{Style.RESET_ALL}") def _print_test_reports(self): @@ -351,7 +337,7 @@ def _print_test_reports(self): return print(Style.BRIGHT + "# Test Cases Detailed Reports:" + Style.RESET_ALL) for test_case_id, test_case in self._test_cases.items(): - if (test_case.warnings or test_case.report) and test_case.expected_output: + if test_case.report and test_case.expected_output: self._print_long_test_result(test_case_id, test_case) print() @@ -359,13 +345,6 @@ def _print_long_test_result(self, test_case_id, test_case): report_title = f"test report for '{test_case_id}'" print(f"{Fore.RED}{Style.BRIGHT}↓ {report_title} ↓ {Style.RESET_ALL}") print_logprep_output = True - if test_case.warnings and not test_case.report: - print(Fore.GREEN + "Test passed, but with following warnings:" + Fore.RESET) - print(test_case.warnings) - print_logprep_output = False - if test_case.warnings and test_case.report: - print(Fore.RED + "Logprep Warnings:" + Fore.RESET) - print(test_case.warnings) for statement in test_case.report: if isinstance(statement, (dict, list)): pprint(statement) diff --git a/logprep/util/auto_rule_tester/auto_rule_tester.py b/logprep/util/auto_rule_tester/auto_rule_tester.py index a5704b062..c20def687 100644 --- a/logprep/util/auto_rule_tester/auto_rule_tester.py +++ b/logprep/util/auto_rule_tester/auto_rule_tester.py @@ -466,7 +466,7 @@ def _check_which_rule_files_miss_tests(self, rules_pn): @staticmethod def _get_processor_instance(name, processor_cfg, logger_): cfg = {name: processor_cfg} - processor = Factory.create(cfg, logger_) + processor = Factory.create(cfg) return processor @staticmethod diff --git a/logprep/util/configuration.py b/logprep/util/configuration.py index de407438c..403eb17e4 100644 --- a/logprep/util/configuration.py +++ b/logprep/util/configuration.py @@ -728,7 +728,7 @@ def _build_merged_pipeline(self): def _load_rule_definitions(self, processor_definition: dict) -> dict: processor_definition = deepcopy(processor_definition) - _ = Factory.create(processor_definition, logger=getLogger(__name__)) + _ = Factory.create(processor_definition) processor_name, processor_config = processor_definition.popitem() 
for rule_tree_name in ("specific_rules", "generic_rules"): rules_targets = self._resolve_directories(processor_config.get(rule_tree_name, [])) @@ -803,7 +803,7 @@ def _verify(self): try: if not self.input: raise RequiredConfigurationKeyMissingError("input") - Factory.create(self.input, logger=getLogger(__name__)) + Factory.create(self.input) except Exception as error: # pylint: disable=broad-except errors.append(error) if not self.output: @@ -811,12 +811,12 @@ def _verify(self): else: for output_name, output_config in self.output.items(): try: - Factory.create({output_name: output_config}, logger=getLogger(__name__)) + Factory.create({output_name: output_config}) except Exception as error: # pylint: disable=broad-except errors.append(error) for processor_config in self.pipeline: try: - processor = Factory.create(deepcopy(processor_config), logger=getLogger(__name__)) + processor = Factory.create(deepcopy(processor_config)) self._verify_rules(processor) except (FactoryError, TypeError, ValueError, InvalidRuleDefinitionError) as error: errors.append(error) diff --git a/logprep/util/defaults.py b/logprep/util/defaults.py index ab260bf26..9b296eb1e 100644 --- a/logprep/util/defaults.py +++ b/logprep/util/defaults.py @@ -20,8 +20,7 @@ "class": "logging.StreamHandler", "formatter": "logprep", "stream": "ext://sys.stdout", - }, - "string": {"class": "logging.StreamHandler", "level": "WARNING"}, + } }, "loggers": { "root": {"level": "INFO", "handlers": ["console"]}, @@ -29,11 +28,6 @@ "urllib3.connectionpool": {"level": "ERROR"}, "elasticsearch": {"level": "ERROR"}, "opensearch": {"level": "ERROR"}, - "corpustester": { - "level": "WARNING", - "handlers": ["string"], - "propagate": "1", - }, }, "filters": {}, "disable_existing_loggers": False, diff --git a/logprep/util/pre_detector_rule_matching_tester.py b/logprep/util/pre_detector_rule_matching_tester.py index be89863c0..8d41f28ef 100644 --- a/logprep/util/pre_detector_rule_matching_tester.py +++ b/logprep/util/pre_detector_rule_matching_tester.py @@ -134,7 +134,7 @@ def _get_pre_detector() -> PreDetector: } logger = logging.getLogger() logger.disabled = True - processor = Factory.create(processor_cfg, logger) + processor = Factory.create(processor_cfg) return processor def _print_results(self): diff --git a/tests/unit/component/base.py b/tests/unit/component/base.py index b3b158258..1dfeca62a 100644 --- a/tests/unit/component/base.py +++ b/tests/unit/component/base.py @@ -31,7 +31,7 @@ class BaseComponentTestCase(ABC): def setup_method(self) -> None: config = {"Test Instance Name": self.CONFIG} - self.object = Factory.create(configuration=config, logger=self.logger) + self.object = Factory.create(configuration=config) assert "metrics" not in self.object.__dict__, "metrics should be a cached_property" self.metric_attributes = asdict( self.object.metrics, diff --git a/tests/unit/connector/base.py b/tests/unit/connector/base.py index 9e5c59d6e..010c1e214 100644 --- a/tests/unit/connector/base.py +++ b/tests/unit/connector/base.py @@ -58,7 +58,7 @@ def test_add_hmac_returns_true_if_hmac_options(self): } } ) - connector = Factory.create({"test connector": connector_config}, logger=self.logger) + connector = Factory.create({"test connector": connector_config}) assert connector._add_hmac is True def test_add_hmac_to_adds_hmac(self): @@ -74,7 +74,7 @@ def test_add_hmac_to_adds_hmac(self): } } ) - connector = Factory.create({"test connector": connector_config}, logger=self.logger) + connector = Factory.create({"test connector": connector_config}) 
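After dropping the "string" handler and the "corpustester" logger, the default logging configuration boils down to one console handler plus per-logger levels. A trimmed sketch of such a dictConfig; the formatter details are illustrative:

    import logging
    from logging.config import dictConfig

    LOG_CONFIG = {
        "version": 1,
        "disable_existing_loggers": False,
        "formatters": {"logprep": {"format": "%(asctime)s %(name)s %(levelname)s: %(message)s"}},
        "handlers": {
            "console": {
                "class": "logging.StreamHandler",
                "formatter": "logprep",
                "stream": "ext://sys.stdout",
            }
        },
        "loggers": {
            "root": {"level": "INFO", "handlers": ["console"]},
            "opensearch": {"level": "ERROR"},
        },
    }

    dictConfig(LOG_CONFIG)
    logging.getLogger("Manager").info("visible through the console handler")
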
processed_event, non_critical_error_msg = connector._add_hmac_to( {"message": "test message"}, b"test message" ) @@ -101,7 +101,7 @@ def test_add_hmac_to_adds_hmac_even_if_no_raw_message_was_given(self): } } ) - connector = Factory.create({"test connector": connector_config}, logger=self.logger) + connector = Factory.create({"test connector": connector_config}) processed_event, non_critical_error_msg = connector._add_hmac_to( {"message": "test message"}, None ) @@ -129,7 +129,7 @@ def test_get_next_with_hmac_of_raw_message(self): } } ) - connector = Factory.create({"test connector": connector_config}, logger=self.logger) + connector = Factory.create({"test connector": connector_config}) test_event = {"message": "with_content"} raw_encoded_test_event = json.dumps(test_event, separators=(",", ":")).encode("utf-8") connector._get_event = mock.MagicMock( @@ -164,7 +164,7 @@ def test_get_next_with_hmac_of_subfield(self): } } ) - connector = Factory.create({"test connector": connector_config}, logger=self.logger) + connector = Factory.create({"test connector": connector_config}) test_event = {"message": {"with_subfield": "content"}} raw_encoded_test_event = json.dumps(test_event, separators=(",", ":")).encode("utf-8") connector._get_event = mock.MagicMock( @@ -200,7 +200,7 @@ def test_get_next_with_hmac_of_non_existing_subfield(self): } } ) - connector = Factory.create({"test connector": connector_config}, logger=self.logger) + connector = Factory.create({"test connector": connector_config}) test_event = {"message": {"with_subfield": "content"}} raw_encoded_test_event = json.dumps(test_event, separators=(",", ":")).encode("utf-8") connector._get_event = mock.MagicMock( @@ -234,7 +234,7 @@ def test_get_next_with_hmac_result_in_dotted_subfield(self): } } ) - connector = Factory.create({"test connector": connector_config}, logger=self.logger) + connector = Factory.create({"test connector": connector_config}) test_event = {"message": "with_content"} raw_encoded_test_event = json.dumps(test_event, separators=(",", ":")).encode("utf-8") connector._get_event = mock.MagicMock( @@ -275,7 +275,7 @@ def test_get_next_with_hmac_result_in_already_existing_subfield(self): } } ) - connector = Factory.create({"test connector": connector_config}, logger=self.logger) + connector = Factory.create({"test connector": connector_config}) test_event = {"message": {"with_subfield": "content"}} raw_encoded_test_event = json.dumps(test_event, separators=(",", ":")).encode("utf-8") connector._get_event = mock.MagicMock( @@ -291,7 +291,7 @@ def test_get_next_without_hmac(self): connector_config = deepcopy(self.CONFIG) assert not connector_config.get("preprocessing", {}).get("hmac") test_event = {"message": "with_content"} - connector = Factory.create({"test connector": connector_config}, logger=self.logger) + connector = Factory.create({"test connector": connector_config}) raw_encoded_test_event = json.dumps(test_event, separators=(",", ":")).encode("utf-8") connector._get_event = mock.MagicMock( return_value=(test_event.copy(), raw_encoded_test_event) @@ -309,7 +309,7 @@ def test_preprocessing_version_info_is_added_if_configured(self): } connector_config = deepcopy(self.CONFIG) connector_config.update(preprocessing_config) - connector = Factory.create({"test connector": connector_config}, logger=self.logger) + connector = Factory.create({"test connector": connector_config}) test_event = {"any": "content"} connector._get_event = mock.MagicMock(return_value=(test_event, None)) result, _ = connector.get_next(0.01) @@ -325,7 
+325,7 @@ def test_pipeline_preprocessing_does_not_add_versions_if_target_field_exists_alr } connector_config = deepcopy(self.CONFIG) connector_config.update(preprocessing_config) - connector = Factory.create({"test connector": connector_config}, logger=self.logger) + connector = Factory.create({"test connector": connector_config}) test_event = {"any": "content", "version_info": "something random"} connector._get_event = mock.MagicMock(return_value=(test_event, None)) result, _ = connector.get_next(0.01) @@ -339,7 +339,7 @@ def test_pipeline_preprocessing_only_version_information(self): } connector_config = deepcopy(self.CONFIG) connector_config.update(preprocessing_config) - connector = Factory.create({"test connector": connector_config}, logger=self.logger) + connector = Factory.create({"test connector": connector_config}) test_event = {"any": "content", "version_info": "something random"} connector._get_event = mock.MagicMock(return_value=(test_event, None)) result, _ = connector.get_next(0.01) @@ -370,7 +370,7 @@ def test_get_next_adds_timestamp_if_configured(self): } connector_config = deepcopy(self.CONFIG) connector_config.update(preprocessing_config) - connector = Factory.create({"test connector": connector_config}, logger=self.logger) + connector = Factory.create({"test connector": connector_config}) connector._get_event = mock.MagicMock(return_value=({"any": "content"}, None)) result, _ = connector.get_next(0.01) target_field = preprocessing_config.get("preprocessing", {}).get( @@ -390,7 +390,7 @@ def test_pipeline_preprocessing_does_not_add_log_arrival_time_if_target_field_ex } connector_config = deepcopy(self.CONFIG) connector_config.update(preprocessing_config) - connector = Factory.create({"test connector": connector_config}, logger=self.logger) + connector = Factory.create({"test connector": connector_config}) test_event = {"any": "content", "arrival_time": "does not matter"} connector._get_event = mock.MagicMock(return_value=(test_event, None)) result, _ = connector.get_next(0.01) @@ -408,7 +408,7 @@ def test_pipeline_preprocessing_adds_timestamp_delta_if_configured(self): } connector_config = deepcopy(self.CONFIG) connector_config.update(preprocessing_config) - connector = Factory.create({"test connector": connector_config}, logger=self.logger) + connector = Factory.create({"test connector": connector_config}) test_event = {"any": "content", "@timestamp": "1999-09-09T09:09:09.448319+02:00"} connector._get_event = mock.MagicMock(return_value=(test_event, None)) result, _ = connector.get_next(0.01) @@ -434,7 +434,7 @@ def test_pipeline_preprocessing_does_not_add_timestamp_delta_if_configured_but_r } connector_config = deepcopy(self.CONFIG) connector_config.update(preprocessing_config) - connector = Factory.create({"test connector": connector_config}, logger=self.logger) + connector = Factory.create({"test connector": connector_config}) test_event = {"any": "content"} connector._get_event = mock.MagicMock(return_value=(test_event, None)) result, _ = connector.get_next(0.01) @@ -449,7 +449,7 @@ def test_pipeline_preprocessing_does_not_add_timestamp_delta_if_not_configured(s } connector_config = deepcopy(self.CONFIG) connector_config.update(preprocessing_config) - connector = Factory.create({"test connector": connector_config}, logger=self.logger) + connector = Factory.create({"test connector": connector_config}) test_event = {"any": "content"} connector._get_event = mock.MagicMock(return_value=(test_event, None)) result, _ = connector.get_next(0.01) @@ -468,7 +468,7 @@ def 
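The preprocessing tests above all follow the same recipe: deep-copy the base config, merge in a `preprocessing` section, and create the connector without a logger. An illustrative composite configuration (the dummy input type and the exact option values are assumptions):

    from logprep.factory import Factory

    connector_config = {
        "test connector": {
            "type": "dummy_input",
            "documents": [],
            "preprocessing": {
                "version_info_target_field": "version_info",
                "log_arrival_time_target_field": "arrival_time",
            },
        }
    }

    connector = Factory.create(connector_config)
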
test_pipeline_preprocessing_does_not_add_timestamp_delta_if_configured_but_l } connector_config = deepcopy(self.CONFIG) connector_config.update(preprocessing_config) - connector = Factory.create({"test connector": connector_config}, logger=self.logger) + connector = Factory.create({"test connector": connector_config}) test_event = {"any": "content"} connector._get_event = mock.MagicMock(return_value=(test_event, None)) result, _ = connector.get_next(0.01) @@ -484,7 +484,7 @@ def test_preprocessing_enriches_by_env_variable(self): } connector_config = deepcopy(self.CONFIG) connector_config.update(preprocessing_config) - connector = Factory.create({"test connector": connector_config}, logger=self.logger) + connector = Factory.create({"test connector": connector_config}) test_event = {"any": "content"} os.environ["TEST_ENV_VARIABLE"] = "test_value" connector._get_event = mock.MagicMock(return_value=(test_event, None)) @@ -502,7 +502,7 @@ def test_preprocessing_enriches_by_multiple_env_variables(self): } connector_config = deepcopy(self.CONFIG) connector_config.update(preprocessing_config) - connector = Factory.create({"test connector": connector_config}, logger=self.logger) + connector = Factory.create({"test connector": connector_config}) test_event = {"any": "content"} os.environ["TEST_ENV_VARIABLE_FOO"] = "test_value_foo" os.environ["TEST_ENV_VARIABLE_BAR"] = "test_value_bar" diff --git a/tests/unit/connector/test_confluent_kafka_common.py b/tests/unit/connector/test_confluent_kafka_common.py index 05e49fe8d..ee8b729e5 100644 --- a/tests/unit/connector/test_confluent_kafka_common.py +++ b/tests/unit/connector/test_confluent_kafka_common.py @@ -26,7 +26,7 @@ def test_create_fails_for_unknown_option(self): kafka_config = deepcopy(self.CONFIG) kafka_config.update({"unknown_option": "bad value"}) with pytest.raises(TypeError, match=r"unexpected keyword argument"): - _ = Factory.create({"test connector": kafka_config}, logger=self.logger) + _ = Factory.create({"test connector": kafka_config}) def test_error_callback_logs_error(self): self.object.metrics.number_of_errors = 0 diff --git a/tests/unit/connector/test_confluent_kafka_input.py b/tests/unit/connector/test_confluent_kafka_input.py index b7ba6dda0..a1938ff89 100644 --- a/tests/unit/connector/test_confluent_kafka_input.py +++ b/tests/unit/connector/test_confluent_kafka_input.py @@ -3,6 +3,7 @@ # pylint: disable=wrong-import-position # pylint: disable=wrong-import-order # pylint: disable=attribute-defined-outside-init +import logging import socket from copy import deepcopy from unittest import mock @@ -16,6 +17,7 @@ FatalInputError, InputWarning, ) +from logprep.connector.confluent_kafka.input import logger from logprep.factory import Factory from logprep.factory_error import InvalidConfigurationError from tests.unit.connector.base import BaseInputTestCase @@ -114,7 +116,7 @@ def test_shut_down_calls_consumer_close(self, _): @mock.patch("logprep.connector.confluent_kafka.input.Consumer") def test_batch_finished_callback_calls_offsets_handler_for_setting(self, _, settings, handlers): input_config = deepcopy(self.CONFIG) - kafka_input = Factory.create({"test": input_config}, logger=self.logger) + kafka_input = Factory.create({"test": input_config}) kafka_input._config.kafka_config.update(settings) kafka_consumer = kafka_input._consumer message = "test message" @@ -140,7 +142,7 @@ def test_batch_finished_callback_raises_input_warning_on_kafka_exception( self, _, settings, handler ): input_config = deepcopy(self.CONFIG) - kafka_input = 
Factory.create({"test": input_config}, logger=self.logger) + kafka_input = Factory.create({"test": input_config}) kafka_input._config.kafka_config.update(settings) kafka_consumer = kafka_input._consumer return_sequence = [KafkaException("test error"), None] @@ -246,7 +248,7 @@ def test_default_config_is_injected(self, mock_consumer): "statistics.interval.ms": "30000", "bootstrap.servers": "testserver:9092", "group.id": "testgroup", - "logger": self.object._logger, + "logger": logger, "on_commit": self.object._commit_callback, "stats_cb": self.object._stats_callback, "error_cb": self.object._error_callback, @@ -257,7 +259,7 @@ def test_default_config_is_injected(self, mock_consumer): @mock.patch("logprep.connector.confluent_kafka.input.Consumer") def test_client_id_can_be_overwritten(self, mock_consumer): input_config = deepcopy(self.CONFIG) - kafka_input = Factory.create({"test": input_config}, logger=self.logger) + kafka_input = Factory.create({"test": input_config}) kafka_input._config.kafka_config["client.id"] = "thisclientid" kafka_input.setup() mock_consumer.assert_called() @@ -266,7 +268,7 @@ def test_client_id_can_be_overwritten(self, mock_consumer): @mock.patch("logprep.connector.confluent_kafka.input.Consumer") def test_statistics_interval_can_be_overwritten(self, mock_consumer): - kafka_input = Factory.create({"test": self.CONFIG}, logger=self.logger) + kafka_input = Factory.create({"test": self.CONFIG}) kafka_input._config.kafka_config["statistics.interval.ms"] = "999999999" kafka_input.setup() mock_consumer.assert_called() @@ -284,7 +286,7 @@ def test_raises_value_error_if_mandatory_parameters_not_set(self): config.get("kafka_config").pop("group.id") expected_error_message = r"keys are missing: {'(bootstrap.servers|group.id)', '(bootstrap.servers|group.id)'}" # pylint: disable=line-too-long with pytest.raises(InvalidConfigurationError, match=expected_error_message): - Factory.create({"test": config}, logger=self.logger) + Factory.create({"test": config}) @pytest.mark.parametrize( "metric_name", diff --git a/tests/unit/connector/test_confluent_kafka_output.py b/tests/unit/connector/test_confluent_kafka_output.py index 5af0f0c3f..bf6a868d4 100644 --- a/tests/unit/connector/test_confluent_kafka_output.py +++ b/tests/unit/connector/test_confluent_kafka_output.py @@ -54,7 +54,7 @@ class TestConfluentKafkaOutput(BaseOutputTestCase, CommonConfluentKafkaTestCase) @mock.patch("logprep.connector.confluent_kafka.output.Producer", return_value="The Producer") def test_producer_property_instanciates_kafka_producer(self, _): - kafka_output = Factory.create({"test connector": self.CONFIG}, logger=self.logger) + kafka_output = Factory.create({"test connector": self.CONFIG}) assert kafka_output._producer == "The Producer" @mock.patch("logprep.connector.confluent_kafka.output.Producer") @@ -158,4 +158,4 @@ def test_raises_value_error_if_mandatory_parameters_not_set(self): config.get("kafka_config").pop("bootstrap.servers") expected_error_message = r"keys are missing: {'bootstrap.servers'}" with pytest.raises(InvalidConfigurationError, match=expected_error_message): - Factory.create({"test": config}, logger=self.logger) + Factory.create({"test": config}) diff --git a/tests/unit/connector/test_dummy_input.py b/tests/unit/connector/test_dummy_input.py index e72a637d2..6196d4a4d 100644 --- a/tests/unit/connector/test_dummy_input.py +++ b/tests/unit/connector/test_dummy_input.py @@ -46,7 +46,7 @@ def test_raises_exceptions_instead_of_returning_them(self): def 
test_repeat_documents_repeats_documents(self): config = copy.deepcopy(self.CONFIG) config["repeat_documents"] = True - connector = Factory.create(configuration={"Test Instance Name": config}, logger=self.logger) + connector = Factory.create(configuration={"Test Instance Name": config}) connector._config.documents = [{"order": 0}, {"order": 1}, {"order": 2}] for order in range(0, 9): diff --git a/tests/unit/connector/test_dummy_output.py b/tests/unit/connector/test_dummy_output.py index a8e1dd8e2..2e8095f58 100644 --- a/tests/unit/connector/test_dummy_output.py +++ b/tests/unit/connector/test_dummy_output.py @@ -48,7 +48,7 @@ def test_store_maintains_order_of_documents(self): def test_raises_exception_on_call_to_store(self): config = deepcopy(self.CONFIG) config.update({"exceptions": ["FatalOutputError"]}) - dummy_output = Factory.create({"test connector": config}, logger=self.logger) + dummy_output = Factory.create({"test connector": config}) with raises(BaseException, match="FatalOutputError"): dummy_output.store({"order": 0}) @@ -56,7 +56,7 @@ def test_raises_exception_on_call_to_store(self): def test_raises_exception_on_call_to_store_custom(self): config = deepcopy(self.CONFIG) config.update({"exceptions": ["FatalOutputError"]}) - dummy_output = Factory.create({"test connector": config}, logger=self.logger) + dummy_output = Factory.create({"test connector": config}) with raises(Exception, match="FatalOutputError"): dummy_output.store_custom({"order": 0}, target="whatever") @@ -64,7 +64,7 @@ def test_raises_exception_on_call_to_store_custom(self): def test_raises_exception_only_once(self): config = deepcopy(self.CONFIG) config.update({"exceptions": ["FatalOutputError"]}) - dummy_output = Factory.create({"test connector": config}, logger=self.logger) + dummy_output = Factory.create({"test connector": config}) with raises(Exception, match="FatalOutputError"): dummy_output.store({"order": 0}) @@ -76,7 +76,7 @@ def test_raises_exception_only_once(self): def test_raises_exception_only_when_not_none(self): config = deepcopy(self.CONFIG) config.update({"exceptions": [None, "FatalOutputError", None]}) - dummy_output = Factory.create({"test connector": config}, logger=self.logger) + dummy_output = Factory.create({"test connector": config}) dummy_output.store({"order": 0}) with raises(Exception, match="FatalOutputError"): diff --git a/tests/unit/connector/test_http_input.py b/tests/unit/connector/test_http_input.py index 7320df3f3..f2effca91 100644 --- a/tests/unit/connector/test_http_input.py +++ b/tests/unit/connector/test_http_input.py @@ -91,7 +91,7 @@ def test_create_connector(self): def test_no_pipeline_index(self): connector_config = deepcopy(self.CONFIG) - connector = Factory.create({"test connector": connector_config}, logger=self.logger) + connector = Factory.create({"test connector": connector_config}) try: connector.setup() assert False @@ -100,7 +100,7 @@ def test_no_pipeline_index(self): def test_not_first_pipeline(self): connector_config = deepcopy(self.CONFIG) - connector = Factory.create({"test connector": connector_config}, logger=self.logger) + connector = Factory.create({"test connector": connector_config}) connector.pipeline_index = 2 connector.setup() assert connector.http_server is None @@ -258,7 +258,7 @@ def test_get_metadata(self): connector_config = deepcopy(self.CONFIG) connector_config["collect_meta"] = True connector_config["metafield_name"] = "custom" - connector = Factory.create({"test connector": connector_config}, logger=self.logger) + connector = 
Factory.create({"test connector": connector_config}) connector.pipeline_index = 1 connector.setup() target = connector.target @@ -273,7 +273,7 @@ def test_server_multiple_config_changes(self): message = {"message": "my message"} connector_config = deepcopy(self.CONFIG) connector_config["uvicorn_config"]["port"] = 9001 - connector = Factory.create({"test connector": connector_config}, logger=self.logger) + connector = Factory.create({"test connector": connector_config}) connector.pipeline_index = 1 connector.setup() target = connector.target @@ -285,7 +285,7 @@ def test_server_multiple_config_changes(self): except requests.exceptions.ConnectionError as e: assert e.response is None connector_config = deepcopy(self.CONFIG) - connector = Factory.create({"test connector": connector_config}, logger=self.logger) + connector = Factory.create({"test connector": connector_config}) connector.pipeline_index = 1 connector.setup() target = connector.target @@ -305,7 +305,7 @@ def test_get_next_with_hmac_of_raw_message(self): } } ) - connector = Factory.create({"test connector": connector_config}, logger=self.logger) + connector = Factory.create({"test connector": connector_config}) connector.pipeline_index = 1 connector.setup() test_event = "the content" @@ -324,7 +324,7 @@ def test_get_next_with_hmac_of_raw_message(self): def test_endpoint_has_basic_auth(self, credentials_file_path): mock_env = {ENV_NAME_LOGPREP_CREDENTIALS_FILE: credentials_file_path} with mock.patch.dict("os.environ", mock_env): - new_connector = Factory.create({"test connector": self.CONFIG}, logger=self.logger) + new_connector = Factory.create({"test connector": self.CONFIG}) new_connector.pipeline_index = 1 new_connector.setup() resp = requests.post(url=f"{self.target}/auth-json-file", timeout=0.5) @@ -345,7 +345,7 @@ def test_endpoint_has_basic_auth(self, credentials_file_path): assert resp.status_code == 200 def test_two_connector_instances_share_the_same_queue(self): - new_connector = Factory.create({"test connector": self.CONFIG}, logger=self.logger) + new_connector = Factory.create({"test connector": self.CONFIG}) assert self.object.messages is new_connector.messages def test_messages_is_multiprocessing_queue(self): @@ -369,12 +369,12 @@ def test_all_endpoints_share_the_same_queue(self): def test_sets_target_to_https_schema_if_ssl_options(self): connector_config = deepcopy(self.CONFIG) connector_config["uvicorn_config"]["ssl_keyfile"] = "path/to/keyfile" - connector = Factory.create({"test connector": connector_config}, logger=self.logger) + connector = Factory.create({"test connector": connector_config}) assert connector.target.startswith("https://") def test_sets_target_to_http_schema_if_no_ssl_options(self): connector_config = deepcopy(self.CONFIG) - connector = Factory.create({"test connector": connector_config}, logger=self.logger) + connector = Factory.create({"test connector": connector_config}) assert connector.target.startswith("http://") def test_get_event_sets_message_backlog_size_metric(self): diff --git a/tests/unit/connector/test_json_input.py b/tests/unit/connector/test_json_input.py index 08908699a..92ac8907b 100644 --- a/tests/unit/connector/test_json_input.py +++ b/tests/unit/connector/test_json_input.py @@ -62,7 +62,7 @@ def test_repeat_documents_repeats_documents(self, mock_parse): config = copy.deepcopy(self.CONFIG) config["repeat_documents"] = True mock_parse.return_value = [{"order": 0}, {"order": 1}, {"order": 2}] - object = Factory.create(configuration={"Test Instance Name": config}, 
logger=self.logger) + object = Factory.create(configuration={"Test Instance Name": config}) for order in range(0, 9): event, _ = object.get_next(self.timeout) diff --git a/tests/unit/connector/test_jsonl_input.py b/tests/unit/connector/test_jsonl_input.py index a67b4e30d..fe928c651 100644 --- a/tests/unit/connector/test_jsonl_input.py +++ b/tests/unit/connector/test_jsonl_input.py @@ -56,7 +56,7 @@ def test_repeat_documents_repeats_documents(self, mock_parse): config = copy.deepcopy(self.CONFIG) config["repeat_documents"] = True mock_parse.return_value = [{"order": 0}, {"order": 1}, {"order": 2}] - object = Factory.create(configuration={"Test Instance Name": config}, logger=self.logger) + object = Factory.create(configuration={"Test Instance Name": config}) for order in range(0, 9): event, _ = object.get_next(self.timeout) diff --git a/tests/unit/connector/test_opensearch_output.py b/tests/unit/connector/test_opensearch_output.py index 178b3c976..e97d6e887 100644 --- a/tests/unit/connector/test_opensearch_output.py +++ b/tests/unit/connector/test_opensearch_output.py @@ -382,7 +382,7 @@ def test_opensearch_parallel_bulk(self): "message_backlog_size": 1, "timeout": 5000, } - output: OpensearchOutput = Factory.create({"opensearch_output": config}, mock.MagicMock()) + output: OpensearchOutput = Factory.create({"opensearch_output": config}) uuid_str = str(uuid.uuid4()) result = output._search_context.search( index="defaultindex", body={"query": {"match": {"foo": uuid_str}}} diff --git a/tests/unit/connector/test_real_kafka.py b/tests/unit/connector/test_real_kafka.py index 36e6a5568..25eb6a909 100644 --- a/tests/unit/connector/test_real_kafka.py +++ b/tests/unit/connector/test_real_kafka.py @@ -64,9 +64,7 @@ def setup_method(self): "bootstrap.servers": "localhost:9092", }, } - self.kafka_output = Factory.create( - {"test output": ouput_config}, logger=logging.getLogger() - ) + self.kafka_output = Factory.create({"test output": ouput_config}) input_config = { "type": "confluentkafka_input", @@ -76,7 +74,7 @@ def setup_method(self): "group.id": "test_consumergroup", }, } - self.kafka_input = Factory.create({"test input": input_config}, logger=logging.getLogger()) + self.kafka_input = Factory.create({"test input": input_config}) self.kafka_input.output_connector = mock.MagicMock() def teardown_method(self): @@ -102,7 +100,7 @@ def test_input_returns_by_output_produced_messages(self): assert event assert event.get("index") == index - def test_librdkafka_logs_forwarded_to_logprep_logger(self): + def test_librdkafka_logs_forwarded_to_logprep_logger(self, caplog): input_config = { "type": "confluentkafka_input", "topic": self.topic_name, @@ -111,13 +109,9 @@ def test_librdkafka_logs_forwarded_to_logprep_logger(self): "group.id": "test_consumergroup", }, } - kafka_input = Factory.create({"librdkafkatest": input_config}, logger=mock.MagicMock()) - kafka_input._logger.log = mock.MagicMock() + kafka_input = Factory.create({"librdkafkatest": input_config}) kafka_input.get_next(10) - kafka_input._logger.log.assert_called() - assert re.search( - r"Failed to resolve 'notexisting:9092'", kafka_input._logger.log.mock_calls[0][1][4] - ) + assert "Failed to resolve 'notexisting:9092'" in caplog.text @pytest.mark.skip(reason="is only for debugging") def test_debugging_consumer(self): @@ -131,7 +125,7 @@ def test_debugging_consumer(self): }, } logger = logging.getLogger() - kafka_input = Factory.create({"librdkafkatest": input_config}, logger=logger) + kafka_input = Factory.create({"librdkafkatest": input_config}) 
kafka_input.get_next(10) @pytest.mark.xfail(reason="sometimes fails, if not ran isolated") diff --git a/tests/unit/connector/test_s3_output.py b/tests/unit/connector/test_s3_output.py index c0f1699d5..2e3c1793b 100644 --- a/tests/unit/connector/test_s3_output.py +++ b/tests/unit/connector/test_s3_output.py @@ -73,7 +73,7 @@ def test_store_sends_with_default_prefix(self, base_prefix): } s3_config = deepcopy(self.CONFIG) s3_config.update({"message_backlog_size": 2, "base_prefix": base_prefix}) - s3_output = Factory.create({"s3": s3_config}, self.logger) + s3_output = Factory.create({"s3": s3_config}) s3_output.store(event) @@ -95,7 +95,7 @@ def test_store_sends_event_to_with_expected_prefix_if_prefix_missing_in_event( } s3_config = deepcopy(self.CONFIG) s3_config.update({"default_prefix": default_prefix, "message_backlog_size": 2}) - s3_output = Factory.create({"s3": s3_config}, self.logger) + s3_output = Factory.create({"s3": s3_config}) s3_output.store(event) @@ -110,7 +110,7 @@ def test_store_custom_writes_event_with_expected_prefix(self, base_prefix): s3_config = deepcopy(self.CONFIG) s3_config.update({"message_backlog_size": 2}) - s3_output = Factory.create({"s3": s3_config}, self.logger) + s3_output = Factory.create({"s3": s3_config}) s3_output.store_custom(event, custom_prefix) assert s3_output._message_backlog[custom_prefix][0] == expected @@ -129,7 +129,7 @@ def test_store_failed(self, base_prefix): } s3_config = deepcopy(self.CONFIG) s3_config.update({"error_prefix": error_prefix, "message_backlog_size": 2}) - s3_output = Factory.create({"s3": s3_config}, self.logger) + s3_output = Factory.create({"s3": s3_config}) s3_output.store_failed(error_message, event_received, event) @@ -189,7 +189,7 @@ def test_write_to_s3_resource_sets_current_backlog_count_and_below_max_backlog(s s3_config = deepcopy(self.CONFIG) message_backlog_size = 5 s3_config.update({"message_backlog_size": message_backlog_size}) - s3_output = Factory.create({"s3": s3_config}, self.logger) + s3_output = Factory.create({"s3": s3_config}) assert self._calculate_backlog_size(s3_output) == 0 for idx in range(1, message_backlog_size): s3_output._add_to_backlog({"dummy": "event"}, "write_to_s3") @@ -200,7 +200,7 @@ def test_write_to_s3_resource_sets_current_backlog_count_and_is_max_backlog(self s3_config = deepcopy(self.CONFIG) message_backlog_size = 5 s3_config.update({"message_backlog_size": message_backlog_size}) - s3_output = Factory.create({"s3": s3_config}, self.logger) + s3_output = Factory.create({"s3": s3_config}) s3_output._write_document_batch = mock.MagicMock() s3_output._write_document_batch.assert_not_called() @@ -244,7 +244,7 @@ def test_store_calls_batch_finished_callback(self): def test_store_does_not_call_batch_finished_callback_if_disabled(self): s3_config = deepcopy(self.CONFIG) s3_config.update({"call_input_callback": False}) - s3_output = Factory.create({"s3": s3_config}, self.logger) + s3_output = Factory.create({"s3": s3_config}) s3_output._s3_resource = mock.MagicMock() s3_output.input_connector = mock.MagicMock() s3_output.store({"message": "my event message"}) diff --git a/tests/unit/exceptions/test_connector_exceptions.py b/tests/unit/exceptions/test_connector_exceptions.py index 3dd0096ad..a2a77707f 100644 --- a/tests/unit/exceptions/test_connector_exceptions.py +++ b/tests/unit/exceptions/test_connector_exceptions.py @@ -3,8 +3,6 @@ # pylint: disable=protected-access # pylint: disable=line-too-long -from logging import getLogger - from logprep.abc.input import ( CriticalInputError, 
CriticalInputParsingError, @@ -31,9 +29,7 @@ class TestFatalOutputError(ExceptionBaseTest): counted_metric_name = "number_of_errors" def setup_method(self): - self.object = Factory.create( - {"test connector": {"type": "dummy_output", "default": False}}, logger=getLogger() - ) + self.object = Factory.create({"test connector": {"type": "dummy_output", "default": False}}) self.exception_args = (self.object, "the error message") @@ -45,10 +41,7 @@ class TestCriticalOutputError(ExceptionBaseTest): counted_metric_name = "number_of_errors" def setup_method(self): - self.object = Factory.create( - {"test connector": {"type": "dummy_output", "default": False}}, - logger=getLogger(), - ) + self.object = Factory.create({"test connector": {"type": "dummy_output", "default": False}}) self.exception_args = (self.object, "the error message", b"raw input") @@ -60,10 +53,7 @@ class TestOutputError(ExceptionBaseTest): counted_metric_name = "number_of_errors" def setup_method(self): - self.object = Factory.create( - {"test connector": {"type": "dummy_output", "default": False}}, - logger=getLogger(), - ) + self.object = Factory.create({"test connector": {"type": "dummy_output", "default": False}}) self.exception_args = (self.object, "the error message") @@ -75,10 +65,7 @@ class TestOutputWarning(ExceptionBaseTest): counted_metric_name = "number_of_warnings" def setup_method(self): - self.object = Factory.create( - {"test connector": {"type": "dummy_output", "default": False}}, - logger=getLogger(), - ) + self.object = Factory.create({"test connector": {"type": "dummy_output", "default": False}}) self.exception_args = (self.object, "the error message") @@ -90,10 +77,7 @@ class TestInputError(ExceptionBaseTest): counted_metric_name = "number_of_errors" def setup_method(self): - self.object = Factory.create( - {"test connector": {"type": "dummy_input", "documents": []}}, - logger=getLogger(), - ) + self.object = Factory.create({"test connector": {"type": "dummy_input", "documents": []}}) self.exception_args = (self.object, "the error message") @@ -105,10 +89,7 @@ class TestCriticalInputError(ExceptionBaseTest): counted_metric_name = "number_of_errors" def setup_method(self): - self.object = Factory.create( - {"test connector": {"type": "dummy_input", "documents": []}}, - logger=getLogger(), - ) + self.object = Factory.create({"test connector": {"type": "dummy_input", "documents": []}}) self.exception_args = (self.object, "the error message", b"raw input") @@ -120,10 +101,7 @@ class TestCriticalInputParsingError(ExceptionBaseTest): counted_metric_name = "number_of_errors" def setup_method(self): - self.object = Factory.create( - {"test connector": {"type": "dummy_input", "documents": []}}, - logger=getLogger(), - ) + self.object = Factory.create({"test connector": {"type": "dummy_input", "documents": []}}) self.exception_args = (self.object, "the error message", b"raw input") @@ -135,10 +113,7 @@ class TestFatalInputError(ExceptionBaseTest): counted_metric_name = "number_of_errors" def setup_method(self): - self.object = Factory.create( - {"test connector": {"type": "dummy_input", "documents": []}}, - logger=getLogger(), - ) + self.object = Factory.create({"test connector": {"type": "dummy_input", "documents": []}}) self.exception_args = (self.object, "the error message") @@ -150,10 +125,7 @@ class TestInputWarning(ExceptionBaseTest): counted_metric_name = "number_of_warnings" def setup_method(self): - self.object = Factory.create( - {"test connector": {"type": "dummy_input", "documents": []}}, - 
logger=getLogger(), - ) + self.object = Factory.create({"test connector": {"type": "dummy_input", "documents": []}}) self.exception_args = (self.object, "the error message") @@ -165,8 +137,5 @@ class TestSourceDisconnectedWarning(ExceptionBaseTest): counted_metric_name = "number_of_warnings" def setup_method(self): - self.object = Factory.create( - {"test connector": {"type": "dummy_input", "documents": []}}, - logger=getLogger(), - ) + self.object = Factory.create({"test connector": {"type": "dummy_input", "documents": []}}) self.exception_args = (self.object, "the error message") diff --git a/tests/unit/framework/rule_tree/test_rule_tree.py b/tests/unit/framework/rule_tree/test_rule_tree.py index 8820da815..bf2c52e0e 100644 --- a/tests/unit/framework/rule_tree/test_rule_tree.py +++ b/tests/unit/framework/rule_tree/test_rule_tree.py @@ -48,8 +48,7 @@ def test_init_with_specifying_config(self): "specific_rules": [], "tree_config": "tests/testdata/unit/tree_config.json", } - }, - mock.MagicMock(), + } ) rule_tree = RuleTree(processor_config=processor._config) diff --git a/tests/unit/framework/test_pipeline.py b/tests/unit/framework/test_pipeline.py index 19cda2ce6..624bff3ec 100644 --- a/tests/unit/framework/test_pipeline.py +++ b/tests/unit/framework/test_pipeline.py @@ -4,7 +4,7 @@ import logging import multiprocessing from copy import deepcopy -from logging import DEBUG, getLogger +from logging import DEBUG from multiprocessing import Lock from unittest import mock @@ -95,10 +95,10 @@ def test_passes_timeout_parameter_to_inputs_get_next(self, _): def test_empty_documents_are_not_forwarded_to_other_processors(self, _): input_data = [{"do_not_delete": "1"}, {"delete_me": "2"}, {"do_not_delete": "3"}] connector_config = {"dummy": {"type": "dummy_input", "documents": input_data}} - input_connector = original_create(connector_config, mock.MagicMock()) + input_connector = original_create(connector_config) self.pipeline._input = input_connector self.pipeline._output = { - "dummy": original_create({"dummy": {"type": "dummy_output"}}, mock.MagicMock()), + "dummy": original_create({"dummy": {"type": "dummy_output"}}), } deleter_config = { "deleter processor": { @@ -107,7 +107,7 @@ def test_empty_documents_are_not_forwarded_to_other_processors(self, _): "generic_rules": [], } } - deleter_processor = original_create(deleter_config, mock.MagicMock()) + deleter_processor = original_create(deleter_config) deleter_rule = DeleterRule._create_from_dict( {"filter": "delete_me", "deleter": {"delete": True}} ) @@ -155,10 +155,8 @@ def test_all_events_provided_by_input_arrive_at_output(self, _): input_data = [{"test": "1"}, {"test": "2"}, {"test": "3"}] expected_output_data = deepcopy(input_data) connector_config = {"type": "dummy_input", "documents": input_data} - self.pipeline._input = original_create({"dummy": connector_config}, mock.MagicMock()) - self.pipeline._output = { - "dummy": original_create({"dummy": {"type": "dummy_output"}}, mock.MagicMock()) - } + self.pipeline._input = original_create({"dummy": connector_config}) + self.pipeline._output = {"dummy": original_create({"dummy": {"type": "dummy_output"}})} self.pipeline.run() assert self.pipeline._output["dummy"].events == expected_output_data @@ -307,7 +305,7 @@ def raise_warning(_): @mock.patch("logging.Logger.error") def test_critical_output_error_is_logged_and_counted(self, mock_log_error, _): - dummy_output = original_create({"dummy_output": {"type": "dummy_output"}}, mock.MagicMock()) + dummy_output = original_create({"dummy_output": 
{"type": "dummy_output"}}) dummy_output.store_failed = mock.MagicMock() def raise_critical(event): @@ -328,7 +326,7 @@ def raise_critical(event): @mock.patch("logging.Logger.warning") def test_warning_output_error_is_logged(self, mock_warning, _): - dummy_output = original_create({"dummy_output": {"type": "dummy_output"}}, mock.MagicMock()) + dummy_output = original_create({"dummy_output": {"type": "dummy_output"}}) def raise_warning(event): raise OutputWarning(self.pipeline._output["dummy"], "mock output warning") @@ -350,9 +348,7 @@ def test_processor_fatal_input_error_is_logged_pipeline_is_shutdown(self, mock_e def raise_fatal_input_error(event): raise FatalInputError(self.pipeline._input, "fatal input error") - self.pipeline._input = original_create( - {"dummy": {"type": "dummy_input", "documents": []}}, getLogger() - ) + self.pipeline._input = original_create({"dummy": {"type": "dummy_input", "documents": []}}) self.pipeline._input.get_next = mock.MagicMock(side_effect=raise_fatal_input_error) self.pipeline._shut_down = mock.MagicMock() self.pipeline.run() @@ -480,7 +476,7 @@ def test_shut_down_drains_input_queues(self, _): "endpoints": {"/json": "json", "/jsonl": "jsonl", "/plaintext": "plaintext"}, } } - self.pipeline._input = original_create(input_config, mock.MagicMock()) + self.pipeline._input = original_create(input_config) self.pipeline._input.pipeline_index = 1 self.pipeline._input.messages = multiprocessing.Queue(-1) self.pipeline._input.setup() diff --git a/tests/unit/framework/test_pipeline_manager.py b/tests/unit/framework/test_pipeline_manager.py index 584eb5d8a..4ac63da07 100644 --- a/tests/unit/framework/test_pipeline_manager.py +++ b/tests/unit/framework/test_pipeline_manager.py @@ -88,7 +88,9 @@ def test_remove_failed_pipelines_logs_warning_for_removed_failed_pipelines(self, ok_pipeline.is_alive = mock.MagicMock(return_value=True) self.manager._pipelines = [failed_pipeline, ok_pipeline] self.manager.restart_failed_pipeline() - logger_mock.assert_called_with("Restarting failed pipeline on index 1 with exit code: -1") + logger_mock.assert_called_with( + "Restarting failed pipeline on index %s with exit code: %s", 1, -1 + ) def test_stop_terminates_processes_created(self): self.manager.set_count(3) @@ -204,5 +206,5 @@ def test_pipeline_manager_sets_queue_size_for_http_input(self): } PipelineManager(config) assert HttpConnector.messages._maxsize == 100 - http_input = Factory.create(config.input, mock.MagicMock()) + http_input = Factory.create(config.input) assert http_input.messages._maxsize == 100 diff --git a/tests/unit/processor/base.py b/tests/unit/processor/base.py index 512d3e730..6fb4ea71b 100644 --- a/tests/unit/processor/base.py +++ b/tests/unit/processor/base.py @@ -92,7 +92,7 @@ def setup_method(self) -> None: self.patchers.append(patcher) super().setup_method() config = {"Test Instance Name": self.CONFIG} - self.object = Factory.create(configuration=config, logger=self.logger) + self.object = Factory.create(configuration=config) self.specific_rules = self.set_rules(self.specific_rules_dirs) self.generic_rules = self.set_rules(self.generic_rules_dirs) @@ -163,7 +163,7 @@ def test_accepts_http_in_rules_config(self): {"generic_rules": ["http://does.not.matter", "https://this.is.not.existent/bla.yml"]} ) with pytest.raises(TypeError, match="not .*MagicMock.*"): - Factory.create({"http_rule_processor": myconfig}, self.logger) + Factory.create({"http_rule_processor": myconfig}) def test_no_redundant_rules_are_added_to_rule_tree(self): """ @@ -222,26 +222,26 @@ def 
test_validation_raises_if_not_a_list(self, rule_list): config = deepcopy(self.CONFIG) config.update({rule_list: "i am not a list"}) with pytest.raises(TypeError, match=r"must be "): - Factory.create({"test instance": config}, self.logger) + Factory.create({"test instance": config}) @pytest.mark.parametrize("rule_list", ["specific_rules", "generic_rules"]) def test_validation_raises_if_elements_does_not_exist(self, rule_list): config = deepcopy(self.CONFIG) config.update({rule_list: ["/i/do/not/exist"]}) with pytest.raises(FileNotFoundError): - Factory.create({"test instance": config}, self.logger) + Factory.create({"test instance": config}) def test_validation_raises_if_tree_config_is_not_a_str(self): config = deepcopy(self.CONFIG) config.update({"tree_config": 12}) with pytest.raises(TypeError, match=r"must be "): - Factory.create({"test instance": config}, self.logger) + Factory.create({"test instance": config}) def test_validation_raises_if_tree_config_is_not_exist(self): config = deepcopy(self.CONFIG) config.update({"tree_config": "/i/am/not/a/file/path"}) with pytest.raises(FileNotFoundError): - Factory.create({"test instance": config}, self.logger) + Factory.create({"test instance": config}) @responses.activate def test_accepts_tree_config_from_http(self): @@ -249,7 +249,7 @@ def test_accepts_tree_config_from_http(self): config.update({"tree_config": "http://does.not.matter.bla/tree_config.yml"}) tree_config = Path("tests/testdata/unit/tree_config.json").read_text() responses.add(responses.GET, "http://does.not.matter.bla/tree_config.yml", tree_config) - processor = Factory.create({"test instance": config}, self.logger) + processor = Factory.create({"test instance": config}) assert ( processor._specific_tree._processor_config.tree_config == "http://does.not.matter.bla/tree_config.yml" @@ -263,7 +263,7 @@ def test_raises_http_error(self): config.update({"tree_config": "http://does.not.matter.bla/tree_config.yml"}) responses.add(responses.GET, "http://does.not.matter.bla/tree_config.yml", status=404) with pytest.raises(requests.HTTPError): - Factory.create({"test instance": config}, self.logger) + Factory.create({"test instance": config}) @pytest.mark.parametrize( "metric_name, metric_class", diff --git a/tests/unit/processor/domain_label_extractor/test_domain_label_extractor.py b/tests/unit/processor/domain_label_extractor/test_domain_label_extractor.py index bf3eb8215..ee6a042e2 100644 --- a/tests/unit/processor/domain_label_extractor/test_domain_label_extractor.py +++ b/tests/unit/processor/domain_label_extractor/test_domain_label_extractor.py @@ -182,7 +182,7 @@ def test_new_non_default_tagging_field(self): } } - domain_label_extractor = Factory.create(configuration=config, logger=self.logger) + domain_label_extractor = Factory.create(configuration=config) document = {"url": {"domain": "domain.fubarbo"}} expected_output = { "url": {"domain": "domain.fubarbo"}, @@ -203,7 +203,7 @@ def test_append_to_non_default_tagging_field(self): } } - domain_label_extractor = Factory.create(config, self.logger) + domain_label_extractor = Factory.create(config) document = {"url": {"domain": "domain.fubarbo"}, "special_tags": ["source"]} expected_output = { "url": {"domain": "domain.fubarbo"}, diff --git a/tests/unit/processor/domain_resolver/test_domain_resolver.py b/tests/unit/processor/domain_resolver/test_domain_resolver.py index c581249a1..bf8962d6e 100644 --- a/tests/unit/processor/domain_resolver/test_domain_resolver.py +++ b/tests/unit/processor/domain_resolver/test_domain_resolver.py @@ 
-87,7 +87,7 @@ def test_url_to_ip_resolved_and_added(self, _): def test_domain_ip_map_greater_cache(self): config = deepcopy(self.CONFIG) config.update({"max_cached_domains": 1}) - self.object = Factory.create({"resolver": config}, self.logger) + self.object = Factory.create({"resolver": config}) rule = { "filter": "url", "domain_resolver": {"source_fields": ["url"]}, @@ -119,7 +119,7 @@ def test_do_nothing_if_source_not_in_event(self): def test_url_to_ip_resolved_and_added_with_debug_cache(self, _): config = deepcopy(self.CONFIG) config.update({"debug_cache": True}) - self.object = Factory.create({"resolver": config}, self.logger) + self.object = Factory.create({"resolver": config}) rule = { "filter": "url", "domain_resolver": {"source_fields": ["url"]}, @@ -139,7 +139,7 @@ def test_url_to_ip_resolved_and_added_with_debug_cache(self, _): def test_url_to_ip_resolved_from_cache_and_added_with_debug_cache(self, _): config = deepcopy(self.CONFIG) config.update({"debug_cache": True}) - self.object = Factory.create({"resolver": config}, self.logger) + self.object = Factory.create({"resolver": config}) rule = { "filter": "url", "domain_resolver": {"source_fields": ["url"]}, @@ -161,7 +161,7 @@ def test_url_to_ip_resolved_from_cache_and_added_with_debug_cache(self, _): def test_url_to_ip_resolved_and_added_with_cache_disabled(self, _): config = deepcopy(self.CONFIG) config.update({"cache_enabled": False}) - self.object = Factory.create({"resolver": config}, self.logger) + self.object = Factory.create({"resolver": config}) rule = { "filter": "url", "domain_resolver": {"source_fields": ["url"]}, @@ -189,7 +189,7 @@ def test_resolves_with_tld_extract_tld_lists(self, _): responses.add(responses.GET, "http://does_not_matter", response_content) config = deepcopy(self.CONFIG) config.update({"tld_lists": ["http://does_not_matter"]}) - domain_resolver = Factory.create({"test instance": config}, self.logger) + domain_resolver = Factory.create({"test instance": config}) document = {"url": "http://www.google.ac.at/some/text"} expected = {"url": "http://www.google.ac.at/some/text", "resolved_ip": "1.2.3.4"} domain_resolver.process(document) @@ -199,7 +199,7 @@ def test_resolves_with_tld_extract_tld_lists(self, _): def test_invalid_dots_domain_to_ip_produces_warning(self): config = deepcopy(self.CONFIG) config.update({"tld_list": TLD_LIST}) - domain_resolver = Factory.create({"test instance": config}, self.logger) + domain_resolver = Factory.create({"test instance": config}) assert self.object.metrics.number_of_processed_events == 0 document = {"url": "google..invalid.de"} diff --git a/tests/unit/processor/generic_adder/test_generic_adder.py b/tests/unit/processor/generic_adder/test_generic_adder.py index b66f11fdc..8bacf95e6 100644 --- a/tests/unit/processor/generic_adder/test_generic_adder.py +++ b/tests/unit/processor/generic_adder/test_generic_adder.py @@ -416,7 +416,7 @@ def test_add_generic_fields_from_file_missing_and_existing_with_all_required(sel config = deepcopy(self.CONFIG) config["specific_rules"] = [RULES_DIR_MISSING] configuration = {"test_instance_name": config} - Factory.create(configuration, self.logger) + Factory.create(configuration) def test_add_generic_fields_from_file_invalid(self): with pytest.raises( @@ -426,7 +426,7 @@ def test_add_generic_fields_from_file_invalid(self): config = deepcopy(self.CONFIG) config["generic_rules"] = [RULES_DIR_INVALID] configuration = {"test processor": config} - Factory.create(configuration, self.logger) + Factory.create(configuration) class 
BaseTestGenericAdderSQLTestCase(BaseProcessorTestCase): @@ -773,9 +773,9 @@ def test_sql_table_must_contain_only_alphanumeric_or_underscore( if raised_error: with pytest.raises(raised_error[0], match=raised_error[1]): - Factory.create({"Test Instance Name": config}, self.logger) + Factory.create({"Test Instance Name": config}) else: - Factory.create({"Test Instance Name": config}, self.logger) + Factory.create({"Test Instance Name": config}) class TestGenericAdderProcessorSQLWithAddedTarget(BaseTestGenericAdderSQLTestCase): diff --git a/tests/unit/processor/grokker/test_grokker.py b/tests/unit/processor/grokker/test_grokker.py index 4a24e0a00..1c285a263 100644 --- a/tests/unit/processor/grokker/test_grokker.py +++ b/tests/unit/processor/grokker/test_grokker.py @@ -467,7 +467,7 @@ def test_loads_patterns_without_custom_patterns_dir(self): config |= { "custom_patterns_dir": "", } - grokker = Factory.create({"grokker": config}, self.logger) + grokker = Factory.create({"grokker": config}) assert len(grokker.rules) > 0 def test_loads_custom_patterns(self): diff --git a/tests/unit/processor/labeler/test_labeler.py b/tests/unit/processor/labeler/test_labeler.py index 19ff4cee2..c6085d7bc 100644 --- a/tests/unit/processor/labeler/test_labeler.py +++ b/tests/unit/processor/labeler/test_labeler.py @@ -239,7 +239,7 @@ def test_create_fails_when_include_parent_labels_is_not_boolean(self): TypeError, match="'include_parent_labels' must be ", ): - Factory.create({"test instance": config}, self.logger) + Factory.create({"test instance": config}) def test_create_fails_when_rules_do_not_conform_to_labeling_schema(self): config = copy.deepcopy(self.CONFIG) @@ -247,13 +247,13 @@ def test_create_fails_when_rules_do_not_conform_to_labeling_schema(self): with raises( ValueDoesnotExistInSchemaError, match="Invalid value 'windows' for key 'reporter'." 
): - labeler = Factory.create({"test instance": config}, self.logger) + labeler = Factory.create({"test instance": config}) labeler.setup() def test_create_loads_the_specified_labeling_schema(self): config = copy.deepcopy(self.CONFIG) config["schema"] = path_to_schema expected_schema = LabelingSchema.create_from_file(path_to_schema) - labeler = Factory.create({"test instance": config}, self.logger) + labeler = Factory.create({"test instance": config}) assert labeler._schema == expected_schema diff --git a/tests/unit/processor/list_comparison/test_list_comparison.py b/tests/unit/processor/list_comparison/test_list_comparison.py index c6d56eca6..9e8f3104c 100644 --- a/tests/unit/processor/list_comparison/test_list_comparison.py +++ b/tests/unit/processor/list_comparison/test_list_comparison.py @@ -270,7 +270,7 @@ def test_list_comparison_loads_rule_with_http_template_in_list_search_base_path( "generic_rules": [], "list_search_base_path": "http://localhost/tests/testdata/${LOGPREP_LIST}?ref=bla", } - processor = Factory.create({"custom_lister": config}, self.logger) + processor = Factory.create({"custom_lister": config}) rule = processor.rule_class._create_from_dict(rule_dict) processor._specific_tree.add_rule(rule) processor.setup() diff --git a/tests/unit/processor/normalizer/test_normalizer.py b/tests/unit/processor/normalizer/test_normalizer.py index bd096287b..71a97aa4b 100644 --- a/tests/unit/processor/normalizer/test_normalizer.py +++ b/tests/unit/processor/normalizer/test_normalizer.py @@ -1110,7 +1110,7 @@ def test_normalization_with_grok_pattern_count(self): {"count_grok_pattern_matches": {"count_directory_path": temp_path, "write_period": 0}} ) processor_config = {"Test Normalizer Name": config} - self.object = Factory.create(processor_config, self.logger) + self.object = Factory.create(processor_config) event = { "winlog": { diff --git a/tests/unit/processor/pseudonymizer/test_pseudonymizer.py b/tests/unit/processor/pseudonymizer/test_pseudonymizer.py index 28ea32025..8e9e81378 100644 --- a/tests/unit/processor/pseudonymizer/test_pseudonymizer.py +++ b/tests/unit/processor/pseudonymizer/test_pseudonymizer.py @@ -747,9 +747,9 @@ def test_config_validation(self, config_change, error, msg): config |= config_change if error: with pytest.raises(error, match=msg): - Factory.create({"name": config}, self.logger) + Factory.create({"name": config}) else: - Factory.create({"name": config}, self.logger) + Factory.create({"name": config}) @pytest.mark.parametrize("testcase, rule, event, expected, regex_mapping", test_cases) def test_testcases(self, testcase, rule, event, expected, regex_mapping): @@ -762,7 +762,7 @@ def test_testcases(self, testcase, rule, event, expected, regex_mapping): def test_tld_extractor_uses_file(self): config = deepcopy(self.CONFIG) config["tld_lists"] = [TLD_LIST] - object_with_tld_list = Factory.create({"pseudonymizer": config}, self.logger) + object_with_tld_list = Factory.create({"pseudonymizer": config}) assert len(object_with_tld_list._tld_extractor.suffix_list_urls) == 1 assert object_with_tld_list._tld_extractor.suffix_list_urls[0].endswith( "tests/testdata/mock_external/tld_list.dat", diff --git a/tests/unit/processor/template_replacer/test_template_replacer.py b/tests/unit/processor/template_replacer/test_template_replacer.py index 90baee715..3c6229fc3 100644 --- a/tests/unit/processor/template_replacer/test_template_replacer.py +++ b/tests/unit/processor/template_replacer/test_template_replacer.py @@ -81,7 +81,7 @@ def 
test_replace_fails_because_it_does_not_map_to_anything(self): def test_replace_dotted_message_via_template(self): config = deepcopy(self.CONFIG) config.get("pattern").update({"target_field": "dotted.message"}) - self.object = Factory.create({"test instance": config}, self.logger) + self.object = Factory.create({"test instance": config}) document = { "winlog": {"channel": "System", "provider_name": "Test", "event_id": 123}, "dotted": {"message": "foo"}, @@ -96,7 +96,7 @@ def test_replace_dotted_message_via_template(self): def test_replace_non_existing_dotted_message_via_template(self): config = deepcopy(self.CONFIG) config.get("pattern").update({"target_field": "dotted.message"}) - self.object = Factory.create({"test instance": config}, self.logger) + self.object = Factory.create({"test instance": config}) document = {"winlog": {"channel": "System", "provider_name": "Test", "event_id": 123}} self.object.process(document) @@ -108,7 +108,7 @@ def test_replace_non_existing_dotted_message_via_template(self): def test_replace_partly_existing_dotted_message_via_template(self): config = deepcopy(self.CONFIG) config.get("pattern").update({"target_field": "dotted.message"}) - self.object = Factory.create({"test instance": config}, self.logger) + self.object = Factory.create({"test instance": config}) document = { "winlog": {"channel": "System", "provider_name": "Test", "event_id": 123}, "dotted": {"bar": "foo"}, @@ -124,7 +124,7 @@ def test_replace_partly_existing_dotted_message_via_template(self): def test_replace_existing_dotted_message_dict_via_template(self): config = deepcopy(self.CONFIG) config.get("pattern").update({"target_field": "dotted.message"}) - self.object = Factory.create({"test instance": config}, self.logger) + self.object = Factory.create({"test instance": config}) document = { "winlog": {"channel": "System", "provider_name": "Test", "event_id": 123}, "dotted": {"message": {"foo": "bar"}}, @@ -139,7 +139,7 @@ def test_replace_existing_dotted_message_dict_via_template(self): def test_replace_incompatible_existing_dotted_message_parent_via_template(self, caplog): config = deepcopy(self.CONFIG) config.get("pattern").update({"target_field": "dotted.message"}) - self.object = Factory.create({"test instance": config}, self.logger) + self.object = Factory.create({"test instance": config}) document = { "winlog": {"channel": "System", "provider_name": "Test", "event_id": 123}, "dotted": "foo", @@ -155,4 +155,4 @@ def test_replace_fails_with_invalid_template(self): {"template": "tests/testdata/unit/template_replacer/replacer_template_invalid.yml"} ) with pytest.raises(TemplateReplacerError, match="Not enough delimiters"): - Factory.create({"test instance": config}, self.logger) + Factory.create({"test instance": config}) diff --git a/tests/unit/processor/test_process.py b/tests/unit/processor/test_process.py index 4b6d80e39..392bd9883 100644 --- a/tests/unit/processor/test_process.py +++ b/tests/unit/processor/test_process.py @@ -23,8 +23,7 @@ def test_process(self, mock_process_rule_tree): "generic_rules": [], "specific_rules": [], } - }, - mock.MagicMock(), + } ) processor.process({}) mock_process_rule_tree.assert_called() @@ -39,8 +38,7 @@ def test_process_specific_before_generic(self, mock_process_rule_tree): "generic_rules": [], "specific_rules": [], } - }, - mock.MagicMock(), + } ) processor.process({}) assert mock_process_rule_tree.call_count == 2 @@ -57,7 +55,7 @@ def test_apply_processor_multiple_times_until_no_new_rule_matches(self): "generic_rules": [], "apply_multiple_times": 
True, } - processor = Factory.create({"custom_lister": config}, getLogger("test-logger")) + processor = Factory.create({"custom_lister": config}) rule_one_dict = { "filter": "message", "dissector": {"mapping": {"message": "%{time} [%{protocol}] %{url}"}}, @@ -84,7 +82,7 @@ def test_apply_processor_multiple_times_until_no_new_rule_matches(self): def test_apply_processor_multiple_times_not_enabled(self): config = {"type": "dissector", "specific_rules": [], "generic_rules": []} - processor = Factory.create({"custom_lister": config}, getLogger("test-logger")) + processor = Factory.create({"custom_lister": config}) rule_one_dict = { "filter": "message", "dissector": {"mapping": {"message": "%{time} [%{protocol}] %{url}"}}, @@ -110,7 +108,7 @@ def test_apply_processor_multiple_times_not_enabled(self): @pytest.mark.parametrize("execution_number", range(5)) # repeat test to ensure determinism def test_applies_rules_in_deterministic_order(self, execution_number): config = {"type": "generic_adder", "specific_rules": [], "generic_rules": []} - processor = Factory.create({"custom_lister": config}, getLogger("test-logger")) + processor = Factory.create({"custom_lister": config}) rule_one_dict = {"filter": "val", "generic_adder": {"add": {"some": "value"}}} rule_two_dict = {"filter": "NOT something", "generic_adder": {"add": {"something": "else"}}} rule_one = GenericAdderRule._create_from_dict(rule_one_dict) diff --git a/tests/unit/test_factory.py b/tests/unit/test_factory.py index 42acb99f2..f1e6ec6b4 100644 --- a/tests/unit/test_factory.py +++ b/tests/unit/test_factory.py @@ -2,7 +2,6 @@ # pylint: disable=protected-access # pylint: disable=too-many-lines import re -from logging import getLogger from random import sample from string import ascii_letters from unittest import mock @@ -22,8 +21,6 @@ from logprep.processor.pseudonymizer.processor import Pseudonymizer from tests.testdata.metadata import path_to_schema, path_to_single_rule -logger = getLogger() - @mark.parametrize( ["configs", "error", "message"], @@ -54,7 +51,7 @@ def test_create_from_dict_validates_config(configs, error, message): for config in configs: with raises(error) as exception_info: - Factory.create(config, logger) + Factory.create(config) value = str(exception_info.value) assertion_error_message = ( f'Error message of "{error.__name__}" did not match regex for test input ' @@ -69,7 +66,7 @@ def test_create_fails_for_unknown_type(): "".join(sample(ascii_letters, 6)) for i in range(5) ]: with raises(UnknownComponentTypeError): - Factory.create({"processorname": {"type": type_name}}, logger) + Factory.create({"processorname": {"type": type_name}}) def test_create_pseudonymizer_returns_pseudonymizer_processor(): @@ -86,8 +83,7 @@ def test_create_pseudonymizer_returns_pseudonymizer_processor(): "outputs": [{"kafka": "topic"}], "max_cached_pseudonyms": 1000000, } - }, - logger, + } ) assert isinstance(processor, Pseudonymizer) @@ -102,8 +98,7 @@ def test_create_clusterer_returns_clusterer_processor(): "specific_rules": ["tests/testdata/unit/clusterer/rules/specific"], "generic_rules": ["tests/testdata/unit/clusterer/rules/generic"], } - }, - logger, + } ) assert isinstance(processor, Clusterer) @@ -115,7 +110,7 @@ def test_fails_when_section_contains_more_than_one_element(): match=r"Found multiple component definitions \(first, second\), " r"but there must be exactly one\.", ): - Factory.create({"first": mock.MagicMock(), "second": mock.MagicMock()}, logger) + Factory.create({"first": mock.MagicMock(), "second": mock.MagicMock()}) def 
test_create_labeler_creates_labeler_processor(): @@ -127,8 +122,7 @@ def test_create_labeler_creates_labeler_processor(): "generic_rules": [path_to_single_rule], "specific_rules": [path_to_single_rule], } - }, - logger, + } ) assert isinstance(processor, Labeler) @@ -152,8 +146,7 @@ def test_creates_calculator_with_inline_rules(): }, ], } - }, - logger, + } ) assert len(processor._generic_rules) == 1 assert len(processor._specific_rules) == 1 @@ -179,8 +172,7 @@ def test_creates_calculator_with_inline_rules_and_files(): "tests/testdata/unit/calculator/specific_rules/calculator.json", ], } - }, - logger, + } ) assert len(processor._generic_rules) == 2 assert len(processor._specific_rules) == 2 @@ -208,8 +200,7 @@ def test_creates_calculator_with_inline_rules_and_file_and_directory(): "tests/testdata/unit/calculator/specific_rules/calculator.json", ], } - }, - logger, + } ) assert len(processor._generic_rules) == 2 assert len(processor._specific_rules) == 2 @@ -218,7 +209,6 @@ def test_creates_calculator_with_inline_rules_and_file_and_directory(): def test_dummy_input_creates_dummy_input_connector(): processor = Factory.create( {"labelername": {"type": "dummy_input", "documents": [{}, {}]}}, - logger, ) assert isinstance(processor, Input) diff --git a/tests/unit/util/test_auto_rule_corpus_tester.py b/tests/unit/util/test_auto_rule_corpus_tester.py index 1cd4ef102..8c0403c33 100644 --- a/tests/unit/util/test_auto_rule_corpus_tester.py +++ b/tests/unit/util/test_auto_rule_corpus_tester.py @@ -11,6 +11,7 @@ import pytest from logprep.util.auto_rule_tester.auto_rule_corpus_tester import RuleCorpusTester +from logprep.util.configuration import Configuration from logprep.util.defaults import DEFAULT_LOG_CONFIG from logprep.util.getter import GetterFactory @@ -469,34 +470,3 @@ def test_corpus_tests_dont_share_cache_between_runs_by_resetting_processors( for expected_print in expected_prints: assert expected_print in console_output mock_exit.assert_called_with(0) - - @mock.patch("logprep.util.auto_rule_tester.auto_rule_corpus_tester.sys.exit") - def test_warnings_are_printed_inside_the_detailed_reports(self, mock_exit, tmp_path, capsys): - test_case_data = { - "input": { - "field1": 2, - "field2": 2, - "new_field": "exists already", - }, - "expected_output": { - "field1": 2, - "field2": 2, - "new_field": "exists already", - }, - "expected_extra_output": [], - } - test_data_dir = tmp_path / "test_data" - os.makedirs(test_data_dir, exist_ok=True) - write_test_case_data_tmp_files(test_data_dir, "test_case_one", test_case_data) - config_path = ["tests/testdata/config/config.yml"] - corpus_tester = RuleCorpusTester(config_path, test_data_dir) - corpus_tester.run() - console_output, console_error = capsys.readouterr() - assert console_error == "" - warnings_inside_details_pattern = ( - r".*Test Cases Detailed Reports.*test_case_one.*" - r"Logprep Warnings.*FieldExistsWarning.*test_case_one.*" - r"Test Overview" - ) - assert re.match(warnings_inside_details_pattern, console_output, flags=re.DOTALL) - mock_exit.assert_called_with(1)
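
Every hunk in this patch follows the same mechanical pattern: Factory.create()
loses its logger parameter and is called with the component configuration
alone, while log output is verified through the standard logging machinery (a
module-level logger or pytest's caplog fixture) instead of injected mock
loggers. A minimal sketch of the new call site, using the dummy connectors the
test suite itself exercises; the instance name and documents are illustrative:

    from logprep.factory import Factory

    # Old call site, removed throughout this patch:
    #   Factory.create({"test connector": config}, logger=some_logger)

    # New call site: the configuration dict is the only argument.
    config = {"type": "dummy_input", "documents": [{"order": 0}]}
    connector = Factory.create({"test connector": config})
    event, _ = connector.get_next(0.1)

    # Components now resolve their logger internally via the logging module,
    # so tests assert on captured records (e.g. `... in caplog.text`) rather
    # than on a mock logger handed in at construction time.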