From cbf270f03f5774a0bbaf9fa7e1d60aef607c0680 Mon Sep 17 00:00:00 2001 From: dtrai2 Date: Thu, 5 Dec 2024 17:54:11 +0100 Subject: [PATCH 01/13] rename test_http_input acceptance test we have two test files with the same name, that as an influence on test collection in pycharm. This is an easy workaround. --- .../{test_http_input.py => test_http_input_with_requests.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/acceptance/{test_http_input.py => test_http_input_with_requests.py} (100%) diff --git a/tests/acceptance/test_http_input.py b/tests/acceptance/test_http_input_with_requests.py similarity index 100% rename from tests/acceptance/test_http_input.py rename to tests/acceptance/test_http_input_with_requests.py From 69aed89cfe2ca3f54481e38d8c6f64b44ed90785 Mon Sep 17 00:00:00 2001 From: dtrai2 Date: Thu, 5 Dec 2024 17:54:23 +0100 Subject: [PATCH 02/13] remove unused import --- logprep/connector/file/input.py | 1 - 1 file changed, 1 deletion(-) diff --git a/logprep/connector/file/input.py b/logprep/connector/file/input.py index 3c5a74897..1d6e439de 100644 --- a/logprep/connector/file/input.py +++ b/logprep/connector/file/input.py @@ -23,7 +23,6 @@ import queue import threading import zlib -from logging import Logger from typing import Callable, TextIO from attrs import define, field, validators From b8349d3b52ff08d4c17f6153ea8e1d4705fcf785 Mon Sep 17 00:00:00 2001 From: dtrai2 Date: Thu, 5 Dec 2024 17:55:09 +0100 Subject: [PATCH 03/13] improve test speed by mocking the threaded http server --- logprep/connector/http/input.py | 27 ++++++++++--------- tests/unit/connector/test_http_input.py | 36 +++++++++++-------------- 2 files changed, 30 insertions(+), 33 deletions(-) diff --git a/logprep/connector/http/input.py b/logprep/connector/http/input.py index 57436d58b..2f97f8d9a 100644 --- a/logprep/connector/http/input.py +++ b/logprep/connector/http/input.py @@ -23,10 +23,10 @@ host: 0.0.0.0 port: 9000 endpoints: - /firstendpoint: json + /firstendpoint: json /second*: plaintext /(third|fourth)/endpoint: jsonl - + The endpoint config supports regex and wildcard patterns: * :code:`/second*`: matches everything after asterisk * :code:`/(third|fourth)/endpoint` matches either third or forth in the first part @@ -38,7 +38,7 @@ add basic authentication for a specific endpoint. The format of this file would look like: .. code-block:: yaml - :caption: Example for credentials file + :caption: Example for credentials file :linenos: input: @@ -49,7 +49,7 @@ /second*: username: user password: secret_password - + You can choose between a plain secret with the key :code:`password` or a filebased secret with the key :code:`password_file`. @@ -60,20 +60,20 @@ - basic auth must only be used with strong passwords - basic auth must only be used with TLS encryption - avoid to reveal your plaintext secrets in public repositories - + Behaviour of HTTP Requests ^^^^^^^^^^^^^^^^^^^^^^^^^^ * :code:`GET`: - + * Responds always with 200 (ignores configured Basic Auth) * When Messages Queue is full, it responds with 429 * :code:`POST`: - + * Responds with 200 on non-Basic Auth Endpoints * Responds with 401 on Basic Auth Endpoints (and 200 with appropriate credentials) * When Messages Queue is full, it responds wiht 429 * :code:`ALL OTHER`: - + * Responds with 405 """ @@ -353,7 +353,7 @@ class Config(Input.Config): :title: Uvicorn Webserver Configuration :location: uvicorn_config :suggested-value: uvicorn_config.access_log: true, uvicorn_config.server_header: false, uvicorn_config.data_header: false - + Additionally to the below it is recommended to configure `ssl on the metrics server endpoint `_ @@ -378,8 +378,8 @@ class Config(Input.Config): """Configure endpoint routes with a Mapping of a path to an endpoint. Possible endpoints are: :code:`json`, :code:`jsonl`, :code:`plaintext`. It's possible to use wildcards and regexps for pattern matching. - - + + .. autoclass:: logprep.connector.http.input.PlaintextHttpEndpoint :noindex: .. autoclass:: logprep.connector.http.input.JSONLHttpEndpoint @@ -430,6 +430,7 @@ def __init__(self, name: str, configuration: "HttpInput.Config") -> None: ) schema = "https" if ssl_options else "http" self.target = f"{schema}://{host}:{port}" + self.app = None self.http_server = None def setup(self): @@ -462,9 +463,9 @@ def setup(self): self.metrics, ) - app = self._get_asgi_app(endpoints_config) + self.app = self._get_asgi_app(endpoints_config) self.http_server = http.ThreadingHTTPServer( - self._config.uvicorn_config, app, daemon=False, logger_name="HTTPServer" + self._config.uvicorn_config, self.app, daemon=False, logger_name="HTTPServer" ) self.http_server.start() diff --git a/tests/unit/connector/test_http_input.py b/tests/unit/connector/test_http_input.py index 66828c08f..7bd94546f 100644 --- a/tests/unit/connector/test_http_input.py +++ b/tests/unit/connector/test_http_input.py @@ -13,16 +13,15 @@ import pytest import requests import responses -import uvicorn -from requests.auth import HTTPBasicAuth - from falcon import testing +from requests.auth import HTTPBasicAuth from logprep.abc.input import FatalInputError from logprep.connector.http.input import HttpInput from logprep.factory import Factory from logprep.framework.pipeline_manager import ThrottlingQueue from logprep.util.defaults import ENV_NAME_LOGPREP_CREDENTIALS_FILE +from logprep.util.http import ThreadingHTTPServer from tests.unit.connector.base import BaseInputTestCase @@ -50,6 +49,10 @@ def create_credentials(tmp_path): return str(credential_file_path) +original_thread_start = ThreadingHTTPServer.start +ThreadingHTTPServer.start = mock.MagicMock() + + class TestHttpConnector(BaseInputTestCase): def setup_method(self): @@ -59,9 +62,8 @@ def setup_method(self): super().setup_method() self.object.pipeline_index = 1 self.object.setup() - self.app = self.object.http_server.server.config.app self.target = self.object.target - self.client = testing.TestClient(self.app) + self.client = testing.TestClient(self.object.app) CONFIG: dict = { "type": "http_input", @@ -220,7 +222,7 @@ def test_get_next_returns_first_in_first_out(self): {"message": "third message"}, ] for message in data: - requests.post(url=self.target + "/json", json=message, timeout=0.5) + self.client.post("/json", json=message) assert self.object.get_next(0.001) == data[0] assert self.object.get_next(0.001) == data[1] assert self.object.get_next(0.001) == data[2] @@ -244,8 +246,8 @@ def test_get_next_returns_first_in_first_out_for_mixed_endpoints(self): def test_get_next_returns_none_for_empty_queue(self): assert self.object.get_next(0.001) is None - def test_server_returns_uvicorn_server_instance(self): - assert isinstance(self.object.http_server.server, uvicorn.Server) + def test_http_server_is_of_instance_threading_http_server(self): + assert isinstance(self.object.http_server, ThreadingHTTPServer) def test_server_starts_threaded_server(self): message = {"message": "my message"} @@ -262,11 +264,11 @@ def test_get_metadata(self): connector = Factory.create({"test connector": connector_config}) connector.pipeline_index = 1 connector.setup() - target = connector.target - resp = requests.post(url=f"{target}/json", json=message, timeout=0.5) + client = testing.TestClient(connector.app) + resp = client.post("/json", json=message) assert resp.status_code == 200 message = connector.messages.get(timeout=0.5) - assert message["custom"]["url"] == target + "/json" + assert message["custom"]["url"].endswith("/json") assert re.search(r"\d+\.\d+\.\d+\.\d+", message["custom"]["remote_addr"]) assert isinstance(message["custom"]["user_agent"], str) @@ -277,20 +279,14 @@ def test_server_multiple_config_changes(self): connector = Factory.create({"test connector": connector_config}) connector.pipeline_index = 1 connector.setup() - target = connector.target - resp = requests.post(url=f"{target}/json", json=message, timeout=0.5) + client = testing.TestClient(connector.app) + resp = client.post("/json", json=message) assert resp.status_code == 200 - target = target.replace(":9001", ":9000") - try: - resp = requests.post(url=f"{target}/json", json=message, timeout=0.5) - except requests.exceptions.ConnectionError as e: - assert e.response is None connector_config = deepcopy(self.CONFIG) connector = Factory.create({"test connector": connector_config}) connector.pipeline_index = 1 connector.setup() - target = connector.target - resp = requests.post(url=f"{target}/json", json=message, timeout=0.5) + resp = client.post("/json", json=message) assert resp.status_code == 200 def test_get_next_with_hmac_of_raw_message(self): From 2544fcc3d97faca3ce674e769a8ee51657014bc1 Mon Sep 17 00:00:00 2001 From: dtrai2 Date: Fri, 6 Dec 2024 10:03:38 +0100 Subject: [PATCH 04/13] remove `simulate` from client calls - updated test cases to use direct `get` and `post` methods instead of `simulate_get` and `simulate_post`. - improves the clarity of the test code and aligns with the updated API usage. - generalizes implementation --- tests/unit/connector/test_http_input.py | 54 ++++++++++++------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/tests/unit/connector/test_http_input.py b/tests/unit/connector/test_http_input.py index 7bd94546f..2af56e86e 100644 --- a/tests/unit/connector/test_http_input.py +++ b/tests/unit/connector/test_http_input.py @@ -115,43 +115,43 @@ def test_not_first_pipeline(self): assert connector.http_server is None def test_get_method_returns_200(self): - resp = self.client.simulate_get("/json") + resp = self.client.get("/json") assert resp.status_code == 200 def test_get_method_returns_200_with_authentication(self): - resp = self.client.simulate_get("/auth-json-secret") + resp = self.client.get("/auth-json-secret") assert resp.status_code == 200 def test_get_method_returns_429_if_queue_is_full(self): self.object.messages.full = mock.MagicMock() self.object.messages.full.return_value = True - resp = self.client.simulate_get("/json") + resp = self.client.get("/json") assert resp.status_code == 429 def test_get_error_code_too_many_requests(self): data = {"message": "my log message"} self.object.messages.put = mock.MagicMock() self.object.messages.put.side_effect = queue.Full() - resp = self.client.simulate_post("/json", json=data) + resp = self.client.post("/json", json=data) assert resp.status_code == 429 def test_json_endpoint_accepts_post_request(self): data = {"message": "my log message"} - resp = self.client.simulate_post("/json", json=data) + resp = self.client.post("/json", json=data) assert resp.status_code == 200 def test_json_endpoint_match_wildcard_route(self): data = {"message": "my log message"} - resp = self.client.simulate_post("/json", json=data) + resp = self.client.post("/json", json=data) assert resp.status_code == 200 def test_json_endpoint_not_match_wildcard_route(self): data = {"message": "my log message"} - resp = self.client.simulate_post("/api/wildcard_path/json/another_path", json=data) + resp = self.client.post("/api/wildcard_path/json/another_path", json=data) assert resp.status_code == 404 data = {"message": "my log message"} - resp = self.client.simulate_post("/json", json=data) + resp = self.client.post("/json", json=data) assert resp.status_code == 200 event_from_queue = self.object.messages.get(timeout=0.001) @@ -159,39 +159,39 @@ def test_json_endpoint_not_match_wildcard_route(self): def test_plaintext_endpoint_accepts_post_request(self): data = "my log message" - resp = self.client.simulate_post("/plaintext", json=data) + resp = self.client.post("/plaintext", json=data) assert resp.status_code == 200 def test_plaintext_message_is_put_in_queue(self): data = "my log message" - resp = self.client.simulate_post("/plaintext", body=data) + resp = self.client.post("/plaintext", body=data) assert resp.status_code == 200 event_from_queue = self.object.messages.get(timeout=0.001) assert event_from_queue.get("message") == data def test_jsonl_endpoint_match_regex_route(self): data = {"message": "my log message"} - resp = self.client.simulate_post("/first/jsonl", json=data) + resp = self.client.post("/first/jsonl", json=data) assert resp.status_code == 200 def test_jsonl_endpoint_not_match_regex_route(self): data = {"message": "my log message"} - resp = self.client.simulate_post("/firs/jsonl", json=data) + resp = self.client.post("/firs/jsonl", json=data) assert resp.status_code == 404 def test_jsonl_endpoint_not_match_before_start_regex(self): data = {"message": "my log message"} - resp = self.client.simulate_post("/api/first/jsonl", json=data) + resp = self.client.post("/api/first/jsonl", json=data) assert resp.status_code == 404 def test_jsonl_endpoint_match_wildcard_regex_mix_route(self): data = {"message": "my log message"} - resp = self.client.simulate_post("/third/jsonl/another_path/last_path", json=data) + resp = self.client.post("/third/jsonl/another_path/last_path", json=data) assert resp.status_code == 200 def test_jsonl_endpoint_not_match_wildcard_regex_mix_route(self): data = {"message": "my log message"} - resp = self.client.simulate_post("/api/third/jsonl/another_path", json=data) + resp = self.client.post("/api/third/jsonl/another_path", json=data) assert resp.status_code == 404 def test_jsonl_messages_are_put_in_queue(self): @@ -200,7 +200,7 @@ def test_jsonl_messages_are_put_in_queue(self): {"message": "my second log message"} {"message": "my third log message"} """ - resp = self.client.simulate_post("/jsonl", body=data) + resp = self.client.post("/jsonl", body=data) assert resp.status_code == 200 assert self.object.messages.qsize() == 3 event_from_queue = self.object.messages.get(timeout=1) @@ -212,7 +212,7 @@ def test_jsonl_messages_are_put_in_queue(self): def test_get_next_returns_message_from_queue(self): data = {"message": "my log message"} - self.client.simulate_post("/json", json=data) + self.client.post("/json", json=data) assert self.object.get_next(0.001) == data def test_get_next_returns_first_in_first_out(self): @@ -236,9 +236,9 @@ def test_get_next_returns_first_in_first_out_for_mixed_endpoints(self): for message in data: endpoint, post_data = message.values() if endpoint == "json": - self.client.simulate_post("/json", json=post_data) + self.client.post("/json", json=post_data) if endpoint == "plaintext": - self.client.simulate_post("/plaintext", body=post_data) + self.client.post("/plaintext", body=post_data) assert self.object.get_next(0.001) == data[0].get("data") assert self.object.get_next(0.001) == {"message": data[1].get("data")} assert self.object.get_next(0.001) == data[2].get("data") @@ -253,7 +253,7 @@ def test_server_starts_threaded_server(self): message = {"message": "my message"} for i in range(100): message["message"] = f"message number {i}" - self.client.simulate_post("/json", json=message) + self.client.post("/json", json=message) assert self.object.messages.qsize() == 100, "messages are put to queue" def test_get_metadata(self): @@ -306,7 +306,7 @@ def test_get_next_with_hmac_of_raw_message(self): connector.pipeline_index = 1 connector.setup() test_event = "the content" - self.client.simulate_post("/plaintext", body=test_event) + self.client.post("/plaintext", body=test_event) expected_event = { "message": "the content", @@ -395,17 +395,17 @@ def test_messages_is_multiprocessing_queue(self): def test_all_endpoints_share_the_same_queue(self): data = {"message": "my log message"} - self.client.simulate_post("/json", json=data) + self.client.post("/json", json=data) assert self.object.messages.qsize() == 1 data = "my log message" - self.client.simulate_post("/plaintext", json=data) + self.client.post("/plaintext", json=data) assert self.object.messages.qsize() == 2 data = """ {"message": "my first log message"} {"message": "my second log message"} {"message": "my third log message"} """ - self.client.simulate_post("/jsonl", body=data) + self.client.post("/jsonl", body=data) assert self.object.messages.qsize() == 5 def test_sets_target_to_https_schema_if_ssl_options(self): @@ -432,7 +432,7 @@ def test_enpoints_count_requests(self): self.object.setup() random_number = random.randint(1, 100) for number in range(random_number): - self.client.simulate_post("/json", json={"message": f"my message{number}"}) + self.client.post("/json", json={"message": f"my message{number}"}) assert self.object.metrics.number_of_http_requests == random_number @pytest.mark.parametrize("endpoint", ["json", "plaintext", "jsonl"]) @@ -440,13 +440,13 @@ def test_endpoint_handles_gzip_compression(self, endpoint): data = {"message": "my log message"} data = gzip.compress(json.dumps(data).encode()) headers = {"Content-Encoding": "gzip"} - resp = self.client.simulate_post(f"/{endpoint}", body=data, headers=headers) + resp = self.client.post(f"/{endpoint}", body=data, headers=headers) assert resp.status_code == 200 @pytest.mark.parametrize("endpoint", ["json", "jsonl"]) def test_raises_http_bad_request_on_decode_error(self, endpoint): data = "this is not a valid json nor jsonl" - resp = self.client.simulate_post(f"/{endpoint}", body=data) + resp = self.client.post(f"/{endpoint}", body=data) assert resp.status_code == 400 @responses.activate From 31b52570e75e40fdd1b377fc6c2f24d848704469 Mon Sep 17 00:00:00 2001 From: dtrai2 Date: Fri, 6 Dec 2024 10:25:00 +0100 Subject: [PATCH 05/13] drastically improve test speed by removing 10 repetitions in loop - doing 90 instead of 100 loop repetitions decreases test from 4 seconds down to 33 milliseconds --- tests/unit/connector/test_http_input.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/connector/test_http_input.py b/tests/unit/connector/test_http_input.py index 2af56e86e..6ffc2c4b1 100644 --- a/tests/unit/connector/test_http_input.py +++ b/tests/unit/connector/test_http_input.py @@ -251,10 +251,10 @@ def test_http_server_is_of_instance_threading_http_server(self): def test_server_starts_threaded_server(self): message = {"message": "my message"} - for i in range(100): + for i in range(90): message["message"] = f"message number {i}" self.client.post("/json", json=message) - assert self.object.messages.qsize() == 100, "messages are put to queue" + assert self.object.messages.qsize() == 90, "messages are put to queue" def test_get_metadata(self): message = {"message": "my message"} From d20060fd4b98983a3ed060a611379b1ae3d2ca71 Mon Sep 17 00:00:00 2001 From: dtrai2 Date: Fri, 6 Dec 2024 11:20:49 +0100 Subject: [PATCH 06/13] fix authentication tests --- tests/unit/connector/test_http_input.py | 35 +++++++++++-------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/tests/unit/connector/test_http_input.py b/tests/unit/connector/test_http_input.py index 6ffc2c4b1..9022ad14f 100644 --- a/tests/unit/connector/test_http_input.py +++ b/tests/unit/connector/test_http_input.py @@ -14,7 +14,7 @@ import requests import responses from falcon import testing -from requests.auth import HTTPBasicAuth +from requests.auth import _basic_auth_str from logprep.abc.input import FatalInputError from logprep.connector.http.input import HttpInput @@ -325,9 +325,8 @@ def test_endpoint_returns_401_if_authorization_not_provided(self, credentials_fi new_connector = Factory.create({"test connector": self.CONFIG}) new_connector.pipeline_index = 1 new_connector.setup() - resp = requests.post( - url=f"{self.target}/auth-json-file", timeout=0.5, data=json.dumps(data) - ) + client = testing.TestClient(new_connector.app) + resp = client.post("/auth-json-file", body=json.dumps(data)) assert resp.status_code == 401 def test_endpoint_returns_401_on_wrong_authorization(self, credentials_file_path): @@ -337,10 +336,9 @@ def test_endpoint_returns_401_on_wrong_authorization(self, credentials_file_path new_connector = Factory.create({"test connector": self.CONFIG}) new_connector.pipeline_index = 1 new_connector.setup() - basic = HTTPBasicAuth("wrong", "credentials") - resp = requests.post( - url=f"{self.target}/auth-json-file", auth=basic, timeout=0.5, json=data - ) + headers = {"Authorization": _basic_auth_str("wrong", "credentials")} + client = testing.TestClient(new_connector.app, headers=headers) + resp = client.post("/auth-json-file", body=json.dumps(data)) assert resp.status_code == 401 def test_endpoint_returns_200_on_correct_authorization_with_password_from_file( @@ -352,10 +350,9 @@ def test_endpoint_returns_200_on_correct_authorization_with_password_from_file( new_connector = Factory.create({"test connector": self.CONFIG}) new_connector.pipeline_index = 1 new_connector.setup() - basic = HTTPBasicAuth("user", "file_password") - resp = requests.post( - url=f"{self.target}/auth-json-file", auth=basic, timeout=0.5, json=data - ) + headers = {"Authorization": _basic_auth_str("user", "file_password")} + client = testing.TestClient(new_connector.app, headers=headers) + resp = client.post("/auth-json-file", body=json.dumps(data)) assert resp.status_code == 200 def test_endpoint_returns_200_on_correct_authorization_with_password_within_credentials_file( @@ -367,10 +364,9 @@ def test_endpoint_returns_200_on_correct_authorization_with_password_within_cred new_connector = Factory.create({"test connector": self.CONFIG}) new_connector.pipeline_index = 1 new_connector.setup() - basic = HTTPBasicAuth("user", "secret_password") - resp = requests.post( - url=f"{self.target}/auth-json-secret", auth=basic, timeout=0.5, json=data - ) + headers = {"Authorization": _basic_auth_str("user", "secret_password")} + client = testing.TestClient(new_connector.app, headers=headers) + resp = client.post("/auth-json-secret", body=json.dumps(data)) assert resp.status_code == 200 def test_endpoint_returns_200_on_correct_authorization_for_subpath(self, credentials_file_path): @@ -380,10 +376,9 @@ def test_endpoint_returns_200_on_correct_authorization_for_subpath(self, credent new_connector = Factory.create({"test connector": self.CONFIG}) new_connector.pipeline_index = 1 new_connector.setup() - basic = HTTPBasicAuth("user", "password") - resp = requests.post( - url=f"{self.target}/auth-json-secret/AB/json", auth=basic, timeout=0.5, json=data - ) + headers = {"Authorization": _basic_auth_str("user", "password")} + client = testing.TestClient(new_connector.app, headers=headers) + resp = client.post("/auth-json-secret/AB/json", body=json.dumps(data)) assert resp.status_code == 200 def test_two_connector_instances_share_the_same_queue(self): From 627f5af369535e939276afcdb647042d47c3ceb8 Mon Sep 17 00:00:00 2001 From: dtrai2 Date: Fri, 6 Dec 2024 11:41:59 +0100 Subject: [PATCH 07/13] set mock in `setup_method` and revert in `teardown_method` --- tests/unit/connector/test_http_input.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/unit/connector/test_http_input.py b/tests/unit/connector/test_http_input.py index 9022ad14f..09f4aac9c 100644 --- a/tests/unit/connector/test_http_input.py +++ b/tests/unit/connector/test_http_input.py @@ -50,12 +50,12 @@ def create_credentials(tmp_path): original_thread_start = ThreadingHTTPServer.start -ThreadingHTTPServer.start = mock.MagicMock() class TestHttpConnector(BaseInputTestCase): def setup_method(self): + ThreadingHTTPServer.start = mock.MagicMock() HttpInput.messages = ThrottlingQueue( ctx=multiprocessing.get_context(), maxsize=self.CONFIG.get("message_backlog_size") ) @@ -94,6 +94,7 @@ def teardown_method(self): while not self.object.messages.empty(): self.object.messages.get(timeout=0.001) self.object.shut_down() + ThreadingHTTPServer.start = original_thread_start def test_create_connector(self): assert isinstance(self.object, HttpInput) From 16a6fe4444ee433a2f373ace71387912b5888899 Mon Sep 17 00:00:00 2001 From: dtrai2 Date: Fri, 6 Dec 2024 13:30:43 +0100 Subject: [PATCH 08/13] add print for debug in ci --- logprep/processor/domain_resolver/processor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/logprep/processor/domain_resolver/processor.py b/logprep/processor/domain_resolver/processor.py index 3b5655651..bfb4415ea 100644 --- a/logprep/processor/domain_resolver/processor.py +++ b/logprep/processor/domain_resolver/processor.py @@ -212,6 +212,7 @@ def _add_resolve_infos_to_event(self, event, rule, resolved_ip): def _resolve_ip(self, domain, hash_string=None): try: + print(socket.gethostbyname) result = self._thread_pool.apply_async(socket.gethostbyname, (domain,)) resolved_ip = result.get(timeout=self._config.timeout) return resolved_ip From dfbdf10760373471e8893f97b3e1e3ec91afbbf6 Mon Sep 17 00:00:00 2001 From: dtrai2 Date: Fri, 6 Dec 2024 13:35:40 +0100 Subject: [PATCH 09/13] remove print again --- logprep/processor/domain_resolver/processor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/logprep/processor/domain_resolver/processor.py b/logprep/processor/domain_resolver/processor.py index bfb4415ea..3b5655651 100644 --- a/logprep/processor/domain_resolver/processor.py +++ b/logprep/processor/domain_resolver/processor.py @@ -212,7 +212,6 @@ def _add_resolve_infos_to_event(self, event, rule, resolved_ip): def _resolve_ip(self, domain, hash_string=None): try: - print(socket.gethostbyname) result = self._thread_pool.apply_async(socket.gethostbyname, (domain,)) resolved_ip = result.get(timeout=self._config.timeout) return resolved_ip From 20e294583f14d721fbe6c784d7aff46cb12a91b7 Mon Sep 17 00:00:00 2001 From: dtrai2 Date: Fri, 6 Dec 2024 13:41:01 +0100 Subject: [PATCH 10/13] add print again ... --- logprep/processor/domain_resolver/processor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/logprep/processor/domain_resolver/processor.py b/logprep/processor/domain_resolver/processor.py index 3b5655651..bfb4415ea 100644 --- a/logprep/processor/domain_resolver/processor.py +++ b/logprep/processor/domain_resolver/processor.py @@ -212,6 +212,7 @@ def _add_resolve_infos_to_event(self, event, rule, resolved_ip): def _resolve_ip(self, domain, hash_string=None): try: + print(socket.gethostbyname) result = self._thread_pool.apply_async(socket.gethostbyname, (domain,)) resolved_ip = result.get(timeout=self._config.timeout) return resolved_ip From e37c2742ac038b3ea7d42245b2708b62f3d8c47a Mon Sep 17 00:00:00 2001 From: dtrai2 Date: Fri, 6 Dec 2024 13:58:36 +0100 Subject: [PATCH 11/13] remove print and disable http_input mock again - test if the http_input mock has a side effect on the domain_resolver test --- logprep/processor/domain_resolver/processor.py | 1 - tests/unit/connector/test_http_input.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/logprep/processor/domain_resolver/processor.py b/logprep/processor/domain_resolver/processor.py index bfb4415ea..3b5655651 100644 --- a/logprep/processor/domain_resolver/processor.py +++ b/logprep/processor/domain_resolver/processor.py @@ -212,7 +212,6 @@ def _add_resolve_infos_to_event(self, event, rule, resolved_ip): def _resolve_ip(self, domain, hash_string=None): try: - print(socket.gethostbyname) result = self._thread_pool.apply_async(socket.gethostbyname, (domain,)) resolved_ip = result.get(timeout=self._config.timeout) return resolved_ip diff --git a/tests/unit/connector/test_http_input.py b/tests/unit/connector/test_http_input.py index 09f4aac9c..9f6e17477 100644 --- a/tests/unit/connector/test_http_input.py +++ b/tests/unit/connector/test_http_input.py @@ -55,7 +55,7 @@ def create_credentials(tmp_path): class TestHttpConnector(BaseInputTestCase): def setup_method(self): - ThreadingHTTPServer.start = mock.MagicMock() + # ThreadingHTTPServer.start = mock.MagicMock() HttpInput.messages = ThrottlingQueue( ctx=multiprocessing.get_context(), maxsize=self.CONFIG.get("message_backlog_size") ) @@ -94,7 +94,7 @@ def teardown_method(self): while not self.object.messages.empty(): self.object.messages.get(timeout=0.001) self.object.shut_down() - ThreadingHTTPServer.start = original_thread_start + # ThreadingHTTPServer.start = original_thread_start def test_create_connector(self): assert isinstance(self.object, HttpInput) From 80270f49ab3a616b52e1342d5ea67925becf8c7c Mon Sep 17 00:00:00 2001 From: dtrai2 Date: Fri, 6 Dec 2024 14:07:22 +0100 Subject: [PATCH 12/13] activate mock again --- tests/unit/connector/test_http_input.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/unit/connector/test_http_input.py b/tests/unit/connector/test_http_input.py index 9f6e17477..7b4ead3f3 100644 --- a/tests/unit/connector/test_http_input.py +++ b/tests/unit/connector/test_http_input.py @@ -54,17 +54,6 @@ def create_credentials(tmp_path): class TestHttpConnector(BaseInputTestCase): - def setup_method(self): - # ThreadingHTTPServer.start = mock.MagicMock() - HttpInput.messages = ThrottlingQueue( - ctx=multiprocessing.get_context(), maxsize=self.CONFIG.get("message_backlog_size") - ) - super().setup_method() - self.object.pipeline_index = 1 - self.object.setup() - self.target = self.object.target - self.client = testing.TestClient(self.object.app) - CONFIG: dict = { "type": "http_input", "message_backlog_size": 100, @@ -90,11 +79,22 @@ def setup_method(self): "logprep_number_of_http_requests", ] + def setup_method(self): + ThreadingHTTPServer.start = mock.MagicMock() + HttpInput.messages = ThrottlingQueue( + ctx=multiprocessing.get_context(), maxsize=self.CONFIG.get("message_backlog_size") + ) + super().setup_method() + self.object.pipeline_index = 1 + self.object.setup() + self.target = self.object.target + self.client = testing.TestClient(self.object.app) + def teardown_method(self): while not self.object.messages.empty(): self.object.messages.get(timeout=0.001) self.object.shut_down() - # ThreadingHTTPServer.start = original_thread_start + ThreadingHTTPServer.start = original_thread_start def test_create_connector(self): assert isinstance(self.object, HttpInput) From da006dbd6b0c71eeee676d57f18837f1f7289e64 Mon Sep 17 00:00:00 2001 From: dtrai2 Date: Fri, 6 Dec 2024 15:40:01 +0100 Subject: [PATCH 13/13] clean up mock --- tests/unit/connector/test_http_input.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/tests/unit/connector/test_http_input.py b/tests/unit/connector/test_http_input.py index 7b4ead3f3..e9ea6f5e4 100644 --- a/tests/unit/connector/test_http_input.py +++ b/tests/unit/connector/test_http_input.py @@ -21,7 +21,6 @@ from logprep.factory import Factory from logprep.framework.pipeline_manager import ThrottlingQueue from logprep.util.defaults import ENV_NAME_LOGPREP_CREDENTIALS_FILE -from logprep.util.http import ThreadingHTTPServer from tests.unit.connector.base import BaseInputTestCase @@ -49,9 +48,7 @@ def create_credentials(tmp_path): return str(credential_file_path) -original_thread_start = ThreadingHTTPServer.start - - +@mock.patch("logprep.connector.http.input.http.ThreadingHTTPServer", new=mock.MagicMock()) class TestHttpConnector(BaseInputTestCase): CONFIG: dict = { @@ -80,13 +77,15 @@ class TestHttpConnector(BaseInputTestCase): ] def setup_method(self): - ThreadingHTTPServer.start = mock.MagicMock() HttpInput.messages = ThrottlingQueue( ctx=multiprocessing.get_context(), maxsize=self.CONFIG.get("message_backlog_size") ) super().setup_method() self.object.pipeline_index = 1 - self.object.setup() + with mock.patch( + "logprep.connector.http.input.http.ThreadingHTTPServer", new=mock.MagicMock() + ): + self.object.setup() self.target = self.object.target self.client = testing.TestClient(self.object.app) @@ -94,7 +93,6 @@ def teardown_method(self): while not self.object.messages.empty(): self.object.messages.get(timeout=0.001) self.object.shut_down() - ThreadingHTTPServer.start = original_thread_start def test_create_connector(self): assert isinstance(self.object, HttpInput) @@ -247,9 +245,6 @@ def test_get_next_returns_first_in_first_out_for_mixed_endpoints(self): def test_get_next_returns_none_for_empty_queue(self): assert self.object.get_next(0.001) is None - def test_http_server_is_of_instance_threading_http_server(self): - assert isinstance(self.object.http_server, ThreadingHTTPServer) - def test_server_starts_threaded_server(self): message = {"message": "my message"} for i in range(90):