From 05ecc1b15d445b55e2e00ffe9f96eebe8363bbd7 Mon Sep 17 00:00:00 2001 From: Jenifer Tabita Ciuciu-Kiss Date: Mon, 21 Oct 2024 02:46:03 +0200 Subject: [PATCH 1/5] add host and port parameters --- ontologytimemachine/custom_proxy.py | 11 ++++++----- ontologytimemachine/utils/config.py | 20 ++++++++++++++++++++ 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/ontologytimemachine/custom_proxy.py b/ontologytimemachine/custom_proxy.py index 37d8f98..8110824 100644 --- a/ontologytimemachine/custom_proxy.py +++ b/ontologytimemachine/custom_proxy.py @@ -21,11 +21,12 @@ from ontologytimemachine.utils.config import HttpsInterception, ClientConfigViaProxyAuth -IP = "0.0.0.0" -PORT = "8896" - +default_cfg: Config = Config() config = None +IP = default_cfg.port +PORT = default_cfg.host + logging.basicConfig( level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" ) @@ -189,9 +190,9 @@ def queue_response(self, response): sys.argv += [ "--hostname", - IP, + config.host, "--port", - PORT, + config.port, "--plugins", __name__ + ".OntologyTimeMachinePlugin", ] diff --git a/ontologytimemachine/utils/config.py b/ontologytimemachine/utils/config.py index 8e3f9e9..5306a77 100644 --- a/ontologytimemachine/utils/config.py +++ b/ontologytimemachine/utils/config.py @@ -69,6 +69,8 @@ class Config: httpsInterception: HttpsInterception = HttpsInterception.ALL disableRemovingRedirects: bool = False timestamp: str = "" + host: str = "0.0.0.0" + port: str = "8896" # manifest: Dict[str, Any] = None @@ -166,6 +168,22 @@ def parse_arguments(config_str: str = "") -> Config: help="Level of the logging: debug, info, warning, error.", ) + # Host + parser.add_argument( + "--host", + type=str, + default=default_cfg.host, + help="Hostname or IP address to bind the proxy to. Default is '0.0.0.0'.", + ) + + # Port + parser.add_argument( + "--port", + type=str, + default=default_cfg.port, + help="Port number to bind the proxy to. Default is 8896.", + ) + if config_str: args = parser.parse_args(config_str) else: @@ -205,6 +223,8 @@ def parse_arguments(config_str: str = "") -> Config: clientConfigViaProxyAuth=args.clientConfigViaProxyAuth, disableRemovingRedirects=args.disableRemovingRedirects, timestamp=args.timestamp if hasattr(args, "timestamp") else "", + host=args.host, + port=args.port, ) return config From 448217b46b3d34167fb9c7ed32f86233ad5d1c0c Mon Sep 17 00:00:00 2001 From: Jenifer Tabita Ciuciu-Kiss Date: Mon, 21 Oct 2024 02:57:35 +0200 Subject: [PATCH 2/5] define logging in config, adjust based on the logLevel parameter and import this from all the other files --- ontologytimemachine/custom_proxy.py | 12 +++++------- ontologytimemachine/proxy_wrapper.py | 8 +------- ontologytimemachine/utils/config.py | 15 +++++++++++++++ .../utils/download_archivo_urls.py | 8 +------- ontologytimemachine/utils/proxy_logic.py | 8 +------- ontologytimemachine/utils/utils.py | 13 +++++-------- 6 files changed, 28 insertions(+), 36 deletions(-) diff --git a/ontologytimemachine/custom_proxy.py b/ontologytimemachine/custom_proxy.py index 8110824..5ac09f2 100644 --- a/ontologytimemachine/custom_proxy.py +++ b/ontologytimemachine/custom_proxy.py @@ -17,8 +17,11 @@ from http.client import responses import proxy import sys -import logging -from ontologytimemachine.utils.config import HttpsInterception, ClientConfigViaProxyAuth +from ontologytimemachine.utils.config import ( + HttpsInterception, + ClientConfigViaProxyAuth, + logger, +) default_cfg: Config = Config() @@ -27,11 +30,6 @@ IP = default_cfg.port PORT = default_cfg.host -logging.basicConfig( - level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" -) -logger = logging.getLogger(__name__) - class OntologyTimeMachinePlugin(HttpProxyBasePlugin): def __init__(self, *args, **kwargs): diff --git a/ontologytimemachine/proxy_wrapper.py b/ontologytimemachine/proxy_wrapper.py index 37d6e74..c612db0 100644 --- a/ontologytimemachine/proxy_wrapper.py +++ b/ontologytimemachine/proxy_wrapper.py @@ -1,14 +1,8 @@ from abc import ABC, abstractmethod from proxy.http.parser import HttpParser -import logging from typing import Tuple, Dict, Any import base64 - -# Configure logger -logging.basicConfig( - level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" -) -logger = logging.getLogger(__name__) +from ontologytimemachine.utils.config import logger class AbstractRequestWrapper(ABC): diff --git a/ontologytimemachine/utils/config.py b/ontologytimemachine/utils/config.py index 5306a77..4fa5870 100644 --- a/ontologytimemachine/utils/config.py +++ b/ontologytimemachine/utils/config.py @@ -1,9 +1,16 @@ import argparse from dataclasses import dataclass, field from enum import Enum +import logging from typing import Dict, Any, Type, TypeVar +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) + + class EnumValuePrint( Enum ): # redefine how the enum is printed such that it will show up properly the cmd help message (choices) @@ -211,6 +218,14 @@ def parse_arguments(config_str: str = "") -> Config: # print the default configuration with all nested members # print(default_cfg) # TODO remove + if args.logLevel != LogLevel.INFO: + logging.basicConfig( + level=args.logLevel.value, + format="%(asctime)s - %(levelname)s - %(message)s", + ) + logger = logging.getLogger(__name__) + logger.info(f"Logging level set to: {args.logLevel}") + # Initialize the Config class with parsed arguments config = Config( logLevel=args.logLevel, diff --git a/ontologytimemachine/utils/download_archivo_urls.py b/ontologytimemachine/utils/download_archivo_urls.py index 030fff5..ba68c01 100644 --- a/ontologytimemachine/utils/download_archivo_urls.py +++ b/ontologytimemachine/utils/download_archivo_urls.py @@ -1,6 +1,5 @@ import os import hashlib -import logging import requests import schedule import time @@ -8,6 +7,7 @@ from datetime import datetime, timedelta from urllib.parse import urlparse from typing import Set, Tuple +from ontologytimemachine.utils.config import logger ARCHIVO_PARSED_URLS: Set[Tuple[str, str]] = set() @@ -22,12 +22,6 @@ DOWNLOAD_INTERVAL = timedelta(days=1) # 1 day interval for checking the download -logging.basicConfig( - level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" -) -logger = logging.getLogger(__name__) - - def schedule_daily_download(): """Schedule the download to run at 3 AM every day.""" schedule.every().day.at("03:00").do(download_archivo_urls) diff --git a/ontologytimemachine/utils/proxy_logic.py b/ontologytimemachine/utils/proxy_logic.py index b51bfae..fb3f294 100644 --- a/ontologytimemachine/utils/proxy_logic.py +++ b/ontologytimemachine/utils/proxy_logic.py @@ -1,4 +1,3 @@ -import logging import requests from ontologytimemachine.utils.config import parse_arguments from ontologytimemachine.proxy_wrapper import AbstractRequestWrapper @@ -26,15 +25,10 @@ OntoVersion, HttpsInterception, ClientConfigViaProxyAuth, + logger, ) -logging.basicConfig( - level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" -) -logger = logging.getLogger(__name__) - - def do_block_CONNECT_request(config: Config) -> bool: if config.httpsInterception == HttpsInterception.BLOCK: logger.info("decided to block CONNECT request due to config enum") diff --git a/ontologytimemachine/utils/utils.py b/ontologytimemachine/utils/utils.py index f51267c..7e511a2 100644 --- a/ontologytimemachine/utils/utils.py +++ b/ontologytimemachine/utils/utils.py @@ -1,13 +1,10 @@ -import logging -import argparse from werkzeug.http import parse_accept_header -from ontologytimemachine.utils.config import OntoVersion, OntoFormat, OntoPrecedence - - -logging.basicConfig( - level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +from ontologytimemachine.utils.config import ( + OntoVersion, + OntoFormat, + OntoPrecedence, + logger, ) -logger = logging.getLogger(__name__) archivo_api = "https://archivo.dbpedia.org/download" From b9fe6d10441b1005200b25afe0c86ca966ec1ec8 Mon Sep 17 00:00:00 2001 From: Jenifer Tabita Ciuciu-Kiss Date: Mon, 21 Oct 2024 03:03:17 +0200 Subject: [PATCH 3/5] show default values for config help --- ontologytimemachine/utils/config.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/ontologytimemachine/utils/config.py b/ontologytimemachine/utils/config.py index 4fa5870..c011467 100644 --- a/ontologytimemachine/utils/config.py +++ b/ontologytimemachine/utils/config.py @@ -98,7 +98,10 @@ def enum_parser(enum_class: Type[E], value: str) -> E: def parse_arguments(config_str: str = "") -> Config: default_cfg: Config = Config() - parser = argparse.ArgumentParser(description="Process ontology format and version.") + parser = argparse.ArgumentParser( + description="Process ontology format and version.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) # Defining ontoFormat argument with nested options parser.add_argument( @@ -106,7 +109,7 @@ def parse_arguments(config_str: str = "") -> Config: type=lambda s: enum_parser(OntoFormat, s), default=default_cfg.ontoFormatConf.format, choices=list(OntoFormat), - help="Format of the ontology: turtle, ntriples, rdfxml, htmldocu", + help="Format of the ontology: turtle, ntriples, rdfxml, htmldocu. (default: %(default)s)", ) parser.add_argument( @@ -114,14 +117,14 @@ def parse_arguments(config_str: str = "") -> Config: type=lambda s: enum_parser(OntoPrecedence, s), default=default_cfg.ontoFormatConf.precedence, choices=list(OntoPrecedence), - help="Precedence of the ontology: default, enforcedPriority, always", + help="Precedence of the ontology: default, enforcedPriority, always. (default: %(default)s)", ) parser.add_argument( "--patchAcceptUpstream", type=bool, default=default_cfg.ontoFormatConf.patchAcceptUpstream, - help="Defines if the Accept Header is patched upstream in original mode.", + help="Defines if the Accept Header is patched upstream in original mode. (default: %(default)s)", ) # Defining ontoVersion argument @@ -130,7 +133,7 @@ def parse_arguments(config_str: str = "") -> Config: type=lambda s: enum_parser(OntoVersion, s), default=default_cfg.ontoVersion, choices=list(OntoVersion), - help="Version of the ontology: original, originalFailoverLive, originalFailoverArchivoMonitor, latestArchive, timestampArchive, dependencyManifest", + help="Version of the ontology: original, originalFailoverLive, originalFailoverArchivoMonitor, latestArchive, timestampArchive, dependencyManifest. (default: %(default)s)", ) # Enable/disable mode to only proxy requests to ontologies @@ -138,7 +141,7 @@ def parse_arguments(config_str: str = "") -> Config: "--restrictedAccess", type=bool, default=default_cfg.restrictedAccess, - help="Enable/disable mode to only proxy requests to ontologies stored in Archivo.", + help="Enable/disable mode to only proxy requests to ontologies stored in Archivo. (default: %(default)s)", ) # Enable HTTPS interception for specific domains @@ -147,7 +150,7 @@ def parse_arguments(config_str: str = "") -> Config: type=lambda s: enum_parser(HttpsInterception, s), default=default_cfg.httpsInterception, choices=list(HttpsInterception), - help="Enable HTTPS interception for specific domains: none, archivo, all, listfilename.", + help="Enable HTTPS interception for specific domains: none, archivo, all, listfilename. (default: %(default)s)", ) # Enable/disable inspecting or removing redirects @@ -155,7 +158,7 @@ def parse_arguments(config_str: str = "") -> Config: "--disableRemovingRedirects", type=bool, default=default_cfg.disableRemovingRedirects, - help="Enable/disable inspecting or removing redirects.", + help="Enable/disable inspecting or removing redirects. (default: %(default)s)", ) parser.add_argument( @@ -163,7 +166,7 @@ def parse_arguments(config_str: str = "") -> Config: type=lambda s: enum_parser(ClientConfigViaProxyAuth, s), default=default_cfg.clientConfigViaProxyAuth, choices=list(ClientConfigViaProxyAuth), - help="Define the configuration of the proxy via the proxy auth.", + help="Define the configuration of the proxy via the proxy auth. (default: %(default)s)", ) # Log level @@ -172,7 +175,7 @@ def parse_arguments(config_str: str = "") -> Config: type=lambda s: enum_parser(LogLevel, s), default=default_cfg.logLevel, choices=list(LogLevel), - help="Level of the logging: debug, info, warning, error.", + help="Level of the logging: debug, info, warning, error. (default: %(default)s)", ) # Host @@ -180,7 +183,7 @@ def parse_arguments(config_str: str = "") -> Config: "--host", type=str, default=default_cfg.host, - help="Hostname or IP address to bind the proxy to. Default is '0.0.0.0'.", + help="Hostname or IP address to bind the proxy to. (default: %(default)s)", ) # Port @@ -188,7 +191,7 @@ def parse_arguments(config_str: str = "") -> Config: "--port", type=str, default=default_cfg.port, - help="Port number to bind the proxy to. Default is 8896.", + help="Port number to bind the proxy to. (default: %(default)s)", ) if config_str: From ef8bbc6cc91d1cfc3117620aca9b22a03a0822d1 Mon Sep 17 00:00:00 2001 From: Jenifer Tabita Ciuciu-Kiss Date: Mon, 21 Oct 2024 03:15:10 +0200 Subject: [PATCH 4/5] fix broken testcase --- tests/test_integration.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/test_integration.py b/tests/test_integration.py index afb903a..35169ef 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -4,9 +4,14 @@ import time import subprocess import itertools -from ontologytimemachine.custom_proxy import IP, PORT +from ontologytimemachine.utils.config import Config +default_cfg: Config = Config() + +IP = default_cfg.port +PORT = default_cfg.host + PROXY = f"{IP}:{PORT}" HTTP_PROXY = f"http://{PROXY}" HTTPS_PROXY = f"http://{PROXY}" From df622dbf1a4f6612c7cf509579111e9bb5438197 Mon Sep 17 00:00:00 2001 From: Jenifer Tabita Ciuciu-Kiss Date: Mon, 21 Oct 2024 03:17:10 +0200 Subject: [PATCH 5/5] fix broken testcase --- ontologytimemachine/custom_proxy.py | 4 ++-- tests/test_integration.py | 21 +++++++++++++-------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/ontologytimemachine/custom_proxy.py b/ontologytimemachine/custom_proxy.py index afbb789..bb13590 100644 --- a/ontologytimemachine/custom_proxy.py +++ b/ontologytimemachine/custom_proxy.py @@ -27,8 +27,8 @@ default_cfg: Config = Config() config = None -IP = default_cfg.port -PORT = default_cfg.host +IP = default_cfg.host +PORT = default_cfg.port class OntologyTimeMachinePlugin(HttpProxyBasePlugin): diff --git a/tests/test_integration.py b/tests/test_integration.py index 35169ef..26150d0 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -4,14 +4,9 @@ import time import subprocess import itertools -from ontologytimemachine.utils.config import Config +from ontologytimemachine.custom_proxy import IP, PORT -default_cfg: Config = Config() - -IP = default_cfg.port -PORT = default_cfg.host - PROXY = f"{IP}:{PORT}" HTTP_PROXY = f"http://{PROXY}" HTTPS_PROXY = f"http://{PROXY}" @@ -116,14 +111,24 @@ def test_15_linked_web_apis(): def generic_test(iri, content_type): - response = requests.get(iri, proxies=PROXIES, verify=CA_CERT_PATH) + response = requests.get( + iri, + proxies=PROXIES, + verify=CA_CERT_PATH, + auth=HTTPBasicAuth("admin", "archivo"), + ) assert response.status_code == 200 assert iri in response.content.decode("utf-8") def iri_generic_test(iri): try: - response = requests.get(iri, proxies=PROXIES, verify=CA_CERT_PATH) + response = requests.get( + iri, + proxies=PROXIES, + verify=CA_CERT_PATH, + auth=HTTPBasicAuth("admin", "archivo"), + ) assert response.status_code == 200 assert iri in response.content.decode("utf-8") except AssertionError: