Merge pull request #104 from kuefmz/fix_cmd_line_interface
Fix cmd line interface
JJ-Author authored Oct 21, 2024
2 parents 31eaec9 + df622db commit 305d521
Showing 7 changed files with 82 additions and 58 deletions.
31 changes: 14 additions & 17 deletions ontologytimemachine/custom_proxy.py
@@ -17,19 +17,18 @@
from http.client import responses
import proxy
import sys
import logging
from ontologytimemachine.utils.config import HttpsInterception, ClientConfigViaProxyAuth

from ontologytimemachine.utils.config import (
HttpsInterception,
ClientConfigViaProxyAuth,
logger,
)

IP = "0.0.0.0"
PORT = "8896"

default_cfg: Config = Config()
config = None

logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
IP = default_cfg.host
PORT = default_cfg.port


class OntologyTimeMachinePlugin(HttpProxyBasePlugin):
@@ -188,14 +187,12 @@ def queue_response(self, response):
]

sys.argv += [
"--hostname", IP,
"--port", PORT,
'--insecure-tls-interception', # without it the proxy would not let through a response using an invalid upstream certificate in interception mode
# since there is currently a bug in proxy.py when a CONNECT request uses an IP address instead of a domain name,
# the proxy would not work correctly in transparent mode with a 3proxy setup, since it tries to match
# the IP address as hostname against the certificate instead of the domain name in the SNI field
"--log-level", config.logLevel.name,
"--plugins", __name__ + ".OntologyTimeMachinePlugin",
"--hostname",
config.host,
"--port",
config.port,
"--plugins",
__name__ + ".OntologyTimeMachinePlugin",
]

logger.info("Starting OntologyTimeMachineProxy server...")
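For orientation, a minimal sketch (not part of this commit) of how the flags assembled above reach proxy.py: custom_proxy.py extends sys.argv with values taken from the parsed Config and then starts proxy.py's entry point. The hostname, port, and dotted plugin path below are the defaults shown in this diff; the actual startup call sits outside the displayed hunks.

import sys
import proxy  # the proxy.py library

# Illustrative sketch: append the flags that the real module derives from its
# parsed Config, then hand control to proxy.py, which parses them itself.
sys.argv += [
    "--hostname", "0.0.0.0",  # config.host in the real module
    "--port", "8896",         # config.port in the real module
    "--plugins", "ontologytimemachine.custom_proxy.OntologyTimeMachinePlugin",
]
proxy.main()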
8 changes: 1 addition & 7 deletions ontologytimemachine/proxy_wrapper.py
@@ -1,14 +1,8 @@
from abc import ABC, abstractmethod
from proxy.http.parser import HttpParser
import logging
from typing import Tuple, Dict, Any
import base64

# Configure logger
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
from ontologytimemachine.utils.config import logger


class AbstractRequestWrapper(ABC):
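The change repeated across proxy_wrapper.py, download_archivo_urls.py, proxy_logic.py, and utils.py is the same: each module drops its own logging.basicConfig/getLogger boilerplate and imports the single logger defined in utils/config.py. A minimal sketch of the resulting pattern (the function name and message are illustrative, not taken from the repository):

# Shared logger, configured once in ontologytimemachine/utils/config.py
from ontologytimemachine.utils.config import logger


def handle_request(url: str) -> None:
    # Downstream modules log through the centrally configured logger instead
    # of calling logging.basicConfig themselves.
    logger.info(f"Handling request for {url}")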
58 changes: 48 additions & 10 deletions ontologytimemachine/utils/config.py
@@ -1,9 +1,16 @@
import argparse
from dataclasses import dataclass, field
from enum import Enum
import logging
from typing import Dict, Any, Type, TypeVar


logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)


class EnumValuePrint(
Enum
):  # redefine how the enum is printed so that it shows up properly in the cmd help message (choices)
@@ -70,6 +77,8 @@ class Config:
httpsInterception: HttpsInterception = HttpsInterception.ALL
disableRemovingRedirects: bool = False
timestamp: str = ""
host: str = "0.0.0.0"
port: str = "8896"
# manifest: Dict[str, Any] = None


@@ -90,30 +99,33 @@ def enum_parser(enum_class: Type[E], value: str) -> E:

def parse_arguments(config_str: str = "") -> Config:
default_cfg: Config = Config()
parser = argparse.ArgumentParser(description="Process ontology format and version.")
parser = argparse.ArgumentParser(
description="Process ontology format and version.",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)

# Defining ontoFormat argument with nested options
parser.add_argument(
"--ontoFormat",
type=lambda s: enum_parser(OntoFormat, s),
default=default_cfg.ontoFormatConf.format,
choices=list(OntoFormat),
help="Format of the ontology: turtle, ntriples, rdfxml, htmldocu",
help="Format of the ontology: turtle, ntriples, rdfxml, htmldocu. (default: %(default)s)",
)

parser.add_argument(
"--ontoPrecedence",
type=lambda s: enum_parser(OntoPrecedence, s),
default=default_cfg.ontoFormatConf.precedence,
choices=list(OntoPrecedence),
help="Precedence of the ontology: default, enforcedPriority, always",
help="Precedence of the ontology: default, enforcedPriority, always. (default: %(default)s)",
)

parser.add_argument(
"--patchAcceptUpstream",
type=bool,
default=default_cfg.ontoFormatConf.patchAcceptUpstream,
help="Defines if the Accept Header is patched upstream in original mode.",
help="Defines if the Accept Header is patched upstream in original mode. (default: %(default)s)",
)

# Defining ontoVersion argument
@@ -122,15 +134,15 @@ def parse_arguments(config_str: str = "") -> Config:
type=lambda s: enum_parser(OntoVersion, s),
default=default_cfg.ontoVersion,
choices=list(OntoVersion),
help="Version of the ontology: original, originalFailoverLive, originalFailoverArchivoMonitor, latestArchive, timestampArchive, dependencyManifest",
help="Version of the ontology: original, originalFailoverLive, originalFailoverArchivoMonitor, latestArchive, timestampArchive, dependencyManifest. (default: %(default)s)",
)

# Enable/disable mode to only proxy requests to ontologies
parser.add_argument(
"--restrictedAccess",
type=bool,
default=default_cfg.restrictedAccess,
help="Enable/disable mode to only proxy requests to ontologies stored in Archivo.",
help="Enable/disable mode to only proxy requests to ontologies stored in Archivo. (default: %(default)s)",
)

# Enable HTTPS interception for specific domains
@@ -139,23 +151,23 @@
type=lambda s: enum_parser(HttpsInterception, s),
default=default_cfg.httpsInterception,
choices=list(HttpsInterception),
help="Enable HTTPS interception for specific domains: none, archivo, all, listfilename.",
help="Enable HTTPS interception for specific domains: none, archivo, all, listfilename. (default: %(default)s)",
)

# Enable/disable inspecting or removing redirects
parser.add_argument(
"--disableRemovingRedirects",
type=bool,
default=default_cfg.disableRemovingRedirects,
help="Enable/disable inspecting or removing redirects.",
help="Enable/disable inspecting or removing redirects. (default: %(default)s)",
)

parser.add_argument(
"--clientConfigViaProxyAuth",
type=lambda s: enum_parser(ClientConfigViaProxyAuth, s),
default=default_cfg.clientConfigViaProxyAuth,
choices=list(ClientConfigViaProxyAuth),
help="Define the configuration of the proxy via the proxy auth.",
help="Define the configuration of the proxy via the proxy auth. (default: %(default)s)",
)

# Log level
@@ -164,7 +176,23 @@ def parse_arguments(config_str: str = "") -> Config:
type=lambda s: enum_parser(LogLevel, s),
default=default_cfg.logLevel,
choices=list(LogLevel),
help="Level of the logging: debug, info, warning, error.",
help="Level of the logging: debug, info, warning, error. (default: %(default)s)",
)

# Host
parser.add_argument(
"--host",
type=str,
default=default_cfg.host,
help="Hostname or IP address to bind the proxy to. (default: %(default)s)",
)

# Port
parser.add_argument(
"--port",
type=str,
default=default_cfg.port,
help="Port number to bind the proxy to. (default: %(default)s)",
)

if config_str:
@@ -194,6 +222,14 @@ def parse_arguments(config_str: str = "") -> Config:
# print the default configuration with all nested members
# print(default_cfg) # TODO remove

if args.logLevel != LogLevel.INFO:
logging.basicConfig(
level=args.logLevel.value,
format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)
logger.info(f"Logging level set to: {args.logLevel}")

# Initialize the Config class with parsed arguments
config = Config(
logLevel=args.logLevel,
@@ -206,6 +242,8 @@ def parse_arguments(config_str: str = "") -> Config:
clientConfigViaProxyAuth=args.clientConfigViaProxyAuth,
disableRemovingRedirects=args.disableRemovingRedirects,
timestamp=args.timestamp if hasattr(args, "timestamp") else "",
host=args.host,
port=args.port,
)

return config
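For reference, a short sketch of how the new host/port options surface on the parsed Config. The defaults come from the dataclass fields above; passing a config string assumes parse_arguments tokenizes it like a regular argument vector, which is not shown in this diff.

from ontologytimemachine.utils.config import parse_arguments

# With no CLI arguments the dataclass defaults apply (when run as a plain script).
cfg = parse_arguments()
assert cfg.host == "0.0.0.0" and cfg.port == "8896"

# Hypothetical override via the config-string path; assumes the string is
# split into argv-style tokens inside parse_arguments.
cfg = parse_arguments("--host 127.0.0.1 --port 8899 --logLevel debug")
print(cfg.host, cfg.port, cfg.logLevel)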
8 changes: 1 addition & 7 deletions ontologytimemachine/utils/download_archivo_urls.py
@@ -1,13 +1,13 @@
import os
import hashlib
import logging
import requests
import schedule
import time
import csv
from datetime import datetime, timedelta
from urllib.parse import urlparse
from typing import Set, Tuple
from ontologytimemachine.utils.config import logger


ARCHIVO_PARSED_URLS: Set[Tuple[str, str]] = set()
@@ -22,12 +22,6 @@
DOWNLOAD_INTERVAL = timedelta(days=1) # 1 day interval for checking the download


logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)


def schedule_daily_download():
"""Schedule the download to run at 3 AM every day."""
schedule.every().day.at("03:00").do(download_archivo_urls)
8 changes: 1 addition & 7 deletions ontologytimemachine/utils/proxy_logic.py
@@ -1,4 +1,3 @@
import logging
import requests
from ontologytimemachine.utils.config import parse_arguments
from ontologytimemachine.proxy_wrapper import AbstractRequestWrapper
@@ -26,15 +25,10 @@
OntoVersion,
HttpsInterception,
ClientConfigViaProxyAuth,
logger,
)


logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)


def do_block_CONNECT_request(config: Config) -> bool:
if config.httpsInterception == HttpsInterception.BLOCK:
logger.info("decided to block CONNECT request due to config enum")
13 changes: 5 additions & 8 deletions ontologytimemachine/utils/utils.py
@@ -1,13 +1,10 @@
import logging
import argparse
from werkzeug.http import parse_accept_header
from ontologytimemachine.utils.config import OntoVersion, OntoFormat, OntoPrecedence


logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
from ontologytimemachine.utils.config import (
OntoVersion,
OntoFormat,
OntoPrecedence,
logger,
)
logger = logging.getLogger(__name__)


archivo_api = "https://archivo.dbpedia.org/download"
14 changes: 12 additions & 2 deletions tests/test_integration.py
@@ -111,14 +111,24 @@ def test_15_linked_web_apis():


def generic_test(iri, content_type):
response = requests.get(iri, proxies=PROXIES, verify=CA_CERT_PATH)
response = requests.get(
iri,
proxies=PROXIES,
verify=CA_CERT_PATH,
auth=HTTPBasicAuth("admin", "archivo"),
)
assert response.status_code == 200
assert iri in response.content.decode("utf-8")


def iri_generic_test(iri):
try:
response = requests.get(iri, proxies=PROXIES, verify=CA_CERT_PATH)
response = requests.get(
iri,
proxies=PROXIES,
verify=CA_CERT_PATH,
auth=HTTPBasicAuth("admin", "archivo"),
)
assert response.status_code == 200
assert iri in response.content.decode("utf-8")
except AssertionError:
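For context, the test changes route every request through the proxy with HTTP Basic Auth credentials attached. A standalone sketch of that call shape follows; PROXIES, CA_CERT_PATH, and the IRI are placeholders, since the real constants live in the test module and are not part of this diff.

import requests
from requests.auth import HTTPBasicAuth

# Placeholder values; the test module defines its own PROXIES and CA_CERT_PATH.
PROXIES = {"http": "http://0.0.0.0:8896", "https": "http://0.0.0.0:8896"}
CA_CERT_PATH = "ca-cert.pem"

response = requests.get(
    "https://example.org/ontology",  # placeholder ontology IRI
    proxies=PROXIES,
    verify=CA_CERT_PATH,
    auth=HTTPBasicAuth("admin", "archivo"),
)
assert response.status_code == 200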
