From ca6e085c656489a3458f7887219d2b33b02d7508 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Fri, 13 Dec 2024 15:42:08 +0000 Subject: [PATCH] Move logger to Context --- scraper/src/mindtouch2zim/__main__.py | 4 +++- scraper/src/mindtouch2zim/asset.py | 2 +- scraper/src/mindtouch2zim/client.py | 2 +- scraper/src/mindtouch2zim/constants.py | 9 --------- scraper/src/mindtouch2zim/context.py | 11 +++++++++-- scraper/src/mindtouch2zim/html_rewriting.py | 2 +- .../mindtouch2zim/libretexts/detailed_licensing.py | 2 +- scraper/src/mindtouch2zim/processor.py | 2 +- scraper/src/mindtouch2zim/utils.py | 5 ++++- scraper/src/mindtouch2zim/vimeo.py | 2 +- scraper/tests/test_context.py | 5 +++++ 11 files changed, 27 insertions(+), 19 deletions(-) diff --git a/scraper/src/mindtouch2zim/__main__.py b/scraper/src/mindtouch2zim/__main__.py index 25c94fb..a3db141 100644 --- a/scraper/src/mindtouch2zim/__main__.py +++ b/scraper/src/mindtouch2zim/__main__.py @@ -1,9 +1,11 @@ import sys import tempfile -from mindtouch2zim.constants import logger +from mindtouch2zim.context import Context from mindtouch2zim.entrypoint import prepare_context +logger = Context.logger + def main(): try: diff --git a/scraper/src/mindtouch2zim/asset.py b/scraper/src/mindtouch2zim/asset.py index be917d3..6c85291 100644 --- a/scraper/src/mindtouch2zim/asset.py +++ b/scraper/src/mindtouch2zim/asset.py @@ -17,7 +17,6 @@ from zimscraperlib.rewriting.url_rewriting import HttpUrl, ZimPath from zimscraperlib.zim import Creator -from mindtouch2zim.constants import logger from mindtouch2zim.context import Context from mindtouch2zim.download import stream_file from mindtouch2zim.errors import ( @@ -46,6 +45,7 @@ WEBP_OPTIONS = WebpMedium().options context = Context.get() +logger = context.logger class HeaderData(NamedTuple): diff --git a/scraper/src/mindtouch2zim/client.py b/scraper/src/mindtouch2zim/client.py index 3a2cdcc..fa6d227 100644 --- a/scraper/src/mindtouch2zim/client.py +++ b/scraper/src/mindtouch2zim/client.py @@ -7,12 +7,12 @@ from pydantic import BaseModel from requests import Response -from mindtouch2zim.constants import logger from mindtouch2zim.context import Context from mindtouch2zim.errors import APITokenRetrievalError, MindtouchParsingError from mindtouch2zim.html import get_soup context = Context.get() +logger = context.logger class MindtouchHome(BaseModel): diff --git a/scraper/src/mindtouch2zim/constants.py b/scraper/src/mindtouch2zim/constants.py index 70096b9..ad21299 100644 --- a/scraper/src/mindtouch2zim/constants.py +++ b/scraper/src/mindtouch2zim/constants.py @@ -1,8 +1,5 @@ -import logging import pathlib -from zimscraperlib.logging import DEFAULT_FORMAT_WITH_THREADS, getLogger - from mindtouch2zim.__about__ import __version__ NAME = "mindtouch2zim" @@ -15,9 +12,3 @@ STANDARD_KNOWN_BAD_ASSETS_REGEX = ( r"https?:\/\/(a\.mtstatic\.com\/@(cache|style)|localhost(:|\/))" ) - -# logger to use everywhere (not part of Context class because we need it early, before -# Context has been initialized) -logger: logging.Logger = getLogger( - NAME, level=logging.DEBUG, log_format=DEFAULT_FORMAT_WITH_THREADS -) diff --git a/scraper/src/mindtouch2zim/context.py b/scraper/src/mindtouch2zim/context.py index 9df7d84..6b7fbae 100644 --- a/scraper/src/mindtouch2zim/context.py +++ b/scraper/src/mindtouch2zim/context.py @@ -1,4 +1,5 @@ import dataclasses +import logging import os import re import threading @@ -7,12 +8,12 @@ import requests from zimscraperlib.constants import NAME as SCRAPERLIB_NAME from zimscraperlib.constants import VERSION as SCRAPERLIB_VERSION +from zimscraperlib.logging import DEFAULT_FORMAT_WITH_THREADS, getLogger from mindtouch2zim.constants import ( NAME, STANDARD_KNOWN_BAD_ASSETS_REGEX, VERSION, - logger, ) MINDTOUCH_TMP = os.getenv("MINDTOUCH_TMP") @@ -106,6 +107,12 @@ class Context: # Maximum number of pixels of images that will be pushed to the ZIM maximum_image_pixels: int = 1280 * 720 + # logger to use everywhere (do not mind about mutability, we want to reuse same + # logger everywhere) + logger: logging.Logger = getLogger( # noqa: RUF009 + NAME, level=logging.DEBUG, log_format=DEFAULT_FORMAT_WITH_THREADS + ) + @classmethod def setup(cls, **kwargs): new_instance = cls(**kwargs) @@ -132,7 +139,7 @@ def current_thread_workitem(self) -> str: @current_thread_workitem.setter def current_thread_workitem(self, value: str): self._current_thread_workitem.value = value - logger.debug(f"Processing {value}") + Context.logger.debug(f"Processing {value}") @property def wm_user_agent(self) -> str: diff --git a/scraper/src/mindtouch2zim/html_rewriting.py b/scraper/src/mindtouch2zim/html_rewriting.py index 2266bfc..5c41fd9 100644 --- a/scraper/src/mindtouch2zim/html_rewriting.py +++ b/scraper/src/mindtouch2zim/html_rewriting.py @@ -15,12 +15,12 @@ from mindtouch2zim.asset import AssetManager from mindtouch2zim.client import LibraryPage -from mindtouch2zim.constants import logger from mindtouch2zim.context import Context from mindtouch2zim.utils import is_better_srcset_descriptor from mindtouch2zim.vimeo import get_vimeo_thumbnail_url context = Context.get() +logger = context.logger # remove all standard rules, they are not adapted to Vue.JS UI html_rules.rewrite_attribute_rules.clear() diff --git a/scraper/src/mindtouch2zim/libretexts/detailed_licensing.py b/scraper/src/mindtouch2zim/libretexts/detailed_licensing.py index a9356a4..ab4bf77 100644 --- a/scraper/src/mindtouch2zim/libretexts/detailed_licensing.py +++ b/scraper/src/mindtouch2zim/libretexts/detailed_licensing.py @@ -5,11 +5,11 @@ from zimscraperlib.rewriting.html import HtmlRewriter from mindtouch2zim.client import LibraryPage, MindtouchClient -from mindtouch2zim.constants import logger from mindtouch2zim.context import Context from mindtouch2zim.libretexts.errors import BadBookPageError context = Context.get() +logger = context.logger class LicenseStatistic(BaseModel): diff --git a/scraper/src/mindtouch2zim/processor.py b/scraper/src/mindtouch2zim/processor.py index 70e0d02..1fba79a 100644 --- a/scraper/src/mindtouch2zim/processor.py +++ b/scraper/src/mindtouch2zim/processor.py @@ -43,7 +43,6 @@ NAME, ROOT_DIR, VERSION, - logger, ) from mindtouch2zim.context import Context from mindtouch2zim.download import stream_file @@ -64,6 +63,7 @@ from mindtouch2zim.zimconfig import ZimConfig context = Context.get() +logger = context.logger class ContentFilter(BaseModel): diff --git a/scraper/src/mindtouch2zim/utils.py b/scraper/src/mindtouch2zim/utils.py index b264cec..dafeead 100644 --- a/scraper/src/mindtouch2zim/utils.py +++ b/scraper/src/mindtouch2zim/utils.py @@ -2,7 +2,10 @@ from typing import Any from urllib.parse import urlparse -from mindtouch2zim.constants import logger +from mindtouch2zim.context import Context + +context = Context.get() +logger = context.logger def get_asset_path_from_url(online_url: str, already_used_paths: list[Path]) -> Path: diff --git a/scraper/src/mindtouch2zim/vimeo.py b/scraper/src/mindtouch2zim/vimeo.py index f4d84bc..53f376d 100644 --- a/scraper/src/mindtouch2zim/vimeo.py +++ b/scraper/src/mindtouch2zim/vimeo.py @@ -1,8 +1,8 @@ -from mindtouch2zim.constants import logger from mindtouch2zim.context import Context from mindtouch2zim.errors import VimeoThumbnailError context = Context.get() +logger = context.logger def get_vimeo_thumbnail_url(video_url: str) -> str: diff --git a/scraper/tests/test_context.py b/scraper/tests/test_context.py index e3932c0..244dffe 100644 --- a/scraper/tests/test_context.py +++ b/scraper/tests/test_context.py @@ -11,6 +11,11 @@ def context_defaults(): return CONTEXT_DEFAULTS +def test_context_logger(): + # ensure we have only one logger object everywhere + assert Context.logger == Context.get().logger + + def test_context_defaults(): context = Context.get() assert context == processor_context # check both objects are same