From 0f31eaf82a4561306e96a5c87aeaa13c7bd62c69 Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Thu, 9 May 2024 11:21:55 +0200 Subject: [PATCH 01/13] add dependency `rich` --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 1f794d9d..dfa6e469 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ dependencies = [ "numpy", "pandas", "pyteomics", + "rich", "scipy", "sortedcontainers", "tqdm", From 493597d58528fb4c3a3c184c3428e788a8671964 Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Thu, 9 May 2024 11:24:50 +0200 Subject: [PATCH 02/13] remove logging NullHandler from modules --- src/nplinker/genomics/__init__.py | 3 --- src/nplinker/genomics/antismash/__init__.py | 3 --- src/nplinker/genomics/bigscape/__init__.py | 3 --- src/nplinker/genomics/mibig/__init__.py | 3 --- src/nplinker/metabolomics/__init__.py | 4 ---- src/nplinker/metabolomics/gnps/__init__.py | 3 --- src/nplinker/schemas/__init__.py | 3 --- src/nplinker/scoring/__init__.py | 3 --- src/nplinker/scoring/linking/__init__.py | 3 --- src/nplinker/strain/__init__.py | 3 --- 10 files changed, 31 deletions(-) diff --git a/src/nplinker/genomics/__init__.py b/src/nplinker/genomics/__init__.py index 710dad50..0758b87f 100644 --- a/src/nplinker/genomics/__init__.py +++ b/src/nplinker/genomics/__init__.py @@ -1,10 +1,7 @@ -import logging from .bgc import BGC from .gcf import GCF -logging.getLogger(__name__).addHandler(logging.NullHandler()) - __all__ = [ "BGC", "GCF", diff --git a/src/nplinker/genomics/antismash/__init__.py b/src/nplinker/genomics/antismash/__init__.py index 0d813194..e126f548 100644 --- a/src/nplinker/genomics/antismash/__init__.py +++ b/src/nplinker/genomics/antismash/__init__.py @@ -1,4 +1,3 @@ -import logging from .antismash_downloader import download_and_extract_antismash_data from .antismash_loader import AntismashBGCLoader from .antismash_loader import parse_bgc_genbank @@ -7,8 +6,6 @@ from .podp_antismash_downloader 
import podp_download_and_extract_antismash_data -logging.getLogger(__name__).addHandler(logging.NullHandler()) - __all__ = [ "download_and_extract_antismash_data", "AntismashBGCLoader", diff --git a/src/nplinker/genomics/bigscape/__init__.py b/src/nplinker/genomics/bigscape/__init__.py index 82c39239..820c09ba 100644 --- a/src/nplinker/genomics/bigscape/__init__.py +++ b/src/nplinker/genomics/bigscape/__init__.py @@ -1,9 +1,6 @@ -import logging from .bigscape_loader import BigscapeGCFLoader from .bigscape_loader import BigscapeV2GCFLoader from .runbigscape import run_bigscape -logging.getLogger(__name__).addHandler(logging.NullHandler()) - __all__ = ["BigscapeGCFLoader", "BigscapeV2GCFLoader", "run_bigscape"] diff --git a/src/nplinker/genomics/mibig/__init__.py b/src/nplinker/genomics/mibig/__init__.py index d2e1f0d8..e68c44a6 100644 --- a/src/nplinker/genomics/mibig/__init__.py +++ b/src/nplinker/genomics/mibig/__init__.py @@ -1,12 +1,9 @@ -import logging from .mibig_downloader import download_and_extract_mibig_metadata from .mibig_loader import MibigLoader from .mibig_loader import parse_bgc_metadata_json from .mibig_metadata import MibigMetadata -logging.getLogger(__name__).addHandler(logging.NullHandler()) - __all__ = [ "download_and_extract_mibig_metadata", "MibigLoader", diff --git a/src/nplinker/metabolomics/__init__.py b/src/nplinker/metabolomics/__init__.py index e7dc79c1..98955bad 100644 --- a/src/nplinker/metabolomics/__init__.py +++ b/src/nplinker/metabolomics/__init__.py @@ -1,11 +1,7 @@ -import logging from .molecular_family import MolecularFamily from .spectrum import Spectrum -logging.getLogger(__name__).addHandler(logging.NullHandler()) - - __all__ = [ "MolecularFamily", "Spectrum", diff --git a/src/nplinker/metabolomics/gnps/__init__.py b/src/nplinker/metabolomics/gnps/__init__.py index f73f0fec..507c6fa1 100644 --- a/src/nplinker/metabolomics/gnps/__init__.py +++ b/src/nplinker/metabolomics/gnps/__init__.py @@ -1,4 +1,3 @@ -import logging from 
.gnps_annotation_loader import GNPSAnnotationLoader from .gnps_downloader import GNPSDownloader from .gnps_extractor import GNPSExtractor @@ -11,8 +10,6 @@ from .gnps_spectrum_loader import GNPSSpectrumLoader -logging.getLogger(__name__).addHandler(logging.NullHandler()) - __all__ = [ "GNPSAnnotationLoader", "GNPSDownloader", diff --git a/src/nplinker/schemas/__init__.py b/src/nplinker/schemas/__init__.py index 31737ed3..f3362bb9 100644 --- a/src/nplinker/schemas/__init__.py +++ b/src/nplinker/schemas/__init__.py @@ -1,12 +1,9 @@ import json -import logging from pathlib import Path from .utils import PODP_ADAPTED_SCHEMA from .utils import validate_podp_json -logging.getLogger(__name__).addHandler(logging.NullHandler()) - __all__ = [ "GENOME_STATUS_SCHEMA", "GENOME_BGC_MAPPINGS_SCHEMA", diff --git a/src/nplinker/scoring/__init__.py b/src/nplinker/scoring/__init__.py index 0aaefeaf..10c04292 100644 --- a/src/nplinker/scoring/__init__.py +++ b/src/nplinker/scoring/__init__.py @@ -1,10 +1,7 @@ -import logging from .link_collection import LinkCollection from .metcalf_scoring import MetcalfScoring from .methods import ScoringMethod from .object_link import ObjectLink -logging.getLogger(__name__).addHandler(logging.NullHandler()) - __all__ = ["LinkCollection", "MetcalfScoring", "ScoringMethod", "ObjectLink"] diff --git a/src/nplinker/scoring/linking/__init__.py b/src/nplinker/scoring/linking/__init__.py index f103d49f..bd391697 100644 --- a/src/nplinker/scoring/linking/__init__.py +++ b/src/nplinker/scoring/linking/__init__.py @@ -1,4 +1,3 @@ -import logging from .data_links import LINK_TYPES from .data_links import DataLinks from .link_finder import LinkFinder @@ -6,6 +5,4 @@ from .utils import isinstance_all -logging.getLogger(__name__).addHandler(logging.NullHandler()) - __all__ = ["DataLinks", "LINK_TYPES", "LinkFinder", "calc_correlation_matrix", "isinstance_all"] diff --git a/src/nplinker/strain/__init__.py b/src/nplinker/strain/__init__.py index d4ad376d..3ff7613c 
100644 --- a/src/nplinker/strain/__init__.py +++ b/src/nplinker/strain/__init__.py @@ -1,8 +1,5 @@ -import logging from .strain import Strain from .strain_collection import StrainCollection -logging.getLogger(__name__).addHandler(logging.NullHandler()) - __all__ = ["Strain", "StrainCollection"] From d745f0e06fb05510564b2b33fd494a5316539bba Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Thu, 9 May 2024 14:13:15 +0200 Subject: [PATCH 03/13] add function `setup_logging` --- src/nplinker/__init__.py | 42 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/nplinker/__init__.py b/src/nplinker/__init__.py index 612fc032..7fdfe99d 100644 --- a/src/nplinker/__init__.py +++ b/src/nplinker/__init__.py @@ -6,3 +6,45 @@ __author__ = "Cunliang Geng" __email__ = "c.geng@esciencecenter.nl" __version__ = "2.0.0-alpha.1" + + +def setup_logging(level: str = "INFO", file: str = "", use_console: bool = True) -> None: + """Setup logging configuration for the ancestor logger "nplinker". + + Args: + level: The log level, use the logging module's log level constants. Valid levels are: + "NOTSET", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL". + file: The file to write the log to. If the file does not exist, it will be created. The log + will be written to the file in append mode. If the file is an empty string (by default), + the log will not be written to a file. + use_console: Whether to log to the console. 
+ """ + from rich.console import Console + from rich.logging import RichHandler + + # Get the ancestor logger "nplinker" + logger = logging.getLogger(__name__.split(".")[0]) + logger.setLevel(level) + + # File handler + if file: + logger.addHandler( + RichHandler( + console=Console(file=open(file, "a"), width=120), # force the line width to 120 + omit_repeated_times=False, + rich_tracebacks=True, + tracebacks_show_locals=True, + log_time_format="[%Y-%m-%d %X]", + ) + ) + + # Console handler + if use_console: + logger.addHandler( + RichHandler( + omit_repeated_times=False, + rich_tracebacks=True, + tracebacks_show_locals=True, + log_time_format="[%Y-%m-%d %X]", + ) + ) From dd7004f0007ba699ea7eb969f0f9666584e72d08 Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Thu, 9 May 2024 14:41:47 +0200 Subject: [PATCH 04/13] use standard logging instead of `LogConfig` --- src/nplinker/class_info/chem_classes.py | 5 ++--- src/nplinker/class_info/class_matches.py | 5 ++--- src/nplinker/class_info/runcanopus.py | 5 ++--- src/nplinker/genomics/antismash/antismash_downloader.py | 4 ++-- src/nplinker/genomics/antismash/antismash_loader.py | 4 ++-- .../genomics/antismash/podp_antismash_downloader.py | 4 ++-- src/nplinker/genomics/bgc.py | 4 ++-- src/nplinker/genomics/bigscape/bigscape_loader.py | 4 ++-- src/nplinker/genomics/bigscape/runbigscape.py | 4 ++-- src/nplinker/genomics/gcf.py | 4 ++-- src/nplinker/genomics/mibig/mibig_downloader.py | 4 ++-- src/nplinker/genomics/mibig/mibig_loader.py | 4 ++-- src/nplinker/genomics/utils.py | 4 ++-- src/nplinker/loader.py | 4 ++-- src/nplinker/metabolomics/gnps/gnps_spectrum_loader.py | 4 ++-- src/nplinker/metabolomics/utils.py | 4 ++-- src/nplinker/parsers/kcb.py | 5 ++--- src/nplinker/pickler.py | 5 ++--- src/nplinker/scoring/iokr/IOKR_deprecated.py | 7 ++----- src/nplinker/scoring/iokr/iokrdata.py | 5 ++--- src/nplinker/scoring/iokr/nplinker_iokr.py | 5 ++--- src/nplinker/scoring/iokr/spectrum_filters.py | 1 +
src/nplinker/scoring/link_collection.py | 4 ++-- src/nplinker/scoring/linking/data_links.py | 4 ++-- src/nplinker/scoring/linking/link_finder.py | 4 ++-- src/nplinker/scoring/linking/link_likelihood.py | 4 ++-- src/nplinker/scoring/metcalf_scoring.py | 4 ++-- src/nplinker/scoring/methods.py | 5 ++--- src/nplinker/scoring/np_class_scoring.py | 4 ++-- src/nplinker/scoring/rosetta/rosetta.py | 5 ++--- src/nplinker/scoring/rosetta/spec_lib.py | 5 ++--- src/nplinker/scoring/rosetta_scoring.py | 4 ++-- src/nplinker/strain/strain.py | 4 ++-- src/nplinker/strain/strain_collection.py | 4 ++-- src/nplinker/strain/utils.py | 4 ++-- 35 files changed, 69 insertions(+), 81 deletions(-) diff --git a/src/nplinker/class_info/chem_classes.py b/src/nplinker/class_info/chem_classes.py index e859db36..8477ceef 100644 --- a/src/nplinker/class_info/chem_classes.py +++ b/src/nplinker/class_info/chem_classes.py @@ -11,16 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - import glob +import logging import os from collections import Counter from canopus import Canopus from canopus.classifications_to_gnps import analyse_canopus -from ..logconfig import LogConfig -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) # load Chem_class_predictions (canopus, molnetenhancer are loaded) diff --git a/src/nplinker/class_info/class_matches.py b/src/nplinker/class_info/class_matches.py index 1c1f8065..2e3ace98 100644 --- a/src/nplinker/class_info/class_matches.py +++ b/src/nplinker/class_info/class_matches.py @@ -11,15 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- +import logging import os from collections import Counter from collections import defaultdict import pandas as pd -from ..logconfig import LogConfig -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) class ClassMatches: diff --git a/src/nplinker/class_info/runcanopus.py b/src/nplinker/class_info/runcanopus.py index 88da87e4..6108e7e5 100644 --- a/src/nplinker/class_info/runcanopus.py +++ b/src/nplinker/class_info/runcanopus.py @@ -11,14 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +import logging import os import subprocess import sys -from ..logconfig import LogConfig -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) def run_canopus(mgf_file, output_path, extra_params="--maxmz 600 formula zodiac structure canopus"): diff --git a/src/nplinker/genomics/antismash/antismash_downloader.py b/src/nplinker/genomics/antismash/antismash_downloader.py index 27a35d57..ac9aef2f 100644 --- a/src/nplinker/genomics/antismash/antismash_downloader.py +++ b/src/nplinker/genomics/antismash/antismash_downloader.py @@ -1,15 +1,15 @@ from __future__ import annotations +import logging import os import shutil from os import PathLike from pathlib import Path -from nplinker.logconfig import LogConfig from nplinker.utils import download_and_extract_archive from nplinker.utils import list_dirs from nplinker.utils import list_files -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) # urls to be given to download antismash data ANTISMASH_DB_DOWNLOAD_URL = "https://antismash-db.secondarymetabolites.org/output/{}/{}" diff --git a/src/nplinker/genomics/antismash/antismash_loader.py b/src/nplinker/genomics/antismash/antismash_loader.py index b6f578dd..08384817 100644 --- a/src/nplinker/genomics/antismash/antismash_loader.py +++ 
b/src/nplinker/genomics/antismash/antismash_loader.py @@ -1,17 +1,17 @@ from __future__ import annotations import fnmatch +import logging import os from Bio import SeqIO from Bio import SeqRecord from nplinker.genomics import BGC -from nplinker.logconfig import LogConfig from nplinker.strain import Strain from nplinker.utils import list_dirs from nplinker.utils import list_files from ..abc import BGCLoaderBase -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) class AntismashBGCLoader: diff --git a/src/nplinker/genomics/antismash/podp_antismash_downloader.py b/src/nplinker/genomics/antismash/podp_antismash_downloader.py index 67ed8fc7..d463445a 100644 --- a/src/nplinker/genomics/antismash/podp_antismash_downloader.py +++ b/src/nplinker/genomics/antismash/podp_antismash_downloader.py @@ -1,5 +1,6 @@ from __future__ import annotations import json +import logging import re import time from os import PathLike @@ -11,11 +12,10 @@ from jsonschema import validate from nplinker.defaults import GENOME_STATUS_FILENAME from nplinker.genomics.antismash import download_and_extract_antismash_data -from nplinker.logconfig import LogConfig from nplinker.schemas import GENOME_STATUS_SCHEMA -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) NCBI_LOOKUP_URL = "https://www.ncbi.nlm.nih.gov/assembly/?term={}" JGI_GENOME_LOOKUP_URL = ( diff --git a/src/nplinker/genomics/bgc.py b/src/nplinker/genomics/bgc.py index 6fab5b83..c63c94cb 100644 --- a/src/nplinker/genomics/bgc.py +++ b/src/nplinker/genomics/bgc.py @@ -1,7 +1,7 @@ from __future__ import annotations +import logging from typing import TYPE_CHECKING from deprecated import deprecated -from nplinker.logconfig import LogConfig from .aa_pred import predict_aa @@ -9,7 +9,7 @@ from ..strain import Strain from .gcf import GCF -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) class BGC: diff --git a/src/nplinker/genomics/bigscape/bigscape_loader.py 
b/src/nplinker/genomics/bigscape/bigscape_loader.py index 81a1d0ae..c4e7637b 100644 --- a/src/nplinker/genomics/bigscape/bigscape_loader.py +++ b/src/nplinker/genomics/bigscape/bigscape_loader.py @@ -1,13 +1,13 @@ from __future__ import annotations import csv +import logging import sqlite3 from os import PathLike -from nplinker.logconfig import LogConfig from ..abc import GCFLoaderBase from ..gcf import GCF -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) class BigscapeGCFLoader: diff --git a/src/nplinker/genomics/bigscape/runbigscape.py b/src/nplinker/genomics/bigscape/runbigscape.py index d56e0310..2e10ba10 100644 --- a/src/nplinker/genomics/bigscape/runbigscape.py +++ b/src/nplinker/genomics/bigscape/runbigscape.py @@ -1,12 +1,12 @@ from __future__ import annotations +import logging import os import subprocess import sys from os import PathLike -from ...logconfig import LogConfig -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) PFAM_PATH = os.path.join(sys.prefix, "nplinker_lib") diff --git a/src/nplinker/genomics/gcf.py b/src/nplinker/genomics/gcf.py index 7cb9ad0c..eb5963c0 100644 --- a/src/nplinker/genomics/gcf.py +++ b/src/nplinker/genomics/gcf.py @@ -1,6 +1,6 @@ from __future__ import annotations +import logging from typing import TYPE_CHECKING -from nplinker.logconfig import LogConfig from nplinker.strain import StrainCollection @@ -8,7 +8,7 @@ from nplinker.strain import Strain from .bgc import BGC -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) class GCF: diff --git a/src/nplinker/genomics/mibig/mibig_downloader.py b/src/nplinker/genomics/mibig/mibig_downloader.py index ae202c65..9b7d1d53 100644 --- a/src/nplinker/genomics/mibig/mibig_downloader.py +++ b/src/nplinker/genomics/mibig/mibig_downloader.py @@ -1,14 +1,14 @@ from __future__ import annotations +import logging import os import shutil from pathlib import Path -from nplinker.logconfig import LogConfig from 
nplinker.utils import download_and_extract_archive from nplinker.utils import list_dirs from nplinker.utils import list_files -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) MIBIG_METADATA_URL = "https://dl.secondarymetabolites.org/mibig/mibig_json_{version}.tar.gz" diff --git a/src/nplinker/genomics/mibig/mibig_loader.py b/src/nplinker/genomics/mibig/mibig_loader.py index c135d27d..38fed6b3 100644 --- a/src/nplinker/genomics/mibig/mibig_loader.py +++ b/src/nplinker/genomics/mibig/mibig_loader.py @@ -1,5 +1,5 @@ +import logging import os.path -from nplinker.logconfig import LogConfig from nplinker.strain import Strain from nplinker.utils import list_files from ..abc import BGCLoaderBase @@ -7,7 +7,7 @@ from .mibig_metadata import MibigMetadata -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) class MibigLoader: diff --git a/src/nplinker/genomics/utils.py b/src/nplinker/genomics/utils.py index 78b9905b..aa984e20 100644 --- a/src/nplinker/genomics/utils.py +++ b/src/nplinker/genomics/utils.py @@ -1,10 +1,10 @@ from __future__ import annotations import json +import logging from os import PathLike from pathlib import Path from jsonschema import validate from nplinker.defaults import GENOME_BGC_MAPPINGS_FILENAME -from nplinker.logconfig import LogConfig from nplinker.schemas import GENOME_BGC_MAPPINGS_SCHEMA from nplinker.schemas import validate_podp_json from nplinker.strain import StrainCollection @@ -16,7 +16,7 @@ from .gcf import GCF -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) def generate_mappings_genome_id_bgc_id( diff --git a/src/nplinker/loader.py b/src/nplinker/loader.py index c2602ffe..cef3fe8e 100644 --- a/src/nplinker/loader.py +++ b/src/nplinker/loader.py @@ -1,3 +1,4 @@ +import logging import os from importlib.resources import files from deprecated import deprecated @@ -16,7 +17,6 @@ from nplinker.genomics.utils import add_bgc_to_gcf from nplinker.genomics.utils 
import add_strain_to_bgc from nplinker.genomics.utils import get_mibig_from_gcf -from nplinker.logconfig import LogConfig from nplinker.metabolomics.gnps import GNPSAnnotationLoader from nplinker.metabolomics.gnps import GNPSMolecularFamilyLoader from nplinker.metabolomics.gnps import GNPSSpectrumLoader @@ -27,7 +27,7 @@ from nplinker.strain.utils import load_user_strains -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) NPLINKER_APP_DATA_DIR = files("nplinker").joinpath("data") diff --git a/src/nplinker/metabolomics/gnps/gnps_spectrum_loader.py b/src/nplinker/metabolomics/gnps/gnps_spectrum_loader.py index 77ae71ee..e4d25939 100644 --- a/src/nplinker/metabolomics/gnps/gnps_spectrum_loader.py +++ b/src/nplinker/metabolomics/gnps/gnps_spectrum_loader.py @@ -1,12 +1,12 @@ from __future__ import annotations +import logging from os import PathLike from pyteomics import mgf -from nplinker.logconfig import LogConfig from nplinker.metabolomics import Spectrum from nplinker.metabolomics.abc import SpectrumLoaderBase -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) class GNPSSpectrumLoader(SpectrumLoaderBase): diff --git a/src/nplinker/metabolomics/utils.py b/src/nplinker/metabolomics/utils.py index 6380b3c6..1f1ce8a0 100644 --- a/src/nplinker/metabolomics/utils.py +++ b/src/nplinker/metabolomics/utils.py @@ -1,8 +1,8 @@ from __future__ import annotations import json +import logging from os import PathLike from pathlib import Path -from nplinker.logconfig import LogConfig from nplinker.schemas import validate_podp_json from nplinker.strain import StrainCollection from .gnps.gnps_file_mapping_loader import GNPSFileMappingLoader @@ -10,7 +10,7 @@ from .spectrum import Spectrum -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) def add_annotation_to_spectrum(annotations: dict[str, dict], spectra: list[Spectrum]) -> None: diff --git a/src/nplinker/parsers/kcb.py b/src/nplinker/parsers/kcb.py 
index d70ce21d..620c3c2e 100644 --- a/src/nplinker/parsers/kcb.py +++ b/src/nplinker/parsers/kcb.py @@ -11,15 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - from __future__ import annotations import json +import logging import os import re -from ..logconfig import LogConfig -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) # this will match strings like '...cluster001.gbk' or '...region022.gbk', # and allow the number to be extracted easily diff --git a/src/nplinker/pickler.py b/src/nplinker/pickler.py index 8f566898..133ab1fd 100644 --- a/src/nplinker/pickler.py +++ b/src/nplinker/pickler.py @@ -11,17 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +import logging import os import pickle from .genomics import BGC from .genomics import GCF -from .logconfig import LogConfig from .metabolomics import MolecularFamily from .metabolomics import Spectrum -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) # This is based on the example at https://docs.python.org/3/library/pickle.html#pickle-persistent # Normally there are serious problems trying to pickle any object with a __hash__, diff --git a/src/nplinker/scoring/iokr/IOKR_deprecated.py b/src/nplinker/scoring/iokr/IOKR_deprecated.py index 3d2e64ff..c152f34f 100644 --- a/src/nplinker/scoring/iokr/IOKR_deprecated.py +++ b/src/nplinker/scoring/iokr/IOKR_deprecated.py @@ -11,16 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- - # CG: these functions are not used by other, could be removed - -from nplinker.logconfig import LogConfig +import logging from . import nplinker_iokr from .spectrum import MSSpectrum -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) def run_iokr_ranking(spec, bgc_list): diff --git a/src/nplinker/scoring/iokr/iokrdata.py b/src/nplinker/scoring/iokr/iokrdata.py index a28bcf20..db17782e 100644 --- a/src/nplinker/scoring/iokr/iokrdata.py +++ b/src/nplinker/scoring/iokr/iokrdata.py @@ -11,19 +11,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +import logging import os import sys import time import numpy import scipy.io -from nplinker.logconfig import LogConfig from . import mk_fprints from . import spectrum from . import spectrum_filters -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) def normalise_kernel(matrix): diff --git a/src/nplinker/scoring/iokr/nplinker_iokr.py b/src/nplinker/scoring/iokr/nplinker_iokr.py index 468e2240..405e19a0 100644 --- a/src/nplinker/scoring/iokr/nplinker_iokr.py +++ b/src/nplinker/scoring/iokr/nplinker_iokr.py @@ -11,12 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +import logging import os import time import numpy from nplinker.genomics import GCF -from nplinker.logconfig import LogConfig from nplinker.metabolomics import MolecularFamily from . import iokr_opt from . import iokrdata as iokrdataserver @@ -25,7 +24,7 @@ from . 
import spectrum_filters -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) class NPLinkerIOKR: diff --git a/src/nplinker/scoring/iokr/spectrum_filters.py b/src/nplinker/scoring/iokr/spectrum_filters.py index 195feac0..eeb4da50 100644 --- a/src/nplinker/scoring/iokr/spectrum_filters.py +++ b/src/nplinker/scoring/iokr/spectrum_filters.py @@ -16,6 +16,7 @@ import os import pickle import numpy + # import sys # sys.path.append('/home/grimur/git/lda') # from lda.code.formula import Formula diff --git a/src/nplinker/scoring/link_collection.py b/src/nplinker/scoring/link_collection.py index eb2f8cea..6f6ff186 100644 --- a/src/nplinker/scoring/link_collection.py +++ b/src/nplinker/scoring/link_collection.py @@ -1,8 +1,8 @@ import itertools -from nplinker.logconfig import LogConfig +import logging -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) class LinkCollection: diff --git a/src/nplinker/scoring/linking/data_links.py b/src/nplinker/scoring/linking/data_links.py index c1726461..0093f7bb 100644 --- a/src/nplinker/scoring/linking/data_links.py +++ b/src/nplinker/scoring/linking/data_links.py @@ -1,10 +1,10 @@ from __future__ import annotations +import logging from typing import TYPE_CHECKING from typing import Sequence import numpy as np import pandas as pd from nplinker.genomics.gcf import GCF -from nplinker.logconfig import LogConfig from nplinker.metabolomics import MolecularFamily from nplinker.metabolomics import Spectrum from .utils import calc_correlation_matrix @@ -15,7 +15,7 @@ from nplinker.strain import Strain from nplinker.strain import StrainCollection -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) LINK_TYPES = ["spec-gcf", "mf-gcf"] diff --git a/src/nplinker/scoring/linking/link_finder.py b/src/nplinker/scoring/linking/link_finder.py index 57421cf6..0e25a050 100644 --- a/src/nplinker/scoring/linking/link_finder.py +++ b/src/nplinker/scoring/linking/link_finder.py @@ -1,10 
+1,10 @@ from __future__ import annotations +import logging from typing import TYPE_CHECKING import numpy as np import pandas as pd from scipy.stats import hypergeom from nplinker.genomics.gcf import GCF -from nplinker.logconfig import LogConfig from nplinker.metabolomics import MolecularFamily from nplinker.metabolomics import Spectrum from . import LINK_TYPES @@ -14,7 +14,7 @@ if TYPE_CHECKING: from . import DataLinks -logger = LogConfig.getLogger(__file__) +logger = logging.getLogger(__file__) # TODO CG: this class could be merged to MetcalfScoring class? diff --git a/src/nplinker/scoring/linking/link_likelihood.py b/src/nplinker/scoring/linking/link_likelihood.py index 7a284afa..f24487d8 100644 --- a/src/nplinker/scoring/linking/link_likelihood.py +++ b/src/nplinker/scoring/linking/link_likelihood.py @@ -1,10 +1,10 @@ from __future__ import annotations +import logging from deprecated import deprecated -from nplinker.logconfig import LogConfig from nplinker.scoring.linking.utils import calc_likelihood_matrix -logger = LogConfig.getLogger(__file__) +logger = logging.getLogger(__file__) @deprecated(version="1.3.3", reason="It's unused and will be removed in 2.0.0") diff --git a/src/nplinker/scoring/metcalf_scoring.py b/src/nplinker/scoring/metcalf_scoring.py index c5558105..7c0f930c 100644 --- a/src/nplinker/scoring/metcalf_scoring.py +++ b/src/nplinker/scoring/metcalf_scoring.py @@ -1,11 +1,11 @@ from __future__ import annotations +import logging import os from typing import TYPE_CHECKING import numpy as np import pandas as pd from nplinker.defaults import OUTPUT_DEFAULT_PATH from nplinker.genomics import GCF -from nplinker.logconfig import LogConfig from nplinker.metabolomics import MolecularFamily from nplinker.metabolomics import Spectrum from nplinker.pickler import load_pickled_data @@ -22,7 +22,7 @@ from ..nplinker import NPLinker from . 
import LinkCollection -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) class MetcalfScoring(ScoringMethod): diff --git a/src/nplinker/scoring/methods.py b/src/nplinker/scoring/methods.py index aa016ce3..ad42cfa8 100644 --- a/src/nplinker/scoring/methods.py +++ b/src/nplinker/scoring/methods.py @@ -11,16 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - from __future__ import annotations +import logging from typing import TYPE_CHECKING -from nplinker.logconfig import LogConfig if TYPE_CHECKING: from . import LinkCollection -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) # CG: TODO refactor this class to abstract base class diff --git a/src/nplinker/scoring/np_class_scoring.py b/src/nplinker/scoring/np_class_scoring.py index 8a441937..fa028b93 100644 --- a/src/nplinker/scoring/np_class_scoring.py +++ b/src/nplinker/scoring/np_class_scoring.py @@ -1,14 +1,14 @@ +import logging import time from nplinker.genomics import BGC from nplinker.genomics import GCF -from nplinker.logconfig import LogConfig from nplinker.metabolomics import Spectrum from nplinker.scoring.metcalf_scoring import MetcalfScoring from nplinker.scoring.methods import ScoringMethod from nplinker.scoring.object_link import ObjectLink -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) class NPClassScoring(ScoringMethod): diff --git a/src/nplinker/scoring/rosetta/rosetta.py b/src/nplinker/scoring/rosetta/rosetta.py index c9937a66..3ab7a287 100644 --- a/src/nplinker/scoring/rosetta/rosetta.py +++ b/src/nplinker/scoring/rosetta/rosetta.py @@ -11,12 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- import csv +import logging import os from nplinker.scoring.rosetta.rosetta_hit import RosettaHit from ...genomics import BGC -from ...logconfig import LogConfig from ...parsers.kcb import KCBJSONParser from ...parsers.kcb import KCBTextParser from ...pickler import load_pickled_data @@ -24,7 +23,7 @@ from .spec_lib import SpecLib -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) class Rosetta: diff --git a/src/nplinker/scoring/rosetta/spec_lib.py b/src/nplinker/scoring/rosetta/spec_lib.py index 88cc2bba..6896a3a4 100644 --- a/src/nplinker/scoring/rosetta/spec_lib.py +++ b/src/nplinker/scoring/rosetta/spec_lib.py @@ -11,14 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +import logging from sortedcontainers import SortedList from nplinker.metabolomics.gnps import GNPSSpectrumLoader -from ...logconfig import LogConfig from .rosetta_functions import fast_cosine -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) class SpecLib: diff --git a/src/nplinker/scoring/rosetta_scoring.py b/src/nplinker/scoring/rosetta_scoring.py index d5fbcff6..88bb94bf 100644 --- a/src/nplinker/scoring/rosetta_scoring.py +++ b/src/nplinker/scoring/rosetta_scoring.py @@ -1,14 +1,14 @@ import itertools +import logging from nplinker.genomics.bgc import BGC from nplinker.genomics.gcf import GCF -from nplinker.logconfig import LogConfig from nplinker.metabolomics import MolecularFamily from nplinker.scoring.methods import ScoringMethod from nplinker.scoring.object_link import ObjectLink from nplinker.scoring.rosetta.rosetta import Rosetta -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) class RosettaScoring(ScoringMethod): diff --git a/src/nplinker/strain/strain.py b/src/nplinker/strain/strain.py index 6517fbe9..358ed371 100644 --- a/src/nplinker/strain/strain.py +++ 
b/src/nplinker/strain/strain.py @@ -1,8 +1,8 @@ from __future__ import annotations -from nplinker.logconfig import LogConfig +import logging -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) class Strain: diff --git a/src/nplinker/strain/strain_collection.py b/src/nplinker/strain/strain_collection.py index 6be0c8e3..74933665 100644 --- a/src/nplinker/strain/strain_collection.py +++ b/src/nplinker/strain/strain_collection.py @@ -1,14 +1,14 @@ from __future__ import annotations import json +import logging from os import PathLike from typing import Iterator from jsonschema import validate -from nplinker.logconfig import LogConfig from nplinker.schemas import STRAIN_MAPPINGS_SCHEMA from .strain import Strain -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) class StrainCollection: diff --git a/src/nplinker/strain/utils.py b/src/nplinker/strain/utils.py index c1cbcf34..0443067e 100644 --- a/src/nplinker/strain/utils.py +++ b/src/nplinker/strain/utils.py @@ -1,8 +1,8 @@ from __future__ import annotations import json +import logging from os import PathLike from jsonschema import validate -from nplinker.logconfig import LogConfig from nplinker.schemas import USER_STRAINS_SCHEMA from ..genomics.utils import extract_mappings_original_genome_id_resolved_genome_id from ..genomics.utils import extract_mappings_resolved_genome_id_bgc_id @@ -15,7 +15,7 @@ from .strain_collection import StrainCollection -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) def load_user_strains(json_file: str | PathLike) -> set[Strain]: From d053a30f181f575231e984520b23d2424d181ba1 Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Thu, 9 May 2024 18:04:37 +0200 Subject: [PATCH 05/13] replace print with logger.info --- src/nplinker/utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/nplinker/utils.py b/src/nplinker/utils.py index 8523f365..007fcf08 100644 --- a/src/nplinker/utils.py +++ 
b/src/nplinker/utils.py @@ -17,6 +17,7 @@ import csv import gzip import hashlib +import logging import lzma import os import os.path @@ -29,6 +30,7 @@ from typing import Callable import httpx from tqdm import tqdm +logger = logging.getLogger(__name__) def find_delimiter(file: str | PathLike) -> str: @@ -142,7 +144,7 @@ def download_url( # check if file is already present locally if fpath.is_file() and md5 is not None and check_md5(fpath, md5): - print("Using downloaded and verified file: " + str(fpath)) + logger.info("Using downloaded and verified file: " + str(fpath)) return # download the file @@ -376,6 +378,7 @@ def extract_archive( # create the extract directory if not exist extract_root.mkdir(exist_ok=True) + logger.info(f"Extracting {from_path} to {extract_root}") suffix, archive_type, compression = _detect_file_type(from_path) if not archive_type: return _decompress( @@ -429,7 +432,6 @@ def download_and_extract_archive( download_url(url, download_root, filename, md5) archive = download_root / filename - print(f"Extracting {archive} to {extract_root}") extract_archive(archive, extract_root, remove_finished=remove_finished) From 8fb3429a1613a1367e2c0b3712c3379cba67ddf4 Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Fri, 10 May 2024 09:16:01 +0200 Subject: [PATCH 06/13] Delete logconfig.py --- src/nplinker/logconfig.py | 78 --------------------------------------- 1 file changed, 78 deletions(-) delete mode 100644 src/nplinker/logconfig.py diff --git a/src/nplinker/logconfig.py b/src/nplinker/logconfig.py deleted file mode 100644 index 5c2028aa..00000000 --- a/src/nplinker/logconfig.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright 2021 The NPLinker Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import sys - - -class LogConfig: - active_loggers: dict[str, logging.Logger] = {} - logfmt = "%(asctime)s [%(levelname)s] %(filename)s:%(lineno)d, %(message)s" - default_loglevel = logging.INFO - # default destination for new Loggers - default_logdest = logging.StreamHandler(sys.stdout) - # additional destinations to be added to new Loggers - additional_logdests: list[logging.Handler] = [] - - @staticmethod - def getLogger(obj: str, level=default_loglevel, dest=default_logdest) -> logging.Logger: - """Return a logging.Logger associated with the object . - - The Logger's level and dest values will be set to the corresponding - parameters passed to this method. - """ - if obj in LogConfig.active_loggers: - return LogConfig.active_loggers[obj] - - logger = logging.getLogger(obj) - logger.setLevel(level) - dest.setFormatter(logging.Formatter(LogConfig.logfmt, datefmt="%H:%M:%S")) - logger.addHandler(dest) - LogConfig.active_loggers[obj] = logger - return logger - - @staticmethod - def setLogLevel(level): - """Apply a new log level value to all loggers created by getLogger.""" - LogConfig.default_loglevel = level - for logger in LogConfig.active_loggers.values(): - logger.setLevel(level) - - @staticmethod - def setLogLevelStr(level): - """Apply a new log level value to all loggers created by getLogger. - - Identical to setLogLevel but parameter is a string instead of a - constant from the logging module (e.g. 
"INFO", "DEBUG") - """ - if not hasattr(logging, level): - raise Exception(f'Unknown/invalid loglevel "{level}"') - - LogConfig.setLogLevel(getattr(logging, level)) - - @staticmethod - def setLogDestination(dest): - LogConfig.default_logdest = dest - LogConfig.additional_logdests = [] - dest.setFormatter(logging.Formatter(LogConfig.logfmt)) - for logger in LogConfig.active_loggers.values(): - logger.handlers = [] - logger.addHandler(dest) - - @staticmethod - def addLogDestination(dest): - LogConfig.additional_logdests.append(dest) - dest.setFormatter(logging.Formatter(LogConfig.logfmt)) - for logger in LogConfig.active_loggers.values(): - logger.addHandler(dest) From b9c7efac6bee30bb976ee481c888fa4c48575404 Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Thu, 9 May 2024 18:05:34 +0200 Subject: [PATCH 07/13] remove dependency tqdm use rich.progress instead --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index dfa6e469..a9574ec0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,6 @@ dependencies = [ "rich", "scipy", "sortedcontainers", - "tqdm", ] [project.optional-dependencies] From a5dbcacb1745de4248abe634da2d74190afd30f1 Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Thu, 9 May 2024 18:06:09 +0200 Subject: [PATCH 08/13] replace tqdm with rich.progress for displaying progress bar --- src/nplinker/utils.py | 30 +++++++++++++++++++++++++----- tests/integration/conftest.py | 10 +++++----- 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/src/nplinker/utils.py b/src/nplinker/utils.py index 007fcf08..aedccfa4 100644 --- a/src/nplinker/utils.py +++ b/src/nplinker/utils.py @@ -29,7 +29,15 @@ from typing import IO from typing import Callable import httpx -from tqdm import tqdm +from rich.progress import BarColumn +from rich.progress import DownloadColumn +from rich.progress import Progress +from rich.progress import TextColumn +from rich.progress import TimeElapsedColumn +from 
rich.progress import TimeRemainingColumn +from rich.progress import TransferSpeedColumn + + logger = logging.getLogger(__name__) @@ -156,12 +164,24 @@ def download_url( f"Failed to download url {url} with status code {response.status_code}" ) total = int(response.headers.get("Content-Length", 0)) - with tqdm(total=total, unit_scale=True, unit_divisor=1024, unit="B") as progress: - num_bytes_downloaded = response.num_bytes_downloaded + + with Progress( + TextColumn("[progress.description]{task.description}"), + BarColumn(bar_width=None), + "[progress.percentage]{task.percentage:>3.1f}%", + "•", + DownloadColumn(), + "•", + TransferSpeedColumn(), + "•", + TimeRemainingColumn(), + "•", + TimeElapsedColumn(), + ) as progress: + task = progress.add_task(f"[hot_pink]Downloading {fpath.name}", total=total) for chunk in response.iter_bytes(): fh.write(chunk) - progress.update(response.num_bytes_downloaded - num_bytes_downloaded) - num_bytes_downloaded = response.num_bytes_downloaded + progress.update(task, advance=len(chunk)) # check integrity of downloaded file if md5 is not None and not check_md5(fpath, md5): diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 8d1370ef..764312b8 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -6,7 +6,7 @@ from os import PathLike from pathlib import Path import httpx -from tqdm import tqdm +from rich.progress import Progress from . 
import DATA_DIR @@ -86,9 +86,9 @@ def download_archive( response.raise_for_status() print(f"Downloading test dataset {url} to {root}") total = int(response.headers.get("Content-Length", 0)) - with tqdm(total=total, unit_scale=True, unit_divisor=1024, unit="B") as progress: - num_bytes_downloaded = response.num_bytes_downloaded + + with Progress() as progress: + task = progress.add_task(f"[hot_pink]Downloading {fpath.name}", total=total) for chunk in response.iter_bytes(): fh.write(chunk) - progress.update(response.num_bytes_downloaded - num_bytes_downloaded) - num_bytes_downloaded = response.num_bytes_downloaded + progress.update(task, advance=len(chunk)) From 1948cb965b9fdce977830c243ac663c291d4e886 Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Thu, 9 May 2024 14:14:14 +0200 Subject: [PATCH 09/13] update nplinker configs for logging - update comments - rename `to_stdout` to `use_console` - Remove `cast=Path` for `log.file`, as the config variable is first cast, then examined on its type. --- src/nplinker/config.py | 6 +++--- src/nplinker/data/nplinker.toml | 13 +++++++------ src/nplinker/nplinker_default.toml | 2 +- tests/unit/test_config.py | 2 +- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/nplinker/config.py b/src/nplinker/config.py index 199b22d9..57704e1d 100644 --- a/src/nplinker/config.py +++ b/src/nplinker/config.py @@ -39,7 +39,7 @@ Validator("podp_id", required=True, when=Validator("mode", eq="podp")), Validator("podp_id", required=False, when=Validator("mode", eq="local")), # Log - ## `loglevel` must be a string and must be one of the supported levels. It is transformed to + ## `level` must be a string and must be one of the supported levels. It is transformed to ## uppercase to avoid case sensitivity. 
Validator( "log.level", @@ -47,8 +47,8 @@ cast=lambda v: v.upper(), is_in=["NOTSET", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], ), - Validator("log.file", is_type_of=str, cast=Path), - Validator("log.to_stdout", is_type_of=bool), + Validator("log.file", is_type_of=str), + Validator("log.use_console", is_type_of=bool), # Mibig Validator("mibig.to_use", required=True, is_type_of=bool), Validator( diff --git a/src/nplinker/data/nplinker.toml b/src/nplinker/data/nplinker.toml index d4ef6dff..e0209418 100644 --- a/src/nplinker/data/nplinker.toml +++ b/src/nplinker/data/nplinker.toml @@ -18,16 +18,17 @@ podp_id = "" [log] # Log level. The available levels are same as the levels in python package `logging`: -# "NOTSET", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL". +# "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL". # The default value is "INFO". level = "INFO" -# Redirect the log messages from stdout to a log file. If not set, the log messages will only be -# printed to stdout. -# The value is optional and must be a full path if set. +# The log file to append log messages. +# The value is optional. +# If not set or set to an empty string, log messages will not be written to a file. +# The file will be created if it does not exist. Log messages will be appended to the file if it exists. file = "path/to/logfile" -# Whether to print log messages to stdout in addition to writing to the logfile. +# Whether to write log messages to console. # The default value is true.
-to_stdout = true +use_console = true [mibig] diff --git a/src/nplinker/nplinker_default.toml b/src/nplinker/nplinker_default.toml index e81304f9..4d88540a 100644 --- a/src/nplinker/nplinker_default.toml +++ b/src/nplinker/nplinker_default.toml @@ -2,7 +2,7 @@ [log] level = "INFO" -to_stdout = true +use_console = true [mibig] to_use = true diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index 45ae92a1..343f120c 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -11,7 +11,7 @@ def test_config(): # The following are default values from nplinker_default.toml assert config.get("log.file") is None - assert config.log.to_stdout is True + assert config.log.use_console is True assert config.mibig.to_use is True assert config.mibig.version == "3.1" From 8db49373f64b6af140ccc2211675d09eb5bff4cb Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Fri, 10 May 2024 09:45:30 +0200 Subject: [PATCH 10/13] update logging setup in NPLinker class --- src/nplinker/nplinker.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/src/nplinker/nplinker.py b/src/nplinker/nplinker.py index 4dd2545f..17af95e7 100644 --- a/src/nplinker/nplinker.py +++ b/src/nplinker/nplinker.py @@ -2,13 +2,13 @@ import logging import sys from typing import TYPE_CHECKING +from . 
import setup_logging from .arranger import DatasetArranger from .config import config from .genomics import BGC from .genomics import GCF from .loader import NPLINKER_APP_DATA_DIR from .loader import DatasetLoader -from .logconfig import LogConfig from .metabolomics import MolecularFamily from .metabolomics import Spectrum from .pickler import save_pickled_data @@ -23,7 +23,7 @@ from collections.abc import Sequence from .strain import Strain -logger = LogConfig.getLogger(__name__) +logger = logging.getLogger(__name__) class NPLinker: @@ -39,17 +39,11 @@ class NPLinker: def __init__(self): """Initialise an NPLinker instance.""" - # configure logging based on the supplied config params - LogConfig.setLogLevelStr(config.log.level) - logfile = config.get("log.file") - if logfile: - logfile_dest = logging.FileHandler(logfile) - # if we want to log to stdout plus logfile, add the new destination - if config.get("log.to_stdout"): # default to True - LogConfig.addLogDestination(logfile_dest) - else: - # otherwise overwrite the default stdout destination - LogConfig.setLogDestination(logfile_dest) + setup_logging( + level=config.log.level, + file=config.log.get("file", ""), + use_console=config.log.use_console, + ) self._loader = DatasetLoader() From 2c2876bdaf21366b9dd6deba8641932f86238b97 Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Fri, 10 May 2024 09:47:33 +0200 Subject: [PATCH 11/13] print out configs when initializing NPLinker class --- src/nplinker/nplinker.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/nplinker/nplinker.py b/src/nplinker/nplinker.py index 17af95e7..fd184660 100644 --- a/src/nplinker/nplinker.py +++ b/src/nplinker/nplinker.py @@ -1,6 +1,7 @@ from __future__ import annotations import logging import sys +from pprint import pformat from typing import TYPE_CHECKING from . 
import setup_logging from .arranger import DatasetArranger @@ -44,6 +45,7 @@ def __init__(self): file=config.log.get("file", ""), use_console=config.log.use_console, ) + logger.info("Configuration:\n %s", pformat(config.as_dict(), width=20, sort_dicts=False)) self._loader = DatasetLoader() From 4e1a0276749166051f337ea6989b7ef0f87d74fe Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Thu, 9 May 2024 17:53:55 +0200 Subject: [PATCH 12/13] change log message from debug to info --- src/nplinker/class_info/chem_classes.py | 2 +- .../antismash/podp_antismash_downloader.py | 4 ++-- src/nplinker/loader.py | 16 ++++++------- src/nplinker/nplinker.py | 6 ++--- src/nplinker/parsers/kcb.py | 8 +++---- src/nplinker/scoring/iokr/spectrum_filters.py | 1 - src/nplinker/scoring/link_collection.py | 8 +++---- src/nplinker/scoring/linking/data_links.py | 8 +++---- .../scoring/linking/link_likelihood.py | 2 +- src/nplinker/scoring/metcalf_scoring.py | 24 +++++++++---------- 10 files changed, 39 insertions(+), 40 deletions(-) diff --git a/src/nplinker/class_info/chem_classes.py b/src/nplinker/class_info/chem_classes.py index 8477ceef..b99f9f47 100644 --- a/src/nplinker/class_info/chem_classes.py +++ b/src/nplinker/class_info/chem_classes.py @@ -263,7 +263,7 @@ class prediction for a level. 
When no class is present, instead of Tuple it will # use canopus_treemap to produce NPClassifier classes # TODO: probably change when sirius v5 comes out - logger.debug("Using canopus_treemap to get NPC classes") + logger.info("Using canopus_treemap to get NPC classes") canopus_workspace = Canopus(sirius=self._canopus_dir) npc_file = os.path.join(self._canopus_dir, "npc_summary.tsv") canopus_workspace.npcSummary().to_csv(npc_file, sep=sep) diff --git a/src/nplinker/genomics/antismash/podp_antismash_downloader.py b/src/nplinker/genomics/antismash/podp_antismash_downloader.py index d463445a..515fdffe 100644 --- a/src/nplinker/genomics/antismash/podp_antismash_downloader.py +++ b/src/nplinker/genomics/antismash/podp_antismash_downloader.py @@ -248,7 +248,7 @@ def _ncbi_genbank_search(genbank_id: str, retry_times: int = 3) -> Tag | Navigab url = NCBI_LOOKUP_URL.format(genbank_id) retry = 1 while retry <= retry_times: - logger.debug(f"Looking up GenBank data for {genbank_id} at {url}") + logger.info(f"Looking up GenBank data for {genbank_id} at {url}") resp = httpx.get(url, follow_redirects=True) if resp.status_code == httpx.codes.OK: # the page should contain a
element with class "assembly_summary_new". retrieving @@ -298,7 +298,7 @@ def _resolve_genbank_accession(genbank_id: str) -> str: # get rid of any extraneous whitespace genbank_id = genbank_id.strip() - logger.debug(f'Parsed GenBank ID to "{genbank_id}"') + logger.info(f'Parsed GenBank ID to "{genbank_id}"') # run a search using the GenBank accession ID try: diff --git a/src/nplinker/loader.py b/src/nplinker/loader.py index cef3fe8e..41b84d89 100644 --- a/src/nplinker/loader.py +++ b/src/nplinker/loader.py @@ -102,7 +102,7 @@ def _load_metabolomics(self): Strain objects added (i.e. `MolecularFamily._strains` updated). This means only Spectra objects with updated strains (i.e. `self.spectra`) can be added to MolecularFamily objects. """ - logger.debug("\nLoading metabolomics data starts...") + logger.info(f"{'='*40}\nLoading metabolomics data starts...") # Step 1: load all Spectrum objects raw_spectra = GNPSSpectrumLoader(GNPS_DEFAULT_PATH / GNPS_SPECTRA_FILENAME).spectra @@ -127,7 +127,7 @@ def _load_metabolomics(self): self.spectra = spectra_with_strains self.molfams = mf_with_spec - logger.debug("Loading metabolomics data completed\n") + logger.info("Loading metabolomics data completed\n") return True def _load_genomics(self): @@ -141,10 +141,10 @@ def _load_genomics(self): added (i.e. `GCF._strains` updated). This means only BGC objects with updated Strain objects (i.e. `self.bgcs`) can be added to GCF objects. """ - logger.debug("\nLoading genomics data starts...") + logger.info(f"{'='*40}\nLoading genomics data starts...") # Step 1: load antismash BGC objects & add strain info - logger.debug("Parsing AntiSMASH directory...") + logger.info("Parsing AntiSMASH directory...") antismash_bgcs = AntismashBGCLoader(str(defaults.ANTISMASH_DEFAULT_PATH)).get_bgcs() antismash_bgcs_with_strain, _ = add_strain_to_bgc(self.strains, antismash_bgcs) @@ -164,10 +164,10 @@ def _load_genomics(self): # switch depending on found file. 
prefer V1 if both are found if bigscape_cluster_file.exists(): loader = BigscapeGCFLoader(bigscape_cluster_file) - logger.debug(f"Loading BigSCAPE cluster file {bigscape_cluster_file}") + logger.info(f"Loading BigSCAPE cluster file {bigscape_cluster_file}") elif bigscape_db_file.exists(): loader = BigscapeV2GCFLoader(bigscape_db_file) - logger.debug(f"Loading BigSCAPE database file {bigscape_db_file}") + logger.info(f"Loading BigSCAPE database file {bigscape_db_file}") else: raise FileNotFoundError( f"Neither BigSCAPE cluster file {bigscape_cluster_file} nor database file {bigscape_db_file} were found." @@ -190,7 +190,7 @@ def _load_genomics(self): self.gcfs = all_gcfs_with_bgc self.mibig_strains_in_use = mibig_strains_in_use - logger.debug("Loading genomics data completed\n") + logger.info("Loading genomics data completed\n") return True @deprecated(reason="To be refactored. It was used in the `self.load` method before.") @@ -244,7 +244,7 @@ def _load_class_info(self): chem_classes = ChemClassPredictions(self.canopus_dir, self.molnetenhancer_dir, self._root) # noqa # if no molfam classes transfer them from spectra (due to old style MN) if not chem_classes.canopus.molfam_classes and chem_classes.canopus.spectra_classes: - logger.debug("Added chemical compound classes for MFs") + logger.info("Added chemical compound classes for MFs") chem_classes.canopus.transfer_spec_classes_to_molfams(self.molfams) # include them in loader self.chem_classes = chem_classes diff --git a/src/nplinker/nplinker.py b/src/nplinker/nplinker.py index fd184660..d039de62 100644 --- a/src/nplinker/nplinker.py +++ b/src/nplinker/nplinker.py @@ -69,7 +69,7 @@ def __init__(self): for name, method in NPLinker.SCORING_METHODS.items(): if len(config_methods) == 0 or name in config_methods: self._scoring_methods[name] = method - logger.debug(f"Enabled scoring method: {name}") + logger.info(f"Enabled scoring method: {name}") self._scoring_methods_setup_complete = { name: False for name in 
self._scoring_methods.keys() @@ -279,9 +279,9 @@ def get_links( if (source, target) in shared_strains: link.shared_strains = shared_strains[(source, target)] - logger.debug("Finished calculating shared strain information") + logger.info("Finished calculating shared strain information") - logger.debug("Final size of link collection is {}".format(len(link_collection))) + logger.info("Final size of link collection is {}".format(len(link_collection))) return link_collection def get_common_strains( diff --git a/src/nplinker/parsers/kcb.py b/src/nplinker/parsers/kcb.py index 620c3c2e..9d46f3df 100644 --- a/src/nplinker/parsers/kcb.py +++ b/src/nplinker/parsers/kcb.py @@ -45,14 +45,14 @@ def __init__(self, bgcs): if not os.path.exists(bgc.antismash_file): raise Exception('KCBJSONParser failed to find file "{}"'.format(bgc.antismash_file)) - logger.debug(f"KCBJSONParser({len(bgcs)} BGCs)") + logger.info(f"KCBJSONParser({len(bgcs)} BGCs)") # find the JSON file: TODO is the assumption of there only being a single .json # file always going to work? 
otherwise have to try guessing the name based on # genome IDs prefix = os.path.dirname(bgcs[0].antismash_file) json_files = list(filter(lambda f: f.endswith(".json"), os.listdir(prefix))) - logger.debug("Found {} JSON files in {}".format(len(json_files), prefix)) + logger.info("Found {} JSON files in {}".format(len(json_files), prefix)) if len(json_files) == 0: logger.warning("Unable to find an antiSMASH JSON output file in {}".format(prefix)) @@ -60,7 +60,7 @@ def __init__(self, bgcs): return self.json_filename = os.path.join(prefix, json_files[0]) - logger.debug(f"Using JSON file {self.json_filename}") + logger.info(f"Using JSON file {self.json_filename}") def parse_hits(self): if self.json_filename is None: @@ -139,7 +139,7 @@ def parse_hits(self): if hits is not None: self.collected_hits.update(hits) - logger.debug( + logger.info( "KCBJSONParser: collected {} total hit entries".format(len(self.collected_hits)) ) diff --git a/src/nplinker/scoring/iokr/spectrum_filters.py b/src/nplinker/scoring/iokr/spectrum_filters.py index eeb4da50..195feac0 100644 --- a/src/nplinker/scoring/iokr/spectrum_filters.py +++ b/src/nplinker/scoring/iokr/spectrum_filters.py @@ -16,7 +16,6 @@ import os import pickle import numpy - # import sys # sys.path.append('/home/grimur/git/lda') # from lda.code.formula import Formula diff --git a/src/nplinker/scoring/link_collection.py b/src/nplinker/scoring/link_collection.py index 6f6ff186..f734686e 100644 --- a/src/nplinker/scoring/link_collection.py +++ b/src/nplinker/scoring/link_collection.py @@ -36,14 +36,14 @@ def _add_links_from_method(self, method, object_links): # only results that appear in both sets if not self._and_mode: - logger.debug( + logger.info( "Merging {} results from method {} in OR mode".format( len(object_links), method.name ) ) self._merge_or_mode(object_links) else: - logger.debug( + logger.info( "Merging {} results from method {} in AND mode".format( len(object_links), method.name ) @@ -98,12 +98,12 @@ def 
_merge_or_mode(self, object_links): def filter_no_shared_strains(self): len_before = len(self._link_data) self.filter_links(lambda x: len(x.shared_strains) > 0) - logger.debug("filter_no_shared_strains: {} => {}".format(len_before, len(self._link_data))) + logger.info("filter_no_shared_strains: {} => {}".format(len_before, len(self._link_data))) def filter_sources(self, callable_obj): len_before = len(self._link_data) self._link_data = {k: v for k, v in self._link_data.items() if callable_obj(k)} - logger.debug("filter_sources: {} => {}".format(len_before, len(self._link_data))) + logger.info("filter_sources: {} => {}".format(len_before, len(self._link_data))) def filter_targets(self, callable_obj, sources=None): to_remove = [] diff --git a/src/nplinker/scoring/linking/data_links.py b/src/nplinker/scoring/linking/data_links.py index 0093f7bb..265acadc 100644 --- a/src/nplinker/scoring/linking/data_links.py +++ b/src/nplinker/scoring/linking/data_links.py @@ -74,7 +74,7 @@ def __init__( """ self._strains = strains - logger.debug( + logger.info( "Create occurrence dataframes: spectra<->strains, gcfs<->strains and mfs<->strains." ) # DataFrame to store occurrence of gcfs/spectra/mfs with respect to strains @@ -84,14 +84,14 @@ def __init__( self.occurrence_mf_strain = self._get_occurrence_mf_strain(mfs, strains) # DataFrame to store co-occurrence of "spectra<->gcf" or "mfs<->gcf" - logger.debug("Create correlation matrices: spectra<->gcfs.") + logger.info("Create correlation matrices: spectra<->gcfs.") ( self.cooccurrence_spec_gcf, self.cooccurrence_spec_notgcf, self.cooccurrence_notspec_gcf, self.cooccurrence_notspec_notgcf, ) = self._get_cooccurrence(link_type="spec-gcf") - logger.debug("Create correlation matrices: mol-families<->gcfs.") + logger.info("Create correlation matrices: mol-families<->gcfs.") ( self.cooccurrence_mf_gcf, self.cooccurrence_mf_notgcf, @@ -240,7 +240,7 @@ def _get_cooccurrence( raise ValueError( f"Link type {link_type} is not supported. 
Use 'spec-gcf' or 'mf-gcf' instead." ) - logger.debug(f"Calculating correlation matrices of type: {link_type}") + logger.info(f"Calculating correlation matrices of type: {link_type}") m1, m2, m3, m4 = calc_correlation_matrix(met_strain_occurrence, self.occurrence_gcf_strain) df_met_gcf = pd.DataFrame( m1, diff --git a/src/nplinker/scoring/linking/link_likelihood.py b/src/nplinker/scoring/linking/link_likelihood.py index f24487d8..f2f56769 100644 --- a/src/nplinker/scoring/linking/link_likelihood.py +++ b/src/nplinker/scoring/linking/link_likelihood.py @@ -59,7 +59,7 @@ def calculate_likelihoods(self, data_links, type="spec-gcf"): "Wrong correlation 'type' given. Must be one of 'spec-gcf', 'mf-gcf'..." ) - logger.debug(f"Calculating likelihood matrices of type: {type}") + logger.info(f"Calculating likelihood matrices of type: {type}") # Calculate likelihood matrices using calc_likelihood_matrix() ( P_type2_given_type1, diff --git a/src/nplinker/scoring/metcalf_scoring.py b/src/nplinker/scoring/metcalf_scoring.py index 7c0f930c..a2ac3f19 100644 --- a/src/nplinker/scoring/metcalf_scoring.py +++ b/src/nplinker/scoring/metcalf_scoring.py @@ -86,7 +86,7 @@ def setup(npl: NPLinker): ] datalinks, linkfinder = None, None if os.path.exists(cache_file): - logger.debug("MetcalfScoring.setup loading cached data") + logger.info("MetcalfScoring.setup loading cached data") cache_data = load_pickled_data(npl, cache_file) cache_ok = True if cache_data is not None: @@ -108,7 +108,7 @@ def setup(npl: NPLinker): MetcalfScoring.LINKFINDER = LinkFinder() MetcalfScoring.LINKFINDER.calc_score(MetcalfScoring.DATALINKS, link_type=LINK_TYPES[0]) MetcalfScoring.LINKFINDER.calc_score(MetcalfScoring.DATALINKS, link_type=LINK_TYPES[1]) - logger.debug("MetcalfScoring.setup caching results") + logger.info("MetcalfScoring.setup caching results") save_pickled_data( (dataset_counts, MetcalfScoring.DATALINKS, MetcalfScoring.LINKFINDER), cache_file ) @@ -163,7 +163,7 @@ def get_links( ("LinkFinder 
object not found. Have you called `MetcalfScoring.setup(npl)`?") ) - logger.debug(f"MetcalfScoring: standardised = {self.standardised}") + logger.info(f"MetcalfScoring: standardised = {self.standardised}") if not self.standardised: scores_list = self.LINKFINDER.get_links(*objects, score_cutoff=self.cutoff) # TODO CG: verify the logics of standardised score and add unit tests @@ -180,14 +180,14 @@ def get_links( GCF | Spectrum | MolecularFamily, dict[GCF | Spectrum | MolecularFamily, ObjectLink] ] = {} if obj_type == "gcf": - logger.debug( + logger.info( f"MetcalfScoring: input_type=GCF, result_type=Spec/MolFam, " f"#inputs={len(objects)}." ) for scores in scores_list: # when no links found if scores.shape[1] == 0: - logger.debug(f'MetcalfScoring: found no "{scores.name}" links') + logger.info(f'MetcalfScoring: found no "{scores.name}" links') else: # when links found for col_index in range(scores.shape[1]): @@ -202,16 +202,16 @@ def get_links( link_scores[gcf][met] = ObjectLink( gcf, met, self, scores.loc["score", col_index] ) - logger.debug(f"MetcalfScoring: found {len(link_scores)} {scores.name} links.") + logger.info(f"MetcalfScoring: found {len(link_scores)} {scores.name} links.") else: - logger.debug( + logger.info( f"MetcalfScoring: input_type=Spec/MolFam, result_type=GCF, " f"#inputs={len(objects)}." 
) scores = scores_list[0] # when no links found if scores.shape[1] == 0: - logger.debug(f'MetcalfScoring: found no links "{scores.name}" for input objects') + logger.info(f'MetcalfScoring: found no links "{scores.name}" for input objects') else: for col_index in range(scores.shape[1]): gcf = self.npl.lookup_gcf(scores.loc["target", col_index]) @@ -224,10 +224,10 @@ def get_links( link_scores[met][gcf] = ObjectLink( met, gcf, self, scores.loc["score", col_index] ) - logger.debug(f"MetcalfScoring: found {len(link_scores)} {scores.name} links.") + logger.info(f"MetcalfScoring: found {len(link_scores)} {scores.name} links.") link_collection._add_links_from_method(self, link_scores) - logger.debug("MetcalfScoring: completed") + logger.info("MetcalfScoring: completed") return link_collection def _calc_standardised_score_met( @@ -237,7 +237,7 @@ def _calc_standardised_score_met( raise ValueError( "Metcalf mean and std not found. Have you called `MetcalfScoring.setup(npl)`?" ) - logger.debug("Calculating standardised Metcalf scores (met input)") + logger.info("Calculating standardised Metcalf scores (met input)") raw_score = results[0] z_scores = [] for col_index in range(raw_score.shape[1]): @@ -276,7 +276,7 @@ def _calc_standardised_score_gen( raise ValueError( "Metcalf mean and std not found. Have you called `MetcalfScoring.setup(npl)`?" 
) - logger.debug("Calculating standardised Metcalf scores (gen input)") + logger.info("Calculating standardised Metcalf scores (gen input)") postprocessed_scores = [] for raw_score in results: z_scores = [] From 0e57dda62eb301173c6cd6767671377b184b3ddf Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Fri, 10 May 2024 16:37:27 +0200 Subject: [PATCH 13/13] Update __init__.py --- src/nplinker/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nplinker/__init__.py b/src/nplinker/__init__.py index 7fdfe99d..0b347ec8 100644 --- a/src/nplinker/__init__.py +++ b/src/nplinker/__init__.py @@ -23,7 +23,7 @@ def setup_logging(level: str = "INFO", file: str = "", use_console: bool = True) from rich.logging import RichHandler # Get the acncestor logger "nplinker" - logger = logging.getLogger(__name__.split(".")[0]) + logger = logging.getLogger(__name__) logger.setLevel(level) # File handler