Skip to content

Commit

Permalink
rename globals.py to defaults.py
Browse files Browse the repository at this point in the history
`globals` is the name of a Python built-in function, so it is better to use `defaults` to avoid naming conflicts.
  • Loading branch information
CunliangGeng committed Apr 18, 2024
1 parent 5921a82 commit a7b526a
Show file tree
Hide file tree
Showing 7 changed files with 61 additions and 61 deletions.
88 changes: 44 additions & 44 deletions src/nplinker/arranger.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@
from glob import glob
from pathlib import Path
from jsonschema import validate
import nplinker.globals as globals
import nplinker.defaults as defaults
from nplinker.config import config
from nplinker.defaults import GENOME_BGC_MAPPINGS_FILENAME
from nplinker.defaults import GENOME_STATUS_FILENAME
from nplinker.defaults import STRAIN_MAPPINGS_FILENAME
from nplinker.genomics.antismash import podp_download_and_extract_antismash_data
from nplinker.genomics.bigscape.runbigscape import run_bigscape
from nplinker.genomics.mibig import download_and_extract_mibig_metadata
from nplinker.genomics.utils import generate_mappings_genome_id_bgc_id
from nplinker.globals import GENOME_BGC_MAPPINGS_FILENAME
from nplinker.globals import GENOME_STATUS_FILENAME
from nplinker.globals import STRAIN_MAPPINGS_FILENAME
from nplinker.metabolomics.gnps import GNPSDownloader
from nplinker.metabolomics.gnps import GNPSExtractor
from nplinker.schemas import STRAIN_MAPPINGS_SCHEMA
Expand All @@ -36,14 +36,14 @@ class DatasetArranger:
If `config.mode` is "local", the datasets are validated.
If `config.mode` is "podp", the datasets are downloaded or generated.
It uses the default downloads directory `globals.DOWNLOADS_DEFAULT_PATH` to store the
It uses the default downloads directory `defaults.DOWNLOADS_DEFAULT_PATH` to store the
downloaded files. Default data paths for MIBiG, GNPS, antiSMASH, and BiG-SCAPE are defined
in `nplinker.globals`.
in `nplinker.defaults`.
"""

def __init__(self) -> None:
# Prepare the downloads directory and/or PODP json file which are required for other methods
globals.DOWNLOADS_DEFAULT_PATH.mkdir(exist_ok=True)
defaults.DOWNLOADS_DEFAULT_PATH.mkdir(exist_ok=True)
self.arrange_podp_project_json()

def arrange(self) -> None:
Expand All @@ -69,11 +69,11 @@ def arrange_podp_project_json(self) -> None:
"""
if config.mode == "podp":
file_name = f"paired_datarecord_{config.podp_id}.json"
podp_file = globals.DOWNLOADS_DEFAULT_PATH / file_name
podp_file = defaults.DOWNLOADS_DEFAULT_PATH / file_name
if not podp_file.exists():
download_url(
PODP_PROJECT_URL.format(config.podp_id),
globals.DOWNLOADS_DEFAULT_PATH,
defaults.DOWNLOADS_DEFAULT_PATH,
file_name,
)

Expand All @@ -90,12 +90,12 @@ def arrange_mibig(self) -> None:
default directory.
"""
if config.mibig.to_use:
if globals.MIBIG_DEFAULT_PATH.exists():
if defaults.MIBIG_DEFAULT_PATH.exists():
# remove existing mibig data
shutil.rmtree(globals.MIBIG_DEFAULT_PATH)
shutil.rmtree(defaults.MIBIG_DEFAULT_PATH)
download_and_extract_mibig_metadata(
globals.DOWNLOADS_DEFAULT_PATH,
globals.MIBIG_DEFAULT_PATH,
defaults.DOWNLOADS_DEFAULT_PATH,
defaults.MIBIG_DEFAULT_PATH,
version=config.mibig.version,
)

Expand All @@ -120,16 +120,16 @@ def arrange_gnps(self) -> None:
# retry downloading at most 3 times if downloaded data has problems
for _ in range(3):
try:
validate_gnps(globals.GNPS_DEFAULT_PATH)
validate_gnps(defaults.GNPS_DEFAULT_PATH)
pass_validation = True
break
except (FileNotFoundError, ValueError):
# Don't need to remove downloaded archive, as it'll be overwritten
shutil.rmtree(globals.GNPS_DEFAULT_PATH, ignore_errors=True)
shutil.rmtree(defaults.GNPS_DEFAULT_PATH, ignore_errors=True)
self._download_and_extract_gnps()

if not pass_validation:
validate_gnps(globals.GNPS_DEFAULT_PATH)
validate_gnps(defaults.GNPS_DEFAULT_PATH)

# get the path to file_mappings file (csv or tsv)
self.gnps_file_mappings_file = self._get_gnps_file_mappings_file()
Expand All @@ -143,8 +143,8 @@ def _get_gnps_file_mappings_file(self) -> Path:
Returns:
Path to the GNPS file mappings file.
"""
file_mappings_tsv = globals.GNPS_DEFAULT_PATH / globals.GNPS_FILE_MAPPINGS_TSV
file_mappings_csv = globals.GNPS_DEFAULT_PATH / globals.GNPS_FILE_MAPPINGS_CSV
file_mappings_tsv = defaults.GNPS_DEFAULT_PATH / defaults.GNPS_FILE_MAPPINGS_TSV
file_mappings_csv = defaults.GNPS_DEFAULT_PATH / defaults.GNPS_FILE_MAPPINGS_CSV

gnps_file_mappings_file = (
file_mappings_tsv if file_mappings_tsv.exists() else file_mappings_csv
Expand All @@ -158,17 +158,17 @@ def _download_and_extract_gnps(self) -> None:
Get the GNPS task ID from the PODP project JSON file, then download and extract the GNPS
data to the default GNPS directory.
"""
podp_file = globals.DOWNLOADS_DEFAULT_PATH / f"paired_datarecord_{config.podp_id}.json"
podp_file = defaults.DOWNLOADS_DEFAULT_PATH / f"paired_datarecord_{config.podp_id}.json"
with open(podp_file, "r") as f:
podp_json_data = json.load(f)
gnps_task_id = podp_json_data["metabolomics"]["project"].get("molecular_network")

data_archive = (
GNPSDownloader(gnps_task_id, globals.DOWNLOADS_DEFAULT_PATH)
GNPSDownloader(gnps_task_id, defaults.DOWNLOADS_DEFAULT_PATH)
.download()
.get_download_file()
)
GNPSExtractor(data_archive, globals.GNPS_DEFAULT_PATH)
GNPSExtractor(data_archive, defaults.GNPS_DEFAULT_PATH)

def arrange_antismash(self) -> None:
"""Arrange the antiSMASH data.
Expand Down Expand Up @@ -199,27 +199,27 @@ def arrange_antismash(self) -> None:
if config.mode == "podp":
for _ in range(3):
try:
validate_antismash(globals.ANTISMASH_DEFAULT_PATH)
validate_antismash(defaults.ANTISMASH_DEFAULT_PATH)
pass_validation = True
break
except FileNotFoundError:
shutil.rmtree(globals.ANTISMASH_DEFAULT_PATH, ignore_errors=True)
shutil.rmtree(defaults.ANTISMASH_DEFAULT_PATH, ignore_errors=True)
self._download_and_extract_antismash()

if not pass_validation:
validate_antismash(globals.ANTISMASH_DEFAULT_PATH)
validate_antismash(defaults.ANTISMASH_DEFAULT_PATH)

def _download_and_extract_antismash(self) -> None:
"""Download and extract the antiSMASH data.
Get the antiSMASH data from the PODP project JSON file, then download and extract the
antiSMASH data to the default antiSMASH directory.
"""
podp_file = globals.DOWNLOADS_DEFAULT_PATH / f"paired_datarecord_{config.podp_id}.json"
podp_file = defaults.DOWNLOADS_DEFAULT_PATH / f"paired_datarecord_{config.podp_id}.json"
with open(podp_file, "r") as f:
podp_json_data = json.load(f)
podp_download_and_extract_antismash_data(
podp_json_data["genomes"], globals.DOWNLOADS_DEFAULT_PATH, config.root_dir
podp_json_data["genomes"], defaults.DOWNLOADS_DEFAULT_PATH, config.root_dir
)

def arrange_bigscape(self) -> None:
Expand All @@ -244,15 +244,15 @@ def arrange_bigscape(self) -> None:
if config.mode == "podp":
for _ in range(3):
try:
validate_bigscape(globals.BIGSCAPE_DEFAULT_PATH)
validate_bigscape(defaults.BIGSCAPE_DEFAULT_PATH)
pass_validation = True
break
except FileNotFoundError:
shutil.rmtree(globals.BIGSCAPE_DEFAULT_PATH, ignore_errors=True)
shutil.rmtree(defaults.BIGSCAPE_DEFAULT_PATH, ignore_errors=True)
self._run_bigscape()

if not pass_validation:
validate_bigscape(globals.BIGSCAPE_DEFAULT_PATH)
validate_bigscape(defaults.BIGSCAPE_DEFAULT_PATH)

def _run_bigscape(self) -> None:
"""Run BiG-SCAPE to generate the clustering file.
Expand All @@ -262,22 +262,22 @@ def _run_bigscape(self) -> None:
The clustering file "mix_clustering_c{config.bigscape.cutoff}.tsv" will be copied to the
default BiG-SCAPE directory.
"""
globals.BIGSCAPE_RUNNING_OUTPUT_PATH.mkdir(exist_ok=True, parents=True)
defaults.BIGSCAPE_RUNNING_OUTPUT_PATH.mkdir(exist_ok=True, parents=True)
run_bigscape(
globals.ANTISMASH_DEFAULT_PATH,
globals.BIGSCAPE_RUNNING_OUTPUT_PATH,
defaults.ANTISMASH_DEFAULT_PATH,
defaults.BIGSCAPE_RUNNING_OUTPUT_PATH,
config.bigscape.parameters,
)
for f in glob(
str(
globals.BIGSCAPE_RUNNING_OUTPUT_PATH
defaults.BIGSCAPE_RUNNING_OUTPUT_PATH
/ "network_files"
/ "*"
/ "mix"
/ "mix_clustering_c*.tsv"
)
):
shutil.copy(f, globals.BIGSCAPE_DEFAULT_PATH)
shutil.copy(f, defaults.BIGSCAPE_DEFAULT_PATH)

def arrange_strain_mappings(self) -> None:
"""Arrange the strain mappings file.
Expand Down Expand Up @@ -319,14 +319,14 @@ def _validate_strain_mappings(self) -> None:

def _generate_strain_mappings(self) -> None:
"""Generate the strain mappings file for the PODP mode."""
podp_json_file = globals.DOWNLOADS_DEFAULT_PATH / f"paired_datarecord_{config.podp_id}.json"
genome_status_json_file = globals.DOWNLOADS_DEFAULT_PATH / GENOME_STATUS_FILENAME
genome_bgc_mappings_file = globals.ANTISMASH_DEFAULT_PATH / GENOME_BGC_MAPPINGS_FILENAME
podp_json_file = defaults.DOWNLOADS_DEFAULT_PATH / f"paired_datarecord_{config.podp_id}.json"
genome_status_json_file = defaults.DOWNLOADS_DEFAULT_PATH / GENOME_STATUS_FILENAME
genome_bgc_mappings_file = defaults.ANTISMASH_DEFAULT_PATH / GENOME_BGC_MAPPINGS_FILENAME
gnps_file_mapping_file = self.gnps_file_mappings_file
strain_mappings_file = config.root_dir / STRAIN_MAPPINGS_FILENAME

# generate the genome_bgc_mappings_file
generate_mappings_genome_id_bgc_id(globals.ANTISMASH_DEFAULT_PATH)
generate_mappings_genome_id_bgc_id(defaults.ANTISMASH_DEFAULT_PATH)
# generate the strain_mappings_file
podp_generate_strain_mappings(
podp_json_file,
Expand All @@ -343,7 +343,7 @@ def arrange_strains_selected(self) -> None:
The validation checks if the strains selected file is a valid JSON file according to the
schema defined in `schemas/user_strains.json`.
"""
strains_selected_file = config.root_dir / globals.STRAINS_SELECTED_FILENAME
strains_selected_file = config.root_dir / defaults.STRAINS_SELECTED_FILENAME
if strains_selected_file.exists():
with open(strains_selected_file, "r") as f:
json_data = json.load(f)
Expand Down Expand Up @@ -371,8 +371,8 @@ def validate_gnps(gnps_dir: Path) -> None:
if not gnps_dir.exists():
raise FileNotFoundError(f"GNPS data directory not found at {gnps_dir}")

file_mappings_tsv = gnps_dir / globals.GNPS_FILE_MAPPINGS_TSV
file_mappings_csv = gnps_dir / globals.GNPS_FILE_MAPPINGS_CSV
file_mappings_tsv = gnps_dir / defaults.GNPS_FILE_MAPPINGS_TSV
file_mappings_csv = gnps_dir / defaults.GNPS_FILE_MAPPINGS_CSV
if file_mappings_tsv.exists() and file_mappings_csv.exists():
raise ValueError(
f"Both {file_mappings_tsv.name} and {file_mappings_csv.name} found in GNPS directory "
Expand All @@ -385,9 +385,9 @@ def validate_gnps(gnps_dir: Path) -> None:
)

required_files = [
gnps_dir / globals.GNPS_SPECTRA_FILENAME,
gnps_dir / globals.GNPS_MOLECULAR_FAMILY_FILENAME,
gnps_dir / globals.GNPS_ANNOTATIONS_FILENAME,
gnps_dir / defaults.GNPS_SPECTRA_FILENAME,
gnps_dir / defaults.GNPS_MOLECULAR_FAMILY_FILENAME,
gnps_dir / defaults.GNPS_ANNOTATIONS_FILENAME,
]
list_not_found = [f.name for f in required_files if not f.exists()]
if list_not_found:
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
from bs4 import NavigableString
from bs4 import Tag
from jsonschema import validate
from nplinker.defaults import GENOME_STATUS_FILENAME
from nplinker.genomics.antismash import download_and_extract_antismash_data
from nplinker.globals import GENOME_STATUS_FILENAME
from nplinker.logconfig import LogConfig
from nplinker.schemas import GENOME_STATUS_SCHEMA

Expand Down
6 changes: 3 additions & 3 deletions src/nplinker/genomics/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from os import PathLike
from pathlib import Path
from jsonschema import validate
from nplinker.globals import GENOME_BGC_MAPPINGS_FILENAME
from nplinker.defaults import GENOME_BGC_MAPPINGS_FILENAME
from nplinker.logconfig import LogConfig
from nplinker.schemas import GENOME_BGC_MAPPINGS_SCHEMA
from nplinker.schemas import validate_podp_json
Expand Down Expand Up @@ -244,7 +244,7 @@ def extract_mappings_original_genome_id_resolved_genome_id(
Notes:
The `genome_status_json_file` is usually generated by the
`podp_download_and_extract_antismash_data` function with
a default file name defined in `nplinker.globals.GENOME_STATUS_FILENAME`.
a default file name defined in `nplinker.defaults.GENOME_STATUS_FILENAME`.
"""
gs_mappings_dict = GenomeStatus.read_json(genome_status_json_file)
return {gs.original_id: gs.resolved_refseq_id for gs in gs_mappings_dict.values()}
Expand All @@ -265,7 +265,7 @@ def extract_mappings_resolved_genome_id_bgc_id(
Notes:
The `genome_bgc_mappings_file` is usually generated by the
`generate_mappings_genome_id_bgc_id` function with a default file name
defined in `nplinker.globals.GENOME_BGC_MAPPINGS_FILENAME`.
defined in `nplinker.defaults.GENOME_BGC_MAPPINGS_FILENAME`.
"""
with open(genome_bgc_mappings_file, "r") as f:
json_data = json.load(f)
Expand Down
22 changes: 11 additions & 11 deletions src/nplinker/loader.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
import os
from importlib.resources import files
from deprecated import deprecated
from nplinker import globals
from nplinker import defaults
from nplinker.config import config
from nplinker.defaults import GNPS_ANNOTATIONS_FILENAME
from nplinker.defaults import GNPS_DEFAULT_PATH
from nplinker.defaults import GNPS_MOLECULAR_FAMILY_FILENAME
from nplinker.defaults import GNPS_SPECTRA_FILENAME
from nplinker.defaults import STRAIN_MAPPINGS_FILENAME
from nplinker.defaults import STRAINS_SELECTED_FILENAME
from nplinker.genomics.antismash import AntismashBGCLoader
from nplinker.genomics.bigscape import BigscapeGCFLoader
from nplinker.genomics.bigscape import BigscapeV2GCFLoader
from nplinker.genomics.mibig import MibigLoader
from nplinker.genomics.utils import add_bgc_to_gcf
from nplinker.genomics.utils import add_strain_to_bgc
from nplinker.genomics.utils import get_mibig_from_gcf
from nplinker.globals import GNPS_ANNOTATIONS_FILENAME
from nplinker.globals import GNPS_DEFAULT_PATH
from nplinker.globals import GNPS_MOLECULAR_FAMILY_FILENAME
from nplinker.globals import GNPS_SPECTRA_FILENAME
from nplinker.globals import STRAIN_MAPPINGS_FILENAME
from nplinker.globals import STRAINS_SELECTED_FILENAME
from nplinker.logconfig import LogConfig
from nplinker.metabolomics.gnps import GNPSAnnotationLoader
from nplinker.metabolomics.gnps import GNPSMolecularFamilyLoader
Expand Down Expand Up @@ -145,21 +145,21 @@ def _load_genomics(self):

# Step 1: load antismash BGC objects & add strain info
logger.debug("Parsing AntiSMASH directory...")
antismash_bgcs = AntismashBGCLoader(str(globals.ANTISMASH_DEFAULT_PATH)).get_bgcs()
antismash_bgcs = AntismashBGCLoader(str(defaults.ANTISMASH_DEFAULT_PATH)).get_bgcs()
antismash_bgcs_with_strain, _ = add_strain_to_bgc(self.strains, antismash_bgcs)

# Step 2: load mibig BGC objects (having strain info)
if config.mibig.to_use:
self.mibig_bgcs = MibigLoader(str(globals.MIBIG_DEFAULT_PATH)).get_bgcs()
self.mibig_bgcs = MibigLoader(str(defaults.MIBIG_DEFAULT_PATH)).get_bgcs()

# Step 3: get all BGC objects with strain info
all_bgcs_with_strain = antismash_bgcs_with_strain + self.mibig_bgcs

# Step 4: load all GCF objects
bigscape_cluster_file = (
globals.BIGSCAPE_DEFAULT_PATH / f"mix_clustering_c{config.bigscape.cutoff}.tsv"
defaults.BIGSCAPE_DEFAULT_PATH / f"mix_clustering_c{config.bigscape.cutoff}.tsv"
)
bigscape_db_file = globals.BIGSCAPE_DEFAULT_PATH / "data_sqlite.db"
bigscape_db_file = defaults.BIGSCAPE_DEFAULT_PATH / "data_sqlite.db"

# switch depending on found file. prefer V1 if both are found
if bigscape_cluster_file.exists():
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/genomics/test_podp_antismash_downloader.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import json
from pathlib import Path
import pytest
from nplinker.defaults import GENOME_STATUS_FILENAME
from nplinker.genomics.antismash import GenomeStatus
from nplinker.genomics.antismash import podp_download_and_extract_antismash_data
from nplinker.globals import GENOME_STATUS_FILENAME
from nplinker.utils import list_files


Expand Down
2 changes: 1 addition & 1 deletion tests/unit/genomics/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations
import json
import pytest
from nplinker.defaults import GENOME_BGC_MAPPINGS_FILENAME
from nplinker.genomics import BGC
from nplinker.genomics import GCF
from nplinker.genomics.utils import add_bgc_to_gcf
Expand All @@ -11,7 +12,6 @@
from nplinker.genomics.utils import generate_mappings_genome_id_bgc_id
from nplinker.genomics.utils import get_mappings_strain_id_bgc_id
from nplinker.genomics.utils import get_mibig_from_gcf
from nplinker.globals import GENOME_BGC_MAPPINGS_FILENAME
from nplinker.strain import Strain
from nplinker.strain import StrainCollection
from .. import DATA_DIR
Expand Down

0 comments on commit a7b526a

Please sign in to comment.