Skip to content

Commit

Permalink
fix errors in docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
CunliangGeng committed Mar 5, 2024
1 parent 8585067 commit accf6dd
Show file tree
Hide file tree
Showing 13 changed files with 36 additions and 26 deletions.
9 changes: 8 additions & 1 deletion src/nplinker/arranger.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ def arrange_gnps(self) -> None:
existing GNPS data and re-download it if it is invalid.
The validation process includes:
- Check if the GNPS data directory exists.
- Check if the required files exist in the GNPS data directory, including:
- file_mappings.tsv or file_mappings.csv
Expand Down Expand Up @@ -182,6 +183,7 @@ def arrange_antismash(self) -> None:
is a number).
AntiSMASH BGC directory must follow the structure below:
```
antismash
├── genome_id_1 (one AntiSMASH output, e.g. GCF_000514775.1)
│  ├── GCF_000514775.1.gbk
Expand All @@ -190,6 +192,7 @@ def arrange_antismash(self) -> None:
├── genome_id_2
│  ├── ...
└── ...
```
"""
pass_validation = False
if config.mode == "podp":
Expand Down Expand Up @@ -229,9 +232,10 @@ def arrange_bigscape(self) -> None:
will be copied to the default BiG-SCAPE directory.
The validation process includes:
- Check if the default BiG-SCAPE data directory exists.
- Check if the clustering file "mix_clustering_c{config.bigscape.cutoff}.tsv" exists in the
BiG-SCAPE data directory.
BiG-SCAPE data directory.
"""
pass_validation = False
if config.mode == "podp":
Expand Down Expand Up @@ -290,6 +294,7 @@ def _validate_strain_mappings(self) -> None:
"""Validate the strain mappings file.
The validation process includes:
- Check if the strain mappings file exists.
- Check if the strain mappings file is a valid JSON file according to the schema defined in
`schemas/strain_mappings_schema.json`.
Expand Down Expand Up @@ -346,6 +351,7 @@ def validate_gnps(gnps_dir: Path) -> None:
"""Validate the GNPS data directory and its contents.
The GNPS data directory must contain the following files:
- file_mappings.tsv or file_mappings.csv
- spectra.mgf
- molecular_families.tsv
Expand Down Expand Up @@ -392,6 +398,7 @@ def validate_antismash(antismash_dir: Path) -> None:
The validation only checks the structure of the antiSMASH data directory and file names.
It does not check
- the content of the BGC files
- the consistency between the antiSMASH data and the PODP project JSON file for the PODP
mode
Expand Down
6 changes: 3 additions & 3 deletions src/nplinker/genomics/abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,13 @@ def get_bgcs(self) -> Sequence[BGC]:
"""Get BGC objects.
Returns:
A list of :class:`~nplinker.genomic.BGC` objects
A list of BGC objects
"""


class GCFLoaderBase(ABC):
@abstractmethod
def get_gcfs(self, keep_mibig_only, keep_singleton) -> Sequence[GCF]:
def get_gcfs(self, keep_mibig_only: bool, keep_singleton: bool) -> Sequence[GCF]:
"""Get GCF objects.
Args:
Expand All @@ -44,5 +44,5 @@ def get_gcfs(self, keep_mibig_only, keep_singleton) -> Sequence[GCF]:
is a GCF that contains only one BGC.
Returns:
A list of :class:`~nplinker.genomic.GCF` objects
A list of GCF objects
"""
8 changes: 5 additions & 3 deletions src/nplinker/genomics/antismash/antismash_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def __init__(self, data_dir: str) -> None:
Note:
AntiSMASH BGC directory must follow the structure below:
```
antismash
├── genome_id_1 (one AntiSMASH output, e.g. GCF_000514775.1)
│  ├── GCF_000514775.1.gbk
Expand All @@ -28,9 +29,10 @@ def __init__(self, data_dir: str) -> None:
├── genome_id_2
│  ├── ...
└── ...
```
Args:
antismash_dir: Path to AntiSMASH directory that contains a
data_dir: Path to AntiSMASH directory that contains a
collection of AntiSMASH outputs.
"""
self.data_dir = data_dir
Expand All @@ -43,8 +45,8 @@ def get_bgc_genome_mapping(self) -> dict[str, str]:
Note that the directory name of the gbk file is treated as genome id.
Returns:
The key is BGC name (gbk file name) and value is genome id (the directory name of the gbk
file).
The key is BGC name (gbk file name) and value is genome id (the directory name of the
gbk file).
"""
return {
bid: os.path.basename(os.path.dirname(bpath)) for bid, bpath in self._file_dict.items()
Expand Down
4 changes: 2 additions & 2 deletions src/nplinker/genomics/bgc.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,12 +151,12 @@ def is_mibig(self) -> bool:
# this property is not used in NPLinker core business.
@property
@deprecated(version="2.0.0", reason="This method will be removed soon")
def aa_predictions(self):
def aa_predictions(self) -> list:
"""Amino acids as predicted monomers of product.
Returns:
list of dicts with key as amino acid and value as prediction
probability.
probability.
"""
# Load aa predictions and cache them
self._aa_predictions = None
Expand Down
2 changes: 1 addition & 1 deletion src/nplinker/genomics/bigscape/bigscape_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def __init__(self, cluster_file: str | PathLike, /) -> None:
self.cluster_file = str(cluster_file)
self._gcf_list = self._parse_gcf(self.cluster_file)

def get_gcfs(self, keep_mibig_only=False, keep_singleton=False) -> list[GCF]:
def get_gcfs(self, keep_mibig_only: bool = False, keep_singleton: bool = False) -> list[GCF]:
"""Get all GCF objects.
Args:
Expand Down
6 changes: 3 additions & 3 deletions src/nplinker/genomics/mibig/mibig_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ def download_and_extract_mibig_metadata(
):
"""Download and extract MIBiG metadata json files.
Note that it does not matter whether the metadata json files are in nested folders or not in the archive,
all json files will be extracted to the same location, i.e. `extract_path`. The nested
folders will be removed if they exist. So the `extract_path` will have only json files.
Note that it does not matter whether the metadata json files are in nested folders or not in the archive,
all json files will be extracted to the same location, i.e. `extract_path`. The nested
folders will be removed if they exist. So the `extract_path` will have only json files.
Args:
download_root: Path to the directory in which to place the downloaded archive.
Expand Down
2 changes: 1 addition & 1 deletion src/nplinker/genomics/mibig/mibig_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


class MibigMetadata:
def __init__(self, file) -> None:
def __init__(self, file: str) -> None:
"""Class to model the BGC metadata/annotations defined in MIBiG.
MIBiG is a specification of BGC metadata and uses JSON schema to
Expand Down
2 changes: 1 addition & 1 deletion src/nplinker/metabolomics/gnps/gnps_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def __init__(self, file: str | PathLike, extract_dir: str | PathLike):
Args:
file: The path to the GNPS zip file.
extract_path: path to the directory where to extract the files to.
extract_dir: Path to the directory into which the files are extracted.
Raises:
ValueError: If the given file is an invalid GNPS archive.
Expand Down
2 changes: 1 addition & 1 deletion src/nplinker/metabolomics/spectrum.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def peaks(self) -> np.ndarray:
"""Get the peaks, a 2D array with each row containing the values of (m/z, intensity)."""
return np.array(list(zip(self.mz, self.intensity)))

def has_strain(self, strain: Strain):
def has_strain(self, strain: Strain) -> bool:
"""Check if the given strain exists in the spectrum.
Args:
Expand Down
11 changes: 7 additions & 4 deletions src/nplinker/nplinker.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from .pickler import save_pickled_data
from .scoring.link_collection import LinkCollection
from .scoring.metcalf_scoring import MetcalfScoring
from .scoring.methods import ScoringMethod
from .scoring.np_class_scoring import NPClassScoring
from .scoring.rosetta_scoring import RosettaScoring

Expand Down Expand Up @@ -119,11 +120,11 @@ def save_repro_data(self, filename):
logger.info(f"Saving reproducibility data to {filename}")

@property
def root_dir(self):
def root_dir(self) -> str:
"""Returns path to the current dataset root directory.
Returns:
the path to the dataset root directory currently in use
The path to the dataset root directory currently in use
"""
return config.root_dir

Expand Down Expand Up @@ -154,7 +155,9 @@ def load_data(self):
self._class_matches = self._loader.class_matches

# TODO CG: refactor this method and update its unit tests
def get_links(self, input_objects, scoring_methods, and_mode=True):
def get_links(
self, input_objects: list, scoring_methods: list, and_mode: bool = True
) -> LinkCollection:
"""Find links for a set of input objects (BGCs/GCFs/Spectra/MolFams).
The input objects can be any mix of the following NPLinker types:
Expand Down Expand Up @@ -388,7 +391,7 @@ def class_matches(self):
"""ClassMatches with the matched classes and scoring tables from MIBiG."""
return self._class_matches

def scoring_method(self, name):
def scoring_method(self, name: str) -> ScoringMethod | None:
"""Return an instance of a scoring method.
Args:
Expand Down
3 changes: 1 addition & 2 deletions src/nplinker/pairedomics/podp_antismash_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,7 @@ def get_best_available_genome_id(genome_id_data: dict[str, str]) -> str | None:
"""Get the best available ID from genome_id_data dict.
Args:
genome_id_data: dictionary containing information
for each genome record present.
genome_id_data: dictionary containing information for each genome record present.
Returns:
ID for the genome, if present, otherwise None.
Expand Down
2 changes: 1 addition & 1 deletion src/nplinker/pairedomics/strain_mappings_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ def extract_mappings_ms_filename_spectrum_id(
from GNPS website to a file with a default name defined in `GNPS_FILE_MAPPINGS_FILENAME`.
See Also:
`GNPSFileMappingLoader`: A class to load GNPS file mappings file.
GNPSFileMappingLoader: A class to load the GNPS file mappings file.
"""
loader = GNPSFileMappingLoader(gnps_file_mappings_file)
return loader.mapping_reversed
Expand Down
5 changes: 2 additions & 3 deletions src/nplinker/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,7 @@ def download_url(
md5: MD5 checksum of the download. If None, do not check.
http_method: HTTP request method, e.g. "GET", "POST".
Defaults to "GET".
allow_http_redirect: If true, enable following redirects
for all HTTP ("http:") methods.
allow_http_redirect: If true, enable following redirects for all HTTP ("http:") methods.
"""
root = transform_to_full_path(root)
# create the download directory if not exist
Expand Down Expand Up @@ -172,7 +171,7 @@ def list_dirs(root: str | PathLike, keep_parent: bool = True) -> list[str]:
Args:
root: Path to directory whose folders need to be listed
prefix: If true, prepends the path to each result, otherwise
keep_parent: If true, prepends the path to each result, otherwise
only returns the name of the directories found
"""
root = transform_to_full_path(root)
Expand Down

0 comments on commit accf6dd

Please sign in to comment.