fix errors in docstrings

NPLinker · Mar 5, 2024 · accf6dd · accf6dd
1 parent 8585067
commit accf6dd
Show file tree

Hide file tree

Showing 13 changed files with 36 additions and 26 deletions.
diff --git a/src/nplinker/arranger.py b/src/nplinker/arranger.py
@@ -106,6 +106,7 @@ def arrange_gnps(self) -> None:
         existing GNPS data and re-download it if it is invalid.
 
         The validation process includes:
+
         - Check if the GNPS data directory exists.
         - Check if the required files exist in the GNPS data directory, including:
             - file_mappings.tsv or file_mappings.csv
@@ -182,6 +183,7 @@ def arrange_antismash(self) -> None:
             is a number).
 
         AntiSMASH BGC directory must follow the structure below:
+        ```
         antismash
             ├── genome_id_1 (one AntiSMASH output, e.g. GCF_000514775.1)
             │  ├── GCF_000514775.1.gbk
@@ -190,6 +192,7 @@ def arrange_antismash(self) -> None:
             ├── genome_id_2
             │  ├── ...
             └── ...
+        ```
         """
         pass_validation = False
         if config.mode == "podp":
@@ -229,9 +232,10 @@ def arrange_bigscape(self) -> None:
         will be copied to the default BiG-SCAPE directory.
 
         The validation process includes:
+
         - Check if the default BiG-SCAPE data directory exists.
         - Check if the clustering file "mix_clustering_c{config.bigscape.cutoff}.tsv" exists in the
-            BiG-SCAPE data directory.
+                BiG-SCAPE data directory.
         """
         pass_validation = False
         if config.mode == "podp":
@@ -290,6 +294,7 @@ def _validate_strain_mappings(self) -> None:
         """Validate the strain mappings file.
 
         The validation process includes:
+
         - Check if the strain mappings file exists.
         - Check if the strain mappings file is a valid JSON file according to the schema defined in
             `schemas/strain_mappings_schema.json`.
@@ -346,6 +351,7 @@ def validate_gnps(gnps_dir: Path) -> None:
     """Validate the GNPS data directory and its contents.
 
     The GNPS data directory must contain the following files:
+
     - file_mappings.tsv or file_mappings.csv
     - spectra.mgf
     - molecular_families.tsv
@@ -392,6 +398,7 @@ def validate_antismash(antismash_dir: Path) -> None:
 
     The validation only checks the structure of the antiSMASH data directory and file names.
     It does not check
+
     - the content of the BGC files
     - the consistency between the antiSMASH data and the PODP project JSON file for the PODP
         mode

diff --git a/src/nplinker/genomics/abc.py b/src/nplinker/genomics/abc.py
@@ -28,13 +28,13 @@ def get_bgcs(self) -> Sequence[BGC]:
         """Get BGC objects.
 
         Returns:
-            A list of :class:`~nplinker.genomic.BGC` objects
+            A list of BGC objects
         """
 
 
 class GCFLoaderBase(ABC):
     @abstractmethod
-    def get_gcfs(self, keep_mibig_only, keep_singleton) -> Sequence[GCF]:
+    def get_gcfs(self, keep_mibig_only: bool, keep_singleton: bool) -> Sequence[GCF]:
         """Get GCF objects.
 
         Args:
@@ -44,5 +44,5 @@ def get_gcfs(self, keep_mibig_only, keep_singleton) -> Sequence[GCF]:
                 is a GCF that contains only one BGC.
 
         Returns:
-            A list of :class:`~nplinker.genomic.GCF` objects
+            A list of GCF objects
         """
diff --git a/src/nplinker/genomics/antismash/antismash_loader.py b/src/nplinker/genomics/antismash/antismash_loader.py
@@ -20,6 +20,7 @@ def __init__(self, data_dir: str) -> None:
 
         Note:
             AntiSMASH BGC directory must follow the structure below:
+            ```
             antismash
                 ├── genome_id_1 (one AntiSMASH output, e.g. GCF_000514775.1)
                 │  ├── GCF_000514775.1.gbk
@@ -28,9 +29,10 @@ def __init__(self, data_dir: str) -> None:
                 ├── genome_id_2
                 │  ├── ...
                 └── ...
+            ```
 
         Args:
-            antismash_dir: Path to AntiSMASH directory that contains a
+            data_dir: Path to AntiSMASH directory that contains a
                 collection of AntiSMASH outputs.
         """
         self.data_dir = data_dir
@@ -43,8 +45,8 @@ def get_bgc_genome_mapping(self) -> dict[str, str]:
         Note that the directory name of the gbk file is treated as genome id.
 
         Returns:
-            The key is BGC name (gbk file name) and value is genome id (the directory name of the gbk
-              file).
+            The key is BGC name (gbk file name) and value is genome id (the directory name of the
+            gbk file).
         """
         return {
             bid: os.path.basename(os.path.dirname(bpath)) for bid, bpath in self._file_dict.items()

diff --git a/src/nplinker/genomics/bgc.py b/src/nplinker/genomics/bgc.py
@@ -151,12 +151,12 @@ def is_mibig(self) -> bool:
     # this property is not used in NPLinker core business.
     @property
     @deprecated(version="2.0.0", reason="This method will be removed soon")
-    def aa_predictions(self):
+    def aa_predictions(self) -> list:
         """Amino acids as predicted monomers of product.
 
         Returns:
             list of dicts with key as amino acid and value as prediction
-                probability.
+            probability.
         """
         # Load aa predictions and cache them
         self._aa_predictions = None

diff --git a/src/nplinker/genomics/bigscape/bigscape_loader.py b/src/nplinker/genomics/bigscape/bigscape_loader.py
@@ -23,7 +23,7 @@ def __init__(self, cluster_file: str | PathLike, /) -> None:
         self.cluster_file = str(cluster_file)
         self._gcf_list = self._parse_gcf(self.cluster_file)
 
-    def get_gcfs(self, keep_mibig_only=False, keep_singleton=False) -> list[GCF]:
+    def get_gcfs(self, keep_mibig_only: bool = False, keep_singleton: bool = False) -> list[GCF]:
         """Get all GCF objects.
 
         Args:

diff --git a/src/nplinker/genomics/mibig/mibig_downloader.py b/src/nplinker/genomics/mibig/mibig_downloader.py
@@ -30,9 +30,9 @@ def download_and_extract_mibig_metadata(
 ):
     """Download and extract MIBiG metadata json files.
 
-        Note that it does not matter whether the metadata json files are in nested folders or not in the archive,
-        all json files will be extracted to the same location, i.e. `extract_path`. The nested
-        folders will be removed if they exist. So the `extract_path` will have only json files.
+    Note that it does not matter whether the metadata json files are in nested folders in the
+    archive or not: all json files will be extracted to the same location, i.e. `extract_path`.
+    The nested folders will be removed if they exist, so `extract_path` will have only json files.
 
     Args:
         download_root: Path to the directory in which to place the downloaded archive.

diff --git a/src/nplinker/genomics/mibig/mibig_metadata.py b/src/nplinker/genomics/mibig/mibig_metadata.py
@@ -2,7 +2,7 @@
 
 
 class MibigMetadata:
-    def __init__(self, file) -> None:
+    def __init__(self, file: str) -> None:
         """Class to model the BGC metadata/annotations defined in MIBiG.
 
         MIBiG is a specification of BGC metadata and use JSON schema to

diff --git a/src/nplinker/metabolomics/gnps/gnps_extractor.py b/src/nplinker/metabolomics/gnps/gnps_extractor.py
@@ -37,7 +37,7 @@ def __init__(self, file: str | PathLike, extract_dir: str | PathLike):
 
         Args:
             file: The path to the GNPS zip file.
-            extract_path: path to the directory where to extract the files to.
+            extract_dir: The path to the directory to extract the files to.
 
         Raises:
             ValueError: If the given file is an invalid GNPS archive.

diff --git a/src/nplinker/metabolomics/spectrum.py b/src/nplinker/metabolomics/spectrum.py
@@ -76,7 +76,7 @@ def peaks(self) -> np.ndarray:
         """Get the peaks, a 2D array with each row containing the values of (m/z, intensity)."""
         return np.array(list(zip(self.mz, self.intensity)))
 
-    def has_strain(self, strain: Strain):
+    def has_strain(self, strain: Strain) -> bool:
         """Check if the given strain exists in the spectrum.
 
         Args:

diff --git a/src/nplinker/nplinker.py b/src/nplinker/nplinker.py
@@ -14,6 +14,7 @@
 from .pickler import save_pickled_data
 from .scoring.link_collection import LinkCollection
 from .scoring.metcalf_scoring import MetcalfScoring
+from .scoring.methods import ScoringMethod
 from .scoring.np_class_scoring import NPClassScoring
 from .scoring.rosetta_scoring import RosettaScoring
 
@@ -119,11 +120,11 @@ def save_repro_data(self, filename):
             logger.info(f"Saving reproducibility data to {filename}")
 
     @property
-    def root_dir(self):
+    def root_dir(self) -> str:
         """Returns path to the current dataset root directory.
 
         Returns:
-                the path to the dataset root directory currently in use
+            The path to the dataset root directory currently in use
         """
         return config.root_dir
 
@@ -154,7 +155,9 @@ def load_data(self):
         self._class_matches = self._loader.class_matches
 
     # TODO CG: refactor this method and update its unit tests
-    def get_links(self, input_objects, scoring_methods, and_mode=True):
+    def get_links(
+        self, input_objects: list, scoring_methods: list, and_mode: bool = True
+    ) -> LinkCollection:
         """Find links for a set of input objects (BGCs/GCFs/Spectra/MolFams).
 
         The input objects can be any mix of the following NPLinker types:
@@ -388,7 +391,7 @@ def class_matches(self):
         """ClassMatches with the matched classes and scoring tables from MIBiG."""
         return self._class_matches
 
-    def scoring_method(self, name):
+    def scoring_method(self, name: str) -> ScoringMethod | None:
         """Return an instance of a scoring method.
 
         Args:

diff --git a/src/nplinker/pairedomics/podp_antismash_downloader.py b/src/nplinker/pairedomics/podp_antismash_downloader.py
@@ -223,8 +223,7 @@ def get_best_available_genome_id(genome_id_data: dict[str, str]) -> str | None:
     """Get the best available ID from genome_id_data dict.
 
     Args:
-        genome_id_data: dictionary containing information
-        for each genome record present.
+        genome_id_data: dictionary containing information for each genome record present.
 
     Returns:
         ID for the genome, if present, otherwise None.

diff --git a/src/nplinker/pairedomics/strain_mappings_generator.py b/src/nplinker/pairedomics/strain_mappings_generator.py
@@ -297,7 +297,7 @@ def extract_mappings_ms_filename_spectrum_id(
         from GNPS website to a file with a default name defined in `GNPS_FILE_MAPPINGS_FILENAME`.
 
     See Also:
-        `GNPSFileMappingLoader`: A class to load GNPS file mappings file.
+        GNPSFileMappingLoader: A class to load the GNPS file mappings file.
     """
     loader = GNPSFileMappingLoader(gnps_file_mappings_file)
     return loader.mapping_reversed

diff --git a/src/nplinker/utils.py b/src/nplinker/utils.py
@@ -131,8 +131,7 @@ def download_url(
         md5: MD5 checksum of the download. If None, do not check.
         http_method: HTTP request method, e.g. "GET", "POST".
             Defaults to "GET".
-        allow_http_redirect: If true, enable following redirects
-         for all HTTP ("http:") methods.
+        allow_http_redirect: If true, enable following redirects for all HTTP ("http:") methods.
     """
     root = transform_to_full_path(root)
     # create the download directory if not exist
@@ -172,7 +171,7 @@ def list_dirs(root: str | PathLike, keep_parent: bool = True) -> list[str]:
 
     Args:
         root: Path to directory whose folders need to be listed
-        prefix: If true, prepends the path to each result, otherwise
+        keep_parent: If true, prepends the path to each result, otherwise
             only returns the name of the directories found
     """
     root = transform_to_full_path(root)