From ae739715b427c1d0739387e24512d54eda205ddf Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Wed, 25 Oct 2023 16:39:35 +0200 Subject: [PATCH] add `keep_mibig_only` to `BigscapeGCFLoader.get_gcfs` method - add parameter `keep_mibig_only` - update and add unit tests --- src/nplinker/genomics/bigscape/bigscape_loader.py | 14 ++++++++++++-- tests/genomics/test_bigscape_loader.py | 10 ++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/nplinker/genomics/bigscape/bigscape_loader.py b/src/nplinker/genomics/bigscape/bigscape_loader.py index 825d32dd..df7aa37a 100644 --- a/src/nplinker/genomics/bigscape/bigscape_loader.py +++ b/src/nplinker/genomics/bigscape/bigscape_loader.py @@ -25,8 +25,18 @@ def __init__(self, cluster_file: str | PathLike, /) -> None: self._gcf_dict = self._parse_gcf(self.cluster_file) self._gcf_list = list(self._gcf_dict.values()) - def get_gcfs(self) -> list[GCF]: - """Get all GCF objects.""" + def get_gcfs(self, keep_mibig_only=False) -> list[GCF]: + """Get all GCF objects. + + Args: + keep_mibig_only(bool): True to keep GCFs that contain only MIBiG + BGCs. + + Returns: + list[GCF]: a list of GCF objects. + """ + if not keep_mibig_only: + return [gcf for gcf in self._gcf_list if not gcf.has_mibig_only()] return self._gcf_list @staticmethod diff --git a/tests/genomics/test_bigscape_loader.py b/tests/genomics/test_bigscape_loader.py index 89d9079e..d2e2bf3a 100644 --- a/tests/genomics/test_bigscape_loader.py +++ b/tests/genomics/test_bigscape_loader.py @@ -22,13 +22,19 @@ def test_init(self, loader): "mix_clustering_c0.30.tsv") def test_get_gcfs(self, loader): - gcfs = loader.get_gcfs() + gcfs = loader.get_gcfs(keep_mibig_only=True) assert isinstance(gcfs, list) assert len(gcfs) == 114 assert isinstance(gcfs[0], GCF) + def test_get_gcfs_without_mibig_only(self, loader): + gcfs = loader.get_gcfs(keep_mibig_only=False) + assert isinstance(gcfs, list) + assert len(gcfs) == 113 + assert isinstance(gcfs[0], GCF) + def test_parse_gcf(self, loader): - gcf_dict = BigscapeGCFLoader._parse_gcf(loader.cluster_file) # noqa + gcf_dict = BigscapeGCFLoader._parse_gcf(loader.cluster_file) # noqa assert isinstance(gcf_dict, dict) assert len(gcf_dict) == 114 gcf = gcf_dict["135"]