Skip to content

Commit

Permalink
return list for get_bgcs methods of BGC loaders
Browse files Browse the repository at this point in the history
This PR changes the return value of the `get_bgcs` methods in the BGC loaders from a dict to a list, so that the `get_bgcs` and `get_gcfs` methods have a consistent return type.
  • Loading branch information
CunliangGeng authored Dec 14, 2023
1 parent f96f616 commit e6b758a
Show file tree
Hide file tree
Showing 7 changed files with 29 additions and 45 deletions.
5 changes: 2 additions & 3 deletions src/nplinker/genomics/abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,11 @@ def get_files(self) -> dict[str, str]:
"""

@abstractmethod
def get_bgcs(self) -> dict[str, BGC]:
def get_bgcs(self) -> Sequence[BGC]:
"""Get BGC objects.
Returns:
dict[str, BGC]: key is BGC name and value is
:class:`~nplinker.genomic.BGC` objects
Sequence[BGC]: a list of :class:`~nplinker.genomics.BGC` objects
"""


Expand Down
19 changes: 7 additions & 12 deletions src/nplinker/genomics/antismash/antismash_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def __init__(self, data_dir: str) -> None:
"""
self.data_dir = data_dir
self._file_dict = self._parse_data_dir(self.data_dir)
self._bgc_dict = self._parse_bgcs(self._file_dict)
self._bgcs = self._parse_bgcs(self._file_dict)

def get_bgc_genome_mapping(self) -> dict[str, str]:
"""Get the mapping from BGC to genome.
Expand Down Expand Up @@ -85,31 +85,26 @@ def _parse_data_dir(data_dir: str) -> dict[str, str]:

return bgc_files

def get_bgcs(self) -> dict[str, BGC]:
def get_bgcs(self) -> list[BGC]:
"""Get all BGC objects.
Returns:
dict[str, BGC]: key is BGC name and value is
:class:`~nplinker.genomic.BGC` objects
list[BGC]: a list of :class:`~nplinker.genomics.BGC` objects
"""
return self._bgc_dict
return self._bgcs

@staticmethod
def _parse_bgcs(bgc_files: dict[str, str]) -> dict[str, BGC]:
def _parse_bgcs(bgc_files: dict[str, str]) -> list[BGC]:
"""Load given BGC files as BGC objects.
Args:
bgc_files(dict[str, str]): key is BGC name and value is path to the
BGC gbk file, see method :meth:`.bgc_files`.
Returns:
dict[str, BGC]: key is BGC name and value is :class:`~nplinker.genomic.BGC` objects
list[BGC]: a list of :class:`~nplinker.genomics.BGC` objects
"""
bgcs = {}
for bgc_id in bgc_files:
bgc = parse_bgc_genbank(bgc_files[bgc_id])
bgcs[bgc_id] = bgc
return bgcs
return [parse_bgc_genbank(file) for file in bgc_files.values()]


def parse_bgc_genbank(file: str) -> BGC:
Expand Down
20 changes: 7 additions & 13 deletions src/nplinker/genomics/mibig/mibig_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def __init__(self, data_dir: str):
self.data_dir = data_dir
self._file_dict = self.parse_data_dir(self.data_dir)
self._metadata_dict = self._parse_metadatas()
self._bgc_dict = self._parse_bgcs()
self._bgcs = self._parse_bgcs()

def get_strain_bgc_mapping(self) -> dict[str, str]:
"""Get the mapping from strain to BGC.
Expand Down Expand Up @@ -85,27 +85,21 @@ def _parse_metadatas(self) -> dict[str, MibigMetadata]:
metadata_dict[name] = metadata
return metadata_dict

def get_bgcs(self) -> dict[str, BGC]:
def get_bgcs(self) -> list[BGC]:
"""Get BGC objects.
Returns:
dict[str, BGC]: key is BGC name and value is
:class:`nplinker.genomics.BGC` object
list[BGC]: a list of :class:`nplinker.genomics.BGC` objects
"""
return self._bgc_dict
return self._bgcs

def _parse_bgcs(self) -> dict[str, BGC]:
def _parse_bgcs(self) -> list[BGC]:
"""Parse all metadata files as BGC objects.
Returns:
dict[str, BGC]: key is BGC accession (file name) and value is
BGC object
list[BGC]: a list of BGC objects
"""
bgc_dict = {}
for name, file in self._file_dict.items():
bgc = parse_bgc_metadata_json(file)
bgc_dict[name] = bgc
return bgc_dict
return [parse_bgc_metadata_json(file) for file in self._file_dict.values()]


def parse_bgc_metadata_json(file: str) -> BGC:
Expand Down
8 changes: 4 additions & 4 deletions src/nplinker/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def __init__(self, config_data):
os.path.split(self._root)[-1] if not self._remote_loading else self._platform_id
)
self.bgcs, self.gcfs, self.spectra, self.molfams = [], [], [], []
self.mibig_bgc_dict = {}
self.mibig_bgcs = []
self._mibig_strain_bgc_mapping = {}
self.product_types = []
self.strains = StrainCollection()
Expand Down Expand Up @@ -374,7 +374,7 @@ def _validate_paths(self):

def _load_mibig(self):
mibig_bgc_loader = MibigLoader(self.mibig_json_dir)
self.mibig_bgc_dict = mibig_bgc_loader.get_bgcs()
self.mibig_bgcs = mibig_bgc_loader.get_bgcs()
self._mibig_strain_bgc_mapping = mibig_bgc_loader.get_strain_bgc_mapping()
return True

Expand Down Expand Up @@ -436,8 +436,8 @@ def _load_genomics(self):

# Step 1: load all BGC objects
logger.debug("Parsing AntiSMASH directory...")
antismash_bgc_dict = AntismashBGCLoader(self.antismash_dir).get_bgcs()
raw_bgcs = list(antismash_bgc_dict.values()) + list(self.mibig_bgc_dict.values())
antismash_bgcs = AntismashBGCLoader(self.antismash_dir).get_bgcs()
raw_bgcs = antismash_bgcs + self.mibig_bgcs

# Step 2: load all GCF objects
bigscape_cluster_file = (
Expand Down
9 changes: 5 additions & 4 deletions src/nplinker/nplinker.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def __init__(self, userconfig=None):
self._strains = None
self._metadata = {}
self._molfams = []
self._mibig_bgc_dict = {}
self._mibig_bgcs = []
self._chem_classes = None
self._class_matches = None

Expand Down Expand Up @@ -267,7 +267,7 @@ def load_data(self, new_bigscape_cutoff=None):
self._molfams = self._loader.molfams
self._bgcs = self._loader.bgcs
self._gcfs = self._loader.gcfs
self._mibig_bgc_dict = self._loader.mibig_bgc_dict
self._mibig_bgcs = self._loader.mibig_bgcs
self._strains = self._loader.strains
self._product_types = self._loader.product_types
self._chem_classes = self._loader.chem_classes
Expand Down Expand Up @@ -495,8 +495,9 @@ def metadata(self):
return self._metadata

@property
def mibig_bgc_dict(self):
return self._mibig_bgc_dict
def mibig_bgcs(self):
"""Get a list of all the MIBiG BGCs in the dataset."""
return self._mibig_bgcs

@property
def product_types(self):
Expand Down
7 changes: 2 additions & 5 deletions tests/genomics/antismash/test_antismash_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,9 @@ def test_parse_data_dir(self):

def test_get_bgcs(self, loader):
bgcs = loader.get_bgcs()
assert isinstance(bgcs, dict)
assert isinstance(bgcs, list)
assert len(bgcs) == 44
assert isinstance(bgcs["NZ_AZWB01000005.region001"], BGC)
assert isinstance(bgcs["NZ_AZWS01000001.region001"], BGC)
assert bgcs.get("GCF_000514855.1", "NotExist") == "NotExist"
assert bgcs.get("GCF_000514515.1", "NotExist") == "NotExist"
assert isinstance(bgcs[0], BGC)


def test_parse_bgc_genbank():
Expand Down
6 changes: 2 additions & 4 deletions tests/genomics/test_mibig_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,9 @@ def test_get_metadatas(self, loader):

def test_get_bgcs(self, loader):
bgcs = loader.get_bgcs()
assert isinstance(bgcs, dict)
assert isinstance(bgcs, list)
assert len(bgcs) == 2502 # MIBiG v3.1 has 2502 BGCs
assert "BGC0000001" in bgcs
assert "BGC0000246" not in bgcs
assert isinstance(bgcs["BGC0000001"], BGC)
assert isinstance(bgcs[0], BGC)


def test_parse_bgc_metadata_json():
Expand Down

0 comments on commit e6b758a

Please sign in to comment.