From 020a028714c40d6280418b570c61269cb77a9f1d Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Mon, 27 Nov 2023 15:28:14 +0100 Subject: [PATCH] remove class `SingletonFamily` Class MolecularFamily can be used to represent singleton molecular family. --- src/nplinker/class_info/chem_classes.py | 2 +- src/nplinker/metabolomics/__init__.py | 3 +- src/nplinker/metabolomics/singleton_family.py | 10 ------ src/nplinker/nplinker.py | 2 -- src/nplinker/scoring/linking/data_links.py | 10 ------ tests/scoring/test_data_links.py | 31 ------------------- 6 files changed, 2 insertions(+), 56 deletions(-) delete mode 100644 src/nplinker/metabolomics/singleton_family.py diff --git a/src/nplinker/class_info/chem_classes.py b/src/nplinker/class_info/chem_classes.py index cffdf944..e859db36 100644 --- a/src/nplinker/class_info/chem_classes.py +++ b/src/nplinker/class_info/chem_classes.py @@ -555,7 +555,7 @@ def _read_cf_classes(self, mne_dir): nr_nodes = line.pop(0) # todo: make it easier to query classes of singleton families # if singleton family, format like '-1_spectrum-id' like canopus results - # CG: Note that the singleton families id is "singleton-" + uuid. + # Note that the singleton families id is "singleton-" + spectrum-id. if nr_nodes == "1": component = f"-1_{cluster}" class_info = [] diff --git a/src/nplinker/metabolomics/__init__.py b/src/nplinker/metabolomics/__init__.py index 72db2e65..6f39b4e9 100644 --- a/src/nplinker/metabolomics/__init__.py +++ b/src/nplinker/metabolomics/__init__.py @@ -1,10 +1,9 @@ import logging from .molecular_family import MolecularFamily -from .singleton_family import SingletonFamily from .spectrum import GNPS_KEY from .spectrum import Spectrum logging.getLogger(__name__).addHandler(logging.NullHandler()) -__all__ = ["MolecularFamily", "SingletonFamily", "GNPS_KEY", "Spectrum"] +__all__ = ["MolecularFamily", "GNPS_KEY", "Spectrum"] diff --git a/src/nplinker/metabolomics/singleton_family.py b/src/nplinker/metabolomics/singleton_family.py deleted file mode 100644 index be090eee..00000000 --- a/src/nplinker/metabolomics/singleton_family.py +++ /dev/null @@ -1,10 +0,0 @@ -import uuid -from .molecular_family import MolecularFamily - - -class SingletonFamily(MolecularFamily): - def __init__(self): - super().__init__("singleton-" + str(uuid.uuid4())) - - def __str__(self): - return f"Singleton molecular family (id={self.id})" diff --git a/src/nplinker/nplinker.py b/src/nplinker/nplinker.py index f8998ecf..036d39fd 100644 --- a/src/nplinker/nplinker.py +++ b/src/nplinker/nplinker.py @@ -427,8 +427,6 @@ def get_common_strains( ) -> dict[tuple[Spectrum | MolecularFamily, GCF], list[Strain]]: """Get common strains between given spectra/molecular families and GCFs. - Note that SingletonFamily objects are excluded from given molecular families. - Args: met(Sequence[Spectrum] | Sequence[MolecularFamily]): A list of Spectrum or MolecularFamily objects. diff --git a/src/nplinker/scoring/linking/data_links.py b/src/nplinker/scoring/linking/data_links.py index 2a9ba042..a13b3097 100644 --- a/src/nplinker/scoring/linking/data_links.py +++ b/src/nplinker/scoring/linking/data_links.py @@ -6,7 +6,6 @@ from nplinker.genomics.gcf import GCF from nplinker.logconfig import LogConfig from nplinker.metabolomics import MolecularFamily -from nplinker.metabolomics import SingletonFamily from nplinker.metabolomics import Spectrum from .utils import calc_correlation_matrix from .utils import isinstance_all @@ -108,8 +107,6 @@ def get_common_strains( ) -> dict[tuple[Spectrum | MolecularFamily, GCF], list[Strain]]: """Get common strains between given spectra/molecular families and GCFs. - Note that SingletonFamily objects are excluded from given `spectra_or_mfs`. - Args: spectra_or_mfs(Sequence[Spectrum | MolecularFamily]): A list of Spectrum and/or MolecularFamily objects. @@ -138,8 +135,6 @@ def get_common_strains( strain_ids = self.occurrence_gcf_strain.columns results = {} for obj in spectra_or_mfs: - if isinstance(obj, SingletonFamily): - continue for gcf in gcfs: if isinstance(obj, Spectrum): shared_strains = strain_ids[ @@ -215,12 +210,7 @@ def _get_occurrence_mf_strain( strains as columns, where index is `mf.family_id` and column name is `strain.id`. The values are 1 if the molecular family contains the strain and 0 otherwise. - - Note that SingletonFamily objects are excluded from given `mfs`. """ - # remove SingletonFamily objects - mfs = [mf for mf in mfs if not isinstance(mf, SingletonFamily)] - df_mf_strain = pd.DataFrame( np.zeros((len(mfs), len(strains))), index=[mf.family_id for mf in mfs], diff --git a/tests/scoring/test_data_links.py b/tests/scoring/test_data_links.py index 40010a4f..488b2894 100644 --- a/tests/scoring/test_data_links.py +++ b/tests/scoring/test_data_links.py @@ -1,7 +1,6 @@ import pandas as pd import pytest from pandas.testing import assert_frame_equal -from nplinker.metabolomics import SingletonFamily def test_init(datalinks): @@ -173,36 +172,6 @@ def test_get_common_strains_spec_mf(datalinks, spectra, mfs, gcfs, strains_list) assert sut == expected -def test_get_common_strains_sf(datalinks, mfs, gcfs, strains_list): - """Test get_common_strains method for input SingletonFamily.""" - smf = SingletonFamily() - - sut = datalinks.get_common_strains([smf], gcfs) - assert sut == {} - - # the expected are same as `test_get_common_strains_mf` - mfs_mix = (*mfs[:2], smf) - sut = datalinks.get_common_strains(mfs_mix, gcfs) - expected = { - (mfs[0], gcfs[0]): [strains_list[0]], - (mfs[0], gcfs[1]): [], - (mfs[0], gcfs[2]): [strains_list[0]], - (mfs[1], gcfs[0]): [], - (mfs[1], gcfs[1]): [strains_list[1]], - (mfs[1], gcfs[2]): [strains_list[1]], - } - assert sut == expected - - sut = datalinks.get_common_strains(mfs_mix, gcfs, filter_no_shared=True) - expected = { - (mfs[0], gcfs[0]): [strains_list[0]], - (mfs[0], gcfs[2]): [strains_list[0]], - (mfs[1], gcfs[1]): [strains_list[1]], - (mfs[1], gcfs[2]): [strains_list[1]], - } - assert sut == expected - - def test_get_common_strains_invalid_value(datalinks, spectra, gcfs): """Test get_common_strains method for empty arguments.""" with pytest.raises(ValueError) as e: