From a709913a4a3e160696aa5dcd5fb43f05a4d146e3 Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Wed, 24 Jan 2024 16:27:46 +0100 Subject: [PATCH] remove loading of optional data The optional data, including the GNPS params.xml file and the description text, are not needed for the core business of NPLinker. To keep the loading process simple (and to keep the refactored NPLinker a minimum viable product), the loading of optional data is removed. If these data are needed in the future, specific loaders should be added for them. --- src/nplinker/loader.py | 30 ------------------------------ src/nplinker/nplinker.py | 21 --------------------- 2 files changed, 51 deletions(-) diff --git a/src/nplinker/loader.py b/src/nplinker/loader.py index 17c70a01..521c6ec0 100644 --- a/src/nplinker/loader.py +++ b/src/nplinker/loader.py @@ -76,8 +76,6 @@ class DatasetLoader: OR_MIBIG_JSON = "mibig_json_dir" OR_STRAINS = "strain_mappings_file" # misc files - OR_PARAMS = "gnps_params_file" - OR_DESCRIPTION = "description_file" OR_INCLUDE_STRAINS = "include_strains_file" # class predictions OR_CANOPUS = "canopus_dir" @@ -200,8 +198,6 @@ def load(self): if not self._load_genomics(): return False - self._load_optional() - # Restrict strain list to only relevant strains (those that are present # in both genomic and metabolomic data) # TODO add a config file option for this? @@ -242,12 +238,6 @@ def _init_paths(self): self._init_genomics_paths() - # 12. MISC: /params.xml - self.params_file = os.path.join(self._root, "params.xml") - - # 13. MISC: /description.txt - self.description_file = os.path.join(self._root, "description.txt") - # 14.
MISC: /include_strains.csv / include_strains_file= self.include_strains_file = self._config_overrides.get( self.OR_INCLUDE_STRAINS @@ -576,26 +566,6 @@ def _load_class_info(self): self.chem_classes = chem_classes return True - def _load_optional(self): - self.gnps_params = {} - if os.path.exists(self.params_file): - logger.debug("Loading params.xml") - tree = ET.parse(self.params_file) - root = tree.getroot() - # this file has a simple structure: - # - # value - # - for param in root: - self.gnps_params[param.attrib["name"]] = param.text - - logger.debug(f"Parsed {len(self.gnps_params)} GNPS params") - - self.description_text = "" - if os.path.exists(self.description_file): - self.description_text = open(self.description_file).read() - logger.debug("Parsed description text") - def _filter_only_common_strains(self): """Filter strain population to only strains present in both genomic and molecular data.""" # TODO: Maybe there should be an option to specify which strains are used, both so we can diff --git a/src/nplinker/nplinker.py b/src/nplinker/nplinker.py index 036d39fd..3da4c5fc 100644 --- a/src/nplinker/nplinker.py +++ b/src/nplinker/nplinker.py @@ -214,27 +214,6 @@ def data_dir(self): """Returns path to nplinker/data directory (files packaged with the app itself).""" return NPLINKER_APP_DATA_DIR - @property - def gnps_params(self): - """Returns a dict containing data from GNPS params.xml (if available). - - Returns: - dict: GNPS parameters, or an empty dict if none exist in the dataset - """ - return self._loader.gnps_params - - @property - def dataset_description(self): - """Returns dataset description. - - If nplinker finds a 'description.txt' file in the root directory of the - dataset, the content will be parsed and made available through this property. - - Returns: - str: the content of description.txt or '' - """ - return self._loader.description_text - @property def bigscape_cutoff(self): """Returns the current BiGSCAPE clustering cutoff value."""