remove loading of optional data

The optional data, including the GNPS params.xml file and the description text, are not needed for the core business of NPLinker. To keep the loading process simple (and to keep the refactored NPLinker a minimum viable product), the loading of optional data is removed. If these data are needed in the future, specific loaders should be added for them.
NPLinker · Jan 24, 2024 · a709913 · a709913
1 parent c4f6687
commit a709913
Show file tree

Hide file tree

Showing 2 changed files with 0 additions and 51 deletions.
diff --git a/src/nplinker/loader.py b/src/nplinker/loader.py
@@ -76,8 +76,6 @@ class DatasetLoader:
     OR_MIBIG_JSON = "mibig_json_dir"
     OR_STRAINS = "strain_mappings_file"
     # misc files
-    OR_PARAMS = "gnps_params_file"
-    OR_DESCRIPTION = "description_file"
     OR_INCLUDE_STRAINS = "include_strains_file"
     # class predictions
     OR_CANOPUS = "canopus_dir"
@@ -200,8 +198,6 @@ def load(self):
         if not self._load_genomics():
             return False
 
-        self._load_optional()
-
         # Restrict strain list to only relevant strains (those that are present
         # in both genomic and metabolomic data)
         # TODO add a config file option for this?
@@ -242,12 +238,6 @@ def _init_paths(self):
 
         self._init_genomics_paths()
 
-        # 12. MISC: <root>/params.xml
-        self.params_file = os.path.join(self._root, "params.xml")
-
-        # 13. MISC: <root>/description.txt
-        self.description_file = os.path.join(self._root, "description.txt")
-
         # 14. MISC: <root>/include_strains.csv / include_strains_file=<override>
         self.include_strains_file = self._config_overrides.get(
             self.OR_INCLUDE_STRAINS
@@ -576,26 +566,6 @@ def _load_class_info(self):
         self.chem_classes = chem_classes
         return True
 
-    def _load_optional(self):
-        self.gnps_params = {}
-        if os.path.exists(self.params_file):
-            logger.debug("Loading params.xml")
-            tree = ET.parse(self.params_file)
-            root = tree.getroot()
-            # this file has a simple structure:
-            # <parameters>
-            #   <parameter name="something">value</parameter>
-            # </parameters>
-            for param in root:
-                self.gnps_params[param.attrib["name"]] = param.text
-
-            logger.debug(f"Parsed {len(self.gnps_params)} GNPS params")
-
-        self.description_text = "<no description>"
-        if os.path.exists(self.description_file):
-            self.description_text = open(self.description_file).read()
-            logger.debug("Parsed description text")
-
     def _filter_only_common_strains(self):
         """Filter strain population to only strains present in both genomic and molecular data."""
         # TODO: Maybe there should be an option to specify which strains are used, both so we can

diff --git a/src/nplinker/nplinker.py b/src/nplinker/nplinker.py
@@ -214,27 +214,6 @@ def data_dir(self):
         """Returns path to nplinker/data directory (files packaged with the app itself)."""
         return NPLINKER_APP_DATA_DIR
 
-    @property
-    def gnps_params(self):
-        """Returns a dict containing data from GNPS params.xml (if available).
-
-        Returns:
-            dict: GNPS parameters, or an empty dict if none exist in the dataset
-        """
-        return self._loader.gnps_params
-
-    @property
-    def dataset_description(self):
-        """Returns dataset description.
-
-        If nplinker finds a 'description.txt' file in the root directory of the
-        dataset, the content will be parsed and made available through this property.
-
-        Returns:
-            str: the content of description.txt or '<no description>'
-        """
-        return self._loader.description_text
-
     @property
     def bigscape_cutoff(self):
         """Returns the current BiGSCAPE clustering cutoff value."""