Skip to content

Commit

Permalink
remove loading of optional data
Browse files Browse the repository at this point in the history
The optional data, including the GNPS params.xml file and the description text, are not needed for the core business of NPLinker. To keep the loading process simple (and to keep the refactored NPLinker a minimum viable product), the loading of optional data is removed. If these data are needed in the future, specific loaders should be added for them.
  • Loading branch information
CunliangGeng authored Jan 24, 2024
1 parent c4f6687 commit a709913
Show file tree
Hide file tree
Showing 2 changed files with 0 additions and 51 deletions.
30 changes: 0 additions & 30 deletions src/nplinker/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,6 @@ class DatasetLoader:
OR_MIBIG_JSON = "mibig_json_dir"
OR_STRAINS = "strain_mappings_file"
# misc files
OR_PARAMS = "gnps_params_file"
OR_DESCRIPTION = "description_file"
OR_INCLUDE_STRAINS = "include_strains_file"
# class predictions
OR_CANOPUS = "canopus_dir"
Expand Down Expand Up @@ -200,8 +198,6 @@ def load(self):
if not self._load_genomics():
return False

self._load_optional()

# Restrict strain list to only relevant strains (those that are present
# in both genomic and metabolomic data)
# TODO add a config file option for this?
Expand Down Expand Up @@ -242,12 +238,6 @@ def _init_paths(self):

self._init_genomics_paths()

# 12. MISC: <root>/params.xml
self.params_file = os.path.join(self._root, "params.xml")

# 13. MISC: <root>/description.txt
self.description_file = os.path.join(self._root, "description.txt")

# 14. MISC: <root>/include_strains.csv / include_strains_file=<override>
self.include_strains_file = self._config_overrides.get(
self.OR_INCLUDE_STRAINS
Expand Down Expand Up @@ -576,26 +566,6 @@ def _load_class_info(self):
self.chem_classes = chem_classes
return True

def _load_optional(self):
    """Load the optional dataset extras: GNPS parameters and description text.

    Sets two attributes on the instance:

    - ``gnps_params``: dict mapping the ``name`` attribute of every child
      element of the root of ``self.params_file`` to that element's text.
      Left empty when the file does not exist.
    - ``description_text``: the full content of ``self.description_file``,
      or ``"<no description>"`` when the file does not exist.

    Raises:
        KeyError: if an element in the params file has no ``name`` attribute.
        xml.etree.ElementTree.ParseError: if the params file is not valid XML.
    """
    self.gnps_params = {}
    if os.path.exists(self.params_file):
        logger.debug("Loading params.xml")
        tree = ET.parse(self.params_file)
        root = tree.getroot()
        # this file has a simple structure:
        # <parameters>
        #   <parameter name="something">value</parameter>
        # </parameters>
        for param in root:
            self.gnps_params[param.attrib["name"]] = param.text

        # NOTE(review): the nesting of this call is not visible in the
        # flattened source; it is assumed to belong to the params branch.
        logger.debug(f"Parsed {len(self.gnps_params)} GNPS params")

    self.description_text = "<no description>"
    if os.path.exists(self.description_file):
        # Read through a context manager so the handle is closed
        # deterministically instead of being left to the garbage collector.
        with open(self.description_file) as description_handle:
            self.description_text = description_handle.read()
        logger.debug("Parsed description text")

def _filter_only_common_strains(self):
"""Filter strain population to only strains present in both genomic and molecular data."""
# TODO: Maybe there should be an option to specify which strains are used, both so we can
Expand Down
21 changes: 0 additions & 21 deletions src/nplinker/nplinker.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,27 +214,6 @@ def data_dir(self):
"""Returns path to nplinker/data directory (files packaged with the app itself)."""
return NPLINKER_APP_DATA_DIR

@property
def gnps_params(self):
    """Expose the GNPS parameters held by the dataset loader.

    Returns:
        dict: the loader's ``gnps_params`` mapping (GNPS parameters, or an
            empty dict if none exist in the dataset).
    """
    loader = self._loader
    return loader.gnps_params

@property
def dataset_description(self):
    """Expose the dataset description text held by the dataset loader.

    When a 'description.txt' file is found in the root directory of the
    dataset, its content is made available through this property.

    Returns:
        str: the content of description.txt or '<no description>'
    """
    loader = self._loader
    return loader.description_text

@property
def bigscape_cutoff(self):
"""Returns the current BiGSCAPE clustering cutoff value."""
Expand Down

0 comments on commit a709913

Please sign in to comment.