Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add default output path #241

Merged
merged 3 commits into from
Apr 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/nplinker/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@
ANTISMASH_DEFAULT_PATH: Path = config.root_dir / "antismash"
BIGSCAPE_DEFAULT_PATH: Path = config.root_dir / "bigscape"
BIGSCAPE_RUNNING_OUTPUT_PATH: Path = BIGSCAPE_DEFAULT_PATH / "bigscape_running_output"
OUTPUT_DEFAULT_PATH: Path = config.root_dir / "output"
13 changes: 9 additions & 4 deletions src/nplinker/scoring/metcalf_scoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import TYPE_CHECKING
import numpy as np
import pandas as pd
from nplinker.defaults import OUTPUT_DEFAULT_PATH
from nplinker.genomics import GCF
from nplinker.logconfig import LogConfig
from nplinker.metabolomics import MolecularFamily
Expand Down Expand Up @@ -31,11 +32,13 @@ class MetcalfScoring(ScoringMethod):
DATALINKS: The DataLinks object to use for scoring.
LINKFINDER: The LinkFinder object to use for scoring.
NAME: The name of the scoring method. This is set to 'metcalf'.
CACHE: The name of the cache file to use for storing the MetcalfScoring.
"""

DATALINKS = None
LINKFINDER = None
NAME = "metcalf"
CACHE = "cache_metcalf_scoring.pckl"

def __init__(self, npl: NPLinker) -> None:
"""Create a MetcalfScoring object.
Expand Down Expand Up @@ -69,9 +72,8 @@ def setup(npl: NPLinker):
)
)

cache_dir = os.path.join(npl.root_dir, "metcalf")
cache_file = os.path.join(cache_dir, "metcalf_scores.pckl")
os.makedirs(cache_dir, exist_ok=True)
OUTPUT_DEFAULT_PATH.mkdir(exist_ok=True)
cache_file = OUTPUT_DEFAULT_PATH / MetcalfScoring.CACHE

# the metcalf preprocessing can take a long time for large datasets, so it's
# better to cache as the data won't change unless the number of objects does
Expand Down Expand Up @@ -115,7 +117,8 @@ def setup(npl: NPLinker):

# TODO CG: is it needed? remove it if not
@property
def datalinks(self) -> DataLinks:
def datalinks(self) -> DataLinks | None:
"""Get the DataLinks object used for scoring."""
return MetcalfScoring.DATALINKS

def get_links(
Expand Down Expand Up @@ -309,10 +312,12 @@ def _calc_standardised_score_gen(

# TODO CG: refactor this method
def format_data(self, data):
    """Render a Metcalf score as text for display.

    Args:
        data: The score to render; for Metcalf this is a single floating
            point value.

    Returns:
        The score in fixed-point notation with four decimal places.
    """
    # ".4f" is the same format spec the display has always used.
    score_text = format(data, ".4f")
    return score_text

# TODO CG: refactor this method
def sort(self, objects, reverse=True):
    """Return the given objects ordered by their score for this method.

    Args:
        objects: Iterable of link objects; each one is indexed with this
            scoring method instance to obtain the value it is ordered by.
        reverse: If True (the default), order from the highest value to the
            lowest; otherwise from the lowest to the highest.

    Returns:
        A new list containing the objects in sorted order.
    """

    def score_of(objlink):
        # the scoring method instance itself is the lookup key
        return objlink[self]

    ranked = list(objects)
    ranked.sort(key=score_of, reverse=reverse)
    return ranked
2 changes: 1 addition & 1 deletion tests/integration/test_nplinker_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def npl() -> NPLinker:
npl.load_data()
# remove cached score results before running tests
root_dir = Path(npl.root_dir)
score_cache = root_dir / "metcalf" / "metcalf_scores.pckl"
score_cache = root_dir / "output" / "cache_metcalf_scoring.pckl"
score_cache.unlink(missing_ok=True)
return npl

Expand Down
Loading