-
Notifications
You must be signed in to change notification settings - Fork 13
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
refactor filtering of user specified strains
Based on the new loading pipeline, the filtering of user strains is conducted just after loading strain mappings, which will simplify the upcoming loading pipeline. The change of loading process looks like below: **Before**: load strain mappings --> load BGC, GCF, spectra and MF --> filter user strains **Now**: load strain mappings --> filter user strains --> load BGC, GCF, spectra and MF Major Changes: - create `strain_loader.py` and add function `load_user_strains` - add schema for json file of user specified strains (now we require user to provide strains in a JSON file) - update the use of `load_user_strains` in loader.py - remove test_loader.py (the whole loading pipeline is ongoing, and tests will be added later)
- Loading branch information
1 parent
4016a64
commit bd45807
Showing
8 changed files
with
157 additions
and
204 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
{ | ||
"$schema": "https://json-schema.org/draft/2020-12/schema", | ||
"$id": "https://raw.githubusercontent.com/NPLinker/nplinker/main/src/nplinker/schemas/user_strains.json", | ||
"title": "User specified strains", | ||
"description": "A list of strain IDs specified by user", | ||
"type": "object", | ||
"required": [ | ||
"strain_ids" | ||
], | ||
"properties": { | ||
"strain_ids": { | ||
"type": "array", | ||
"title": "Strain IDs", | ||
"description": "A list of strain IDs specified by user. The strain IDs must be the same as the ones in the strain mappings file.", | ||
"items": { | ||
"type": "string", | ||
"minLength": 1 | ||
}, | ||
"minItems": 1, | ||
"uniqueItems": true | ||
}, | ||
"version": { | ||
"type": "string", | ||
"enum": [ | ||
"1.0" | ||
] | ||
} | ||
}, | ||
"additionalProperties": false | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
import json | ||
from os import PathLike | ||
from jsonschema import validate | ||
from nplinker.logconfig import LogConfig | ||
from nplinker.schemas import USER_STRAINS_SCHEMA | ||
from .strains import Strain | ||
|
||
|
||
logger = LogConfig.getLogger(__name__) | ||
|
||
|
||
def load_user_strains(json_file: str | PathLike) -> set[Strain]:
    """Load user specified strains from a JSON file.

    The JSON file must follow the schema defined in
    "nplinker/schemas/user_strains.json".

    An example content of the JSON file:
        {"strain_ids": ["strain1", "strain2"]}

    Args:
        json_file(str | PathLike): Path to the JSON file containing user
            specified strains.

    Returns:
        set[Strain]: A set of user specified strains, one `Strain` per entry
            of the "strain_ids" array.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
        jsonschema.exceptions.ValidationError: If the JSON content does not
            conform to the user strains schema.
    """
    # JSON text is UTF-8 by specification, so decode explicitly instead of
    # relying on the platform's locale encoding (which may not be UTF-8).
    with open(json_file, encoding="utf-8") as f:
        json_data = json.load(f)

    # Validate before reading any key so malformed input fails with a schema
    # error rather than a KeyError/TypeError further down.
    validate(instance=json_data, schema=USER_STRAINS_SCHEMA)

    return {Strain(strain_id) for strain_id in json_data["strain_ids"]}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
import pytest | ||
from jsonschema import validate | ||
from jsonschema.exceptions import ValidationError | ||
from nplinker.schemas import USER_STRAINS_SCHEMA | ||
|
||
|
||
# Test schema against invalid data | ||
# Missing the required "strain_ids" property.
data_no_strain_ids = {"version": "1.0"}
# "strain_ids" is present but empty; the schema sets minItems to 1.
data_empty_strain_ids = {"strain_ids": [], "version": "1.0"}
# "strain_ids" items are integers; the schema requires strings.
data_invalid_strain_ids = {"strain_ids": [1, 2, 3], "version": "1.0"}
# "version" is an empty string, which is not in the schema's enum ["1.0"].
data_empty_version = {"strain_ids": ["strain1", "strain2"], "version": ""}
# "version" is "1.0.0", which is not in the schema's enum ["1.0"].
data_invalid_version = {"strain_ids": ["strain1", "strain2"], "version": "1.0.0"}
|
||
|
||
@pytest.mark.parametrize(
    ("data", "expected"),
    [
        (data_no_strain_ids, "'strain_ids' is a required property"),
        (data_empty_strain_ids, "[] is too short"),
        (data_invalid_strain_ids, "1 is not of type 'string'"),
        (data_empty_version, "'' is not one of ['1.0']"),
        (data_invalid_version, "'1.0.0' is not one of ['1.0']"),
    ],
)
def test_invalid_data(data, expected):
    """Check that each malformed document is rejected with the expected message."""
    with pytest.raises(ValidationError) as exc_info:
        validate(instance=data, schema=USER_STRAINS_SCHEMA)
    # Compare the validator's own message, not the full exception string.
    assert exc_info.value.message == expected
|
||
|
||
# Test schema against valid data | ||
# Document with both "strain_ids" and the only allowed "version" value.
data = {"strain_ids": ["strain1", "strain2"], "version": "1.0"}
# "version" is not listed under "required" in the schema, so it may be omitted.
data_no_version = {"strain_ids": ["strain1", "strain2"]}
|
||
|
||
@pytest.mark.parametrize("data", [data, data_no_version])
def test_valid_data(data):
    """Check that well-formed documents pass schema validation."""
    try:
        validate(instance=data, schema=USER_STRAINS_SCHEMA)
    except ValidationError:
        # Report a plain test failure instead of an unexpected-exception error.
        pytest.fail("Unexpected ValidationError")
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.