diff --git a/.github/workflows/test-mars.yml b/.github/workflows/test-mars.yml index 0f16d39..b3f6566 100644 --- a/.github/workflows/test-mars.yml +++ b/.github/workflows/test-mars.yml @@ -42,3 +42,7 @@ jobs: - name: Linting run: ruff check mars_lib/ working-directory: ${{ env.working-directory }} + + - name: Type checking + run: mypy --install-types --non-interactive mars_lib/ + working-directory: ${{ env.working-directory }} diff --git a/mars-cli/.coveragerc b/mars-cli/.coveragerc index 2c3ba72..8e1fd7b 100644 --- a/mars-cli/.coveragerc +++ b/mars-cli/.coveragerc @@ -1,2 +1,2 @@ [run] -omit = mars_lib/__init__.py, mars_lib/submit.py, mars_lib/credential.py +omit = mars_lib/__init__.py, mars_lib/submit.py, mars_lib/credential.py, mars_lib/models/__init__.py diff --git a/mars-cli/mars_cli.py b/mars-cli/mars_cli.py index 41e4c06..d891600 100644 --- a/mars-cli/mars_cli.py +++ b/mars-cli/mars_cli.py @@ -3,7 +3,7 @@ import pathlib from configparser import ConfigParser from mars_lib.target_repo import TargetRepository -from mars_lib.model import Investigation, IsaJson +from mars_lib.models.isa_json import Investigation, IsaJson from mars_lib.isa_json import load_isa_json from logging.handlers import RotatingFileHandler import requests @@ -13,7 +13,7 @@ # Load CLI configuration home_dir = ( - pathlib.Path(os.getenv("MARS_SETTINGS_DIR")) + pathlib.Path(str(os.getenv("MARS_SETTINGS_DIR"))) if os.getenv("MARS_SETTINGS_DIR") else pathlib.Path.home() ) diff --git a/mars-cli/mars_lib/authentication.py b/mars-cli/mars_lib/authentication.py index b5a3ab5..5155873 100644 --- a/mars-cli/mars_lib/authentication.py +++ b/mars-cli/mars_lib/authentication.py @@ -1,13 +1,14 @@ +from typing import Optional import requests import json def get_webin_auth_token( - credentials_dict, - header={"Content-Type": "application/json"}, - auth_base_url="https://wwwdev.ebi.ac.uk/ena/dev/submit/webin/auth/token", - token_expiration_time=1, -): + credentials_dict: dict[str, str], + header: dict[str, 
str] = {"Content-Type": "application/json"}, + auth_base_url: str = "https://wwwdev.ebi.ac.uk/ena/dev/submit/webin/auth/token", + token_expiration_time: int = 1, +) -> Optional[str]: """ Obtain Webin authentication token. @@ -15,7 +16,7 @@ def get_webin_auth_token( credentials_dict (dict): The password dictionary for authentication. header (dict): The header information. auth_base_url (str): The base URL for authentication. - token_expiration_time(int): Toke expiration time in hours. + token_expiration_time(int): Token expiration time in hours. Returns: str: The obtained token. diff --git a/mars-cli/mars_lib/biosamples_external_references.py b/mars-cli/mars_lib/biosamples_external_references.py index 25519bc..c321695 100644 --- a/mars-cli/mars_lib/biosamples_external_references.py +++ b/mars-cli/mars_lib/biosamples_external_references.py @@ -6,7 +6,7 @@ import os from jsonschema import validate from jsonschema.exceptions import ValidationError, SchemaError -from typing import Union +from typing import Union, Any, Optional, List # -- # # Hardcoded values @@ -21,7 +21,7 @@ # -- # # Code blocks # -- # -def load_json_file(file): +def load_json_file(file: str) -> Any: """ Function to load a JSON file as a dictionary. Args: @@ -46,7 +46,7 @@ def load_json_file(file): ) -def handle_input_dict(input): +def handle_input_dict(input: dict[str, str]) -> Optional[dict[str, str]]: """ Function to handle the input: assert that it's either a dictionary or the filepath to an existing file containing the dictionary @@ -73,7 +73,7 @@ def handle_input_dict(input): raise ValueError(f"The file '{input}' is not a valid JSON file.") -def get_header(token): +def get_header(token: str) -> dict[str, str]: """ Obtain the header using a token. @@ -90,7 +90,7 @@ def get_header(token): } -def validate_bs_accession(accession_str): +def validate_bs_accession(accession_str: str) -> None: """ Validates that the given accession string conforms to the specified regex format. 
See: https://registry.identifiers.org/registry/biosample @@ -108,8 +108,8 @@ def validate_bs_accession(accession_str): def validate_json_against_schema( - json_doc: Union[dict, str], json_schema: Union[dict, str] -): + json_doc: Union[dict[str, List[str]], str], json_schema: Union[dict[str, str], str] +) -> Optional[bool]: """ Validates a JSON document against a given JSON Schema. @@ -150,7 +150,7 @@ class BiosamplesRecord: production: boolean indicating environment mode """ - def __init__(self, bs_accession): + def __init__(self, bs_accession: str) -> None: """ Initialize the BiosamplesRecord with provided arguments. @@ -159,8 +159,11 @@ def __init__(self, bs_accession): """ validate_bs_accession(bs_accession) self.bs_accession = bs_accession + self.biosamples_credentials: Optional[dict[str, str]] = None + self.biosamples_externalReferences: List[str] = [] + self.production: bool = False - def display(self): + def display(self) -> None: """ Display the attributes for demonstration purposes. """ @@ -168,7 +171,7 @@ def display(self): print("Biosamples External References:", self.biosamples_externalReferences) print("Production Mode:", self.production) - def fetch_bs_json(self, biosamples_endpoint): + def fetch_bs_json(self, biosamples_endpoint: str) -> Optional[dict[str, str]]: """ Fetches the BioSample's record (JSON) of the accession. @@ -206,47 +209,49 @@ def fetch_bs_json(self, biosamples_endpoint): self.bs_json = response_json return self.bs_json - def load_bs_json(self, bs_json_file: str = None, bs_json: dict = None): + def load_bs_json( + self, bs_json: Union[str, dict[str, str]] + ) -> Optional[dict[str, str]]: """ Loads a given JSON, or the file containing it, as the BioSample's record (JSON) for this instance. It is an alternative to fetching it directly from BioSample. 
Args: - bs_json_file (str): The file containing the Biosamples JSON metadata of the accession - bs_json (dict): The already loaded Biosamples JSON metadata of the accession + bs_json Union[str, dict]: The already Biosamples JSON metadata of the accession either path to file or dictionary. """ - if bs_json: - if isinstance(bs_json, dict): - self.bs_json = bs_json - return self.bs_json - else: - raise TypeError( - f"Given 'bs_json' is of type '{type(bs_json)}' instead of type 'dict'." - ) - elif bs_json_file: - bs_json = load_json_file(bs_json_file) + if isinstance(bs_json, dict): self.bs_json = bs_json return self.bs_json + elif isinstance(bs_json, str): + bs_json_data = load_json_file(bs_json) + self.bs_json = bs_json_data + return self.bs_json else: raise ValueError( "Neither the file containing the Biosamples JSON nor the Biosamples JSON itself were given to load it into the instance." ) - def pop_links(self): + def pop_links(self) -> dict[str, str]: """ Removes "_links" array (which is added automatically after updating the biosamples on the BioSample's side). 
""" - if "_links" not in self.bs_json: - return self.bs_json + if "_links" in self.bs_json: + self.bs_json.pop("_links") - self.bs_json.pop("_links") return self.bs_json - def extend_externalReferences(self, new_ext_refs_list): + def extend_externalReferences( + self, new_ext_refs_list: List[dict[str, str]] + ) -> dict[str, str]: """Extends the JSON of the BioSample's record with new externalReferences""" if not self.bs_json: - self.fetch_bs_json() + endpoint = ( + biosamples_endpoints["prod"] + if self.production + else biosamples_endpoints["dev"] + ) + self.fetch_bs_json(endpoint) self.pop_links() if "externalReferences" not in self.bs_json: @@ -265,7 +270,9 @@ def extend_externalReferences(self, new_ext_refs_list): self.bs_json["externalReferences"] = ext_refs_list return self.bs_json - def update_remote_record(self, header, webin_auth="?authProvider=WEBIN"): + def update_remote_record( + self, header: dict[str, str], webin_auth: str = "?authProvider=WEBIN" + ) -> Optional[str]: """ Updates the remote record of the BioSample's accession with the current sample JSON. diff --git a/mars-cli/mars_lib/credential.py b/mars-cli/mars_lib/credential.py index 333c4c1..e534b81 100644 --- a/mars-cli/mars_lib/credential.py +++ b/mars-cli/mars_lib/credential.py @@ -52,19 +52,23 @@ class CredentialManager: - def __init__(self, service_name): + def __init__(self, service_name: str) -> None: self.service_name = service_name - def get_credential_env(self, username): + def get_credential_env(self, username: str) -> str: """ Retrieves a credential from environment variables. :param username: The environment variable username. :return: The value of the environment variable or None if not found. 
""" - return os.getenv(username) + result = os.getenv(username) + if result is None: + raise ValueError(f"Environment variable '{username}' not found.") - def prompt_for_password(self): + return result + + def prompt_for_password(self) -> str: """ Securely prompts the user to enter a password in the console. @@ -72,7 +76,7 @@ def prompt_for_password(self): """ return getpass.getpass(prompt="Enter your password: ") - def set_password_keyring(self, username, password): + def set_password_keyring(self, username: str, password: str) -> None: """ Stores a password in the keyring under the given username. @@ -81,16 +85,19 @@ def set_password_keyring(self, username, password): """ keyring.set_password(self.service_name, username, password) - def get_password_keyring(self, username): + def get_password_keyring(self, username: str) -> str: """ Retrieves a password from the keyring for the given username. :param username: The username whose password to retrieve. :return: The password or None if not found. """ - return keyring.get_password(self.service_name, username) + pwd = keyring.get_password(self.service_name, username) + if pwd is None: + raise ValueError(f"Password not found for username '{username}'.") + return pwd - def delete_password_keyring(self, username): + def delete_password_keyring(self, username: str) -> None: """ Deletes a password from the keyring for the given username. 
diff --git a/mars-cli/mars_lib/isa_json.py b/mars-cli/mars_lib/isa_json.py index af53897..9898113 100644 --- a/mars-cli/mars_lib/isa_json.py +++ b/mars-cli/mars_lib/isa_json.py @@ -1,8 +1,24 @@ import json -from typing import Union, List -from mars_lib.model import Investigation, Assay, Comment, IsaJson +from typing import Union, List, Any, Tuple, Optional +from mars_lib.models.isa_json import ( + Investigation, + Assay, + Comment, + IsaJson, + MaterialAttribute, + MaterialAttributeValue, + Study, + OntologyAnnotation, +) from pydantic import ValidationError -from mars_lib.target_repo import TARGET_REPO_KEY +from mars_lib.target_repo import TARGET_REPO_KEY, TargetRepository +import uuid +from mars_lib.models.repository_response import ( + RepositoryResponse, + Filter, + Accession, + Path, +) def reduce_isa_json_for_target_repo( @@ -22,13 +38,18 @@ def reduce_isa_json_for_target_repo( new_studies = [] studies = filtered_isa_json.studies for study in studies: - assays = study.assays - filtered_assays = [ - assay for assay in assays if is_assay_for_target_repo(assay, target_repo) - ] - if len(filtered_assays) > 0: - study.assays = filtered_assays - new_studies.append(study) + if target_repo == TargetRepository.BIOSAMPLES: + filtered_assays = [] + else: + assays = study.assays + filtered_assays = [ + assay + for assay in assays + if is_assay_for_target_repo(assay, target_repo) + ] + + study.assays = filtered_assays + new_studies.append(study) filtered_isa_json.studies = new_studies return filtered_isa_json @@ -43,9 +64,7 @@ def detect_target_repo_comment(comments: List[Comment]) -> Comment: Returns: Comment: The comment where the name corresponds with the name of the provided target repo. 
""" - for comment in comments: - if comment.name == TARGET_REPO_KEY: - return comment + return next(comment for comment in comments if comment.name == TARGET_REPO_KEY) def is_assay_for_target_repo(assay: Assay, target_repo: str) -> bool: @@ -86,3 +105,305 @@ def load_isa_json( return Investigation.model_validate(isa_json) else: return IsaJson.model_validate(isa_json).investigation + + +def get_filter_for_accession_key(accession: Accession, key: str) -> Optional[Filter]: + """ + Returns the studies node from the accession. + + Args: + accession (Accession): The accession to be searched. + key (str): The key to be searched. + + Returns: + Path: The studies node. + """ + return next((p.where for p in accession.path if p.key == key), None) + + +def apply_filter(filter: Filter, nodes: Union[List[Study], List[Assay]]) -> Any: + """ + Filters the studies based on the filter. + + Args: + filter (Filter): The filter to be applied. + studies (List[Study]): The studies to be filtered. + + Returns: + Study: The filtered study. + """ + filter_key = "id" if filter.key == "@id" else filter.key + return next( + (node for node in nodes if getattr(node, filter_key) == filter.value), None + ) + + +def accession_characteristic_category_present(node: Union[Study, Assay]) -> bool: + """ + Checks if the node has an accession characteristic category. + + Args: + node (Union[Study, Assay]): The study or assay to be checked. + + Returns: + bool: Boolean indicating whether the node has an accession characteristic category. + """ + accession_characteristics_categories = [ + char_cat + for char_cat in node.characteristicCategories + if char_cat.characteristicType + and char_cat.characteristicType.annotationValue == "accession" + ] + + if len(accession_characteristics_categories) > 1: + raise AttributeError( + "There should be not more than one accession characteristic category." 
+ ) + elif len(accession_characteristics_categories) > 0: + return True + else: + return False + + +def accession_characteristic_present( + node: Union[Study, Assay], material_type_path: Path +) -> bool: + """ + Checks if the node has an accession characteristic. + + Args: + node (Union[Study, Assay]): The study or assay to be checked. + material_type_path (Path): The path to the material type. + + Returns: + bool: Boolean indicating whether the node has an accession characteristic. + """ + if material_type_path.where: + material = apply_filter( + material_type_path.where, getattr(node.materials, material_type_path.key) + ) + else: + raise ValueError( + f"'where' atribute is missing in path {material_type_path.key}." + ) + + accession_characteristics = [ + char + for char in material.characteristics + if char.category + and char.category.characteristicType + and char.category.characteristicType.annotationValue == "accession" + ] + + if len(accession_characteristics) > 1: + raise AttributeError( + "There should be not more than one accession characteristic." + ) + elif len(accession_characteristics) > 0: + return True + else: + return False + + +def add_accession_to_node( + node: Any, accession_number: str, material_type_path: Path +) -> None: + """ + Adds the accession number to the node. + + Args: + node (Any): The node to be updated. + accession_number (str): The accession number to be added. + material_type_path (Path): The path to the material type. + """ + if type(node) not in [Study, Assay]: + raise ValueError("Node must be either 'Study' or 'Assay'.") + + node_materials = getattr(node.materials, material_type_path.key) + if material_type_path.where: + updated_material = apply_filter(material_type_path.where, node_materials) + else: + raise ValueError( + f"'where' atribute is missing in path {material_type_path.key}." 
+ ) + + accession_characteristics_category = next( + ( + char_cat + for char_cat in node.characteristicCategories + if char_cat.characteristicType + and char_cat.characteristicType.annotationValue == "accession" + ), + None, + ) + + if not accession_characteristics_category: + raise ValueError("Accession characteristic category is not present.") + + updated_material_accession_characteristic = next( + ( + char + for char in updated_material.characteristics + if char.category + and char.category.id == accession_characteristics_category.id + ), + None, + ) + updated_material.characteristics.remove(updated_material_accession_characteristic) + + if not updated_material_accession_characteristic: + raise ValueError("Accession characteristic is not present.") + + if updated_material_accession_characteristic.value and hasattr( + updated_material_accession_characteristic.value, "annotationValue" + ): + accession_ontology_annotation = OntologyAnnotation() + accession_ontology_annotation.id = ( + f"#ontology_annotation/accession_{updated_material.id}" + ) + accession_ontology_annotation.annotationValue = accession_number + updated_material_accession_characteristic.value = accession_ontology_annotation + else: + updated_material_accession_characteristic.value = accession_number + + updated_material.characteristics.append(updated_material_accession_characteristic) + print(f"{updated_material.id}: {updated_material_accession_characteristic.value}.") + + +def create_accession_characteristic_category( + node: Union[Study, Assay] +) -> Tuple[str, MaterialAttribute]: + """ + creates a new characteristic category for the accession number. + + Args: + node (Union[Study, Assay]): node to be updated + + Returns: + MaterialAttribute: The newly created characteristic category. 
+ """ + if type(node) not in [Study, Assay]: + raise ValueError("Node must be either 'Study' or 'Assay'.") + + category = MaterialAttribute() + accession_id = str(uuid.uuid4()) + category.id = f"#characteristic_category/accession_{accession_id}" + category.characteristicType = OntologyAnnotation(annotationValue="accession") + node.characteristicCategories.append(category) + + return (accession_id, category) + + +def fetch_existing_characteristic_category( + node: Union[Study, Assay] +) -> Tuple[str, MaterialAttribute]: + """ + Fetches the existing characteristic category for the accession number. + + Args: + node (Union[Study, Assay]): study or assay to search + """ + accession_cat = next( + char_cat + for char_cat in node.characteristicCategories + if char_cat.characteristicType + and char_cat.characteristicType.annotationValue + and isinstance(char_cat.characteristicType.annotationValue, str) + and char_cat.characteristicType.annotationValue.lower() == "accession" + ) + if not accession_cat: + raise ValueError(f"Accession characteristic category not found in{node.id}.") + + accession_id = ( + accession_cat.id.split("_")[-1] if accession_cat.id else str(uuid.uuid4()) + ) + return (accession_id, accession_cat) + + +def create_accession_characteristic( + node: Union[Study, Assay], + material_type_path: Path, + category: MaterialAttribute, + accession_id: str, +) -> None: + """ + Creates a new characteristic for the accession number. + + Args: + node (Union[Study, Assay]): node to be updated + material_type_path (Path): path to the material type, + category (MaterialAttribute): characteristic category for the accession number. + accession_id (str): UUID for the accession. + """ + current_materials = getattr(node.materials, material_type_path.key) + if not material_type_path.where: + raise ValueError( + f"'where' atribute is missing in path {material_type_path.key}." 
+ ) + + updated_material = apply_filter(material_type_path.where, current_materials) + + new_material_attribute_value = MaterialAttributeValue() + new_material_attribute_value.id = ( + f"#material_attribute_value/accession_{accession_id}" + ) + new_material_attribute_value.category = category + updated_material.characteristics.append(new_material_attribute_value) + + +def update_investigation( + investigation: Investigation, repo_response: RepositoryResponse +) -> Investigation: + """ + Adds the accession to the ISA JSON. + + Args: + isa_json (Investigation): The ISA JSON to be updated. + repo_response (RepositoryResponse): The response from the repository. + + Returns: + Investigation: The updated ISA JSON. + """ + updated_investigation = investigation.model_copy(deep=True) + for accession in repo_response.accessions: + + has_assay_in_path = [p for p in accession.path if p.key == "assays"] + target_level = "assay" if len(has_assay_in_path) > 0 else "study" + material_type_path = next( + p + for p in accession.path + if p.key in ["sources", "samples", "otherMaterials"] + ) + + study_filter = get_filter_for_accession_key(accession, "studies") + if not study_filter: + raise ValueError(f"Study filter is not present in {accession.path}.") + + updated_node = apply_filter(study_filter, updated_investigation.studies) + + if target_level == "assay": + assay_filter = get_filter_for_accession_key(accession, "assays") + if not assay_filter: + raise ValueError(f"Assay filter is not present in {accession.path}.") + + updated_node = apply_filter(assay_filter, updated_node.assays) + + if not updated_node: + raise ValueError(f"Node not found for {accession.value}.") + if not accession_characteristic_category_present(updated_node): + (accession_id, category) = create_accession_characteristic_category( + updated_node + ) + else: + (accession_id, category) = fetch_existing_characteristic_category( + updated_node + ) + + if not accession_characteristic_present(updated_node, 
material_type_path): + create_accession_characteristic( + updated_node, material_type_path, category, accession_id + ) + + add_accession_to_node(updated_node, accession.value, material_type_path) + + return updated_investigation diff --git a/mars-cli/mars_lib/model.py b/mars-cli/mars_lib/model.py deleted file mode 100644 index 9149224..0000000 --- a/mars-cli/mars_lib/model.py +++ /dev/null @@ -1,296 +0,0 @@ -from __future__ import annotations - -from enum import Enum -from typing import List, Optional, Union - -from pydantic import BaseModel, Field, field_validator, ConfigDict -from mars_lib.target_repo import TargetRepository, TARGET_REPO_KEY - - -class IsaBase(BaseModel): - # model_config = ConfigDict(extra="allow") - model_config = ConfigDict(extra="forbid") - - -class Comment(IsaBase): - id: Optional[str] = Field(alias="@id", default=None) - name: Optional[str] = Field(default=None) - value: Optional[str] = Field(default=None) - - -class OntologySourceReference(IsaBase): - comments: Optional[List[Comment]] = Field(default=[]) - description: Optional[str] = Field(default=None) - file: Optional[str] = Field(default=None) - name: Optional[str] = Field(default=None) - version: Optional[str] = Field(default=None) - - -# TODO: Question: Should these be case-sensitive? 
-class DataTypeEnum(str, Enum): - RAW_DATA_FILE = "Raw Data File" - DERIVED_DATA_FILE = "Derived Data File" - IMAGE_FILE = "Image File" - SPECTRAL_RAW_DATA_FILE = "Spectral Raw Data File" # TODO: QUESTION: This is not mentioned in the specs (https://isa-specs.readthedocs.io/) - FREE_INDUCTION_DECAY_FILE = "Free Induction Decay File" # TODO: QUESTION: This is not mentioned in the specs (https://isa-specs.readthedocs.io/) - - -class Data(IsaBase): - id: Optional[str] = Field(alias="@id", default=None) - comments: Optional[List[Comment]] = Field(default=[]) - name: Optional[str] = Field(default=None) - type: Optional[DataTypeEnum] = Field(default=None) - - @field_validator("type") - def apply_enum(cls, v): - if v not in [item.value for item in DataTypeEnum]: - raise ValueError("Invalid material type") - return v - - -class OntologyAnnotation(IsaBase): - id: Optional[str] = Field(alias="@id", default=None) - annotationValue: Union[Optional[str], Optional[float], Optional[int]] = Field( - default=[] - ) - comments: Optional[List[Comment]] = Field(default=[]) - termAccession: Optional[str] = Field(default=None) - termSource: Optional[str] = Field( - description="The abbreviated ontology name. 
It should correspond to one of the sources as specified in the ontologySourceReference section of the Investigation.", - default=None, - ) - - -class MaterialAttributeValue(IsaBase): - id: Optional[str] = Field(alias="@id", default=None) - category: Optional[MaterialAttribute] = Field(default=None) - value: Union[ - Optional[OntologyAnnotation], Optional[str], Optional[float], Optional[int] - ] = Field(default=None) - unit: Optional[OntologyAnnotation] = Field(default=None) - comments: Optional[List[Comment]] = Field( - default=[] - ) # TODO: QUESTION: This is not mentioned in the specs (https://isa-specs.readthedocs.io/en/latest/isajson.html#material-attribute-value-schema-json) - - -class Factor(IsaBase): - id: Optional[str] = Field(alias="@id", default=None) - comments: Optional[List[Comment]] = Field(default=[]) - factorName: Optional[str] = Field(default=None) - factorType: Optional[OntologyAnnotation] = Field(default=None) - - -class FactorValue(IsaBase): - id: Optional[str] = Field(alias="@id", default=None) - category: Optional[Factor] = Field(default=None) - value: Union[ - Optional[str], Optional[float], Optional[int], Optional[OntologyAnnotation] - ] = Field(default=[]) - unit: Optional[OntologyAnnotation] = Field(default=None) - - -class Source(IsaBase): - id: Optional[str] = Field(alias="@id", default=None) - characteristics: Optional[List[MaterialAttributeValue]] = Field(default=[]) - name: Optional[str] = Field(default=None) - comments: Optional[List[Comment]] = Field( - default=[] - ) # TODO: QUESTION: This is not mentioned in the specs (https://isa-specs.readthedocs.io/en/latest/isajson.html#source-schema-json) - - -class Sample(IsaBase): - id: Optional[str] = Field(alias="@id", default=None) - name: Optional[str] = Field(default=None) - characteristics: Optional[List[MaterialAttributeValue]] = Field(default=[]) - factorValues: Optional[List[FactorValue]] = Field(default=[]) - derivesFrom: Optional[List[Source]] = Field(default=[]) - comments: 
Optional[List[Comment]] = Field( - default=[] - ) # TODO: QUESTION: This is not mentioned in the specs (https://isa-specs.readthedocs.io/en/latest/isajson.html#sample-schema-json) - - -class ProtocolParameter(IsaBase): - id: Optional[str] = Field(alias="@id", default=None) - parameterName: Optional[OntologyAnnotation] = Field(default=None) - - -class ProcessParameterValue(IsaBase): - category: Optional[ProtocolParameter] = Field(default=None) - value: Union[ - Optional[str], Optional[float], Optional[int], Optional[OntologyAnnotation] - ] = Field(default=[]) - unit: Optional[OntologyAnnotation] = Field(default=None) - - -# Helper class for protocol -> components -class Component(IsaBase): - componentName: Optional[str] = Field(default=None) - componentType: Optional[OntologyAnnotation] = Field(default=None) - - -class Protocol(IsaBase): - id: Optional[str] = Field(alias="@id", default=None) - comments: Optional[List[Comment]] = Field(default=[]) - components: Optional[List[Component]] = Field(default=[]) - description: Optional[str] = Field(default=None) - name: Optional[str] = Field(default=None) - parameters: Optional[List[ProtocolParameter]] = Field(default=[]) - protocolType: Optional[OntologyAnnotation] = Field(default=None) - uri: Optional[str] = Field(default=None) - version: Optional[str] = Field(default=None) - - -# Enum for material -> type -# TODO: Question: Should these be case-sensitive? -class MaterialTypeEnum(str, Enum): - EXTRACT_NAME = "Extract Name" - LABELED_EXTRACT_NAME = "Labeled Extract Name" - LIBRARY_NAME = "library name" # TODO: QUESTION: This is not mentioned in the specs (https://isa-specs.readthedocs.io/en/latest/isajson.html#material-schema-json) but was found in DataHub ISA-JSON and ARC ISA-JSON. 
- - -class Material(IsaBase): - id: Optional[str] = Field(alias="@id", default=None) - characteristics: List[MaterialAttributeValue] = Field(default=[]) - comments: Optional[List[Comment]] = Field(default=[]) - name: Optional[str] = Field(default=None) - type: Optional[str] = Field(default=None) - derivesFrom: Optional[List[Material]] = Field(default=[]) - - @field_validator("type") - def apply_enum(cls, v): - if v not in [item.value for item in MaterialTypeEnum]: - raise ValueError("Invalid material type") - return v - - -class Process(IsaBase): - id: Optional[str] = Field(alias="@id", default=None) - comments: Optional[List[Comment]] = Field(default=[]) - date: Optional[str] = Field(default=None) - executesProtocol: Optional[Protocol] = Field(default=None) - inputs: Optional[Union[List[Source], List[Sample], List[Material], list[Data]]] = ( - Field(default=[]) - ) - name: Optional[str] = Field(default=None) - nextProcess: Optional[Process] = Field(default=None) - outputs: Optional[Union[List[Sample], List[Material], list[Data]]] = Field( - default=[] - ) - parameterValues: Optional[List[ProcessParameterValue]] = Field(default=[]) - performer: Optional[str] = Field(default=None) - previousProcess: Optional[Process] = Field(default=None) - - -# Helper for assay -> materials -class AssayMaterialType(IsaBase): - samples: Optional[List[Sample]] = Field(default=[]) - otherMaterials: Optional[List[Material]] = Field(default=[]) - - -class Assay(IsaBase): - id: Optional[str] = Field(alias="@id", default=None) - characteristicCategories: Optional[List[MaterialAttribute]] = Field(default=[]) - comments: Optional[List[Comment]] = Field(default=[]) - dataFiles: Optional[List[Data]] = Field(default=[]) - filename: Optional[str] = Field(default=None) - materials: Optional[AssayMaterialType] = Field(default=None) - measurementType: Optional[OntologyAnnotation] = Field(default=None) - processSequence: Optional[List[Process]] = Field(default=[]) - technologyPlatform: 
Optional[str] = Field(default=None) - technologyType: Optional[OntologyAnnotation] = Field(default=None) - unitCategories: Optional[List[OntologyAnnotation]] = Field(default=[]) - - @field_validator("comments") - def detect_target_repo_comments(cls, v): - target_repo_comments = [ - comment for comment in v if comment.name == TARGET_REPO_KEY - ] - if len(target_repo_comments) == 0: - raise ValueError("'target repository' comment is missing") - elif len(target_repo_comments) > 1: - raise ValueError("Multiple 'target repository' comments found") - else: - if target_repo_comments[0].value in [ - item.value for item in TargetRepository - ]: - return v - else: - raise ValueError( - f"Invalid 'target repository' value: '{target_repo_comments[0].value}'" - ) - - -class Person(IsaBase): - id: Optional[str] = Field(alias="@id", default=None) - address: Optional[str] = Field(default=None) - affiliation: Optional[str] = Field(default=None) - comments: Optional[List[Comment]] = Field(default=[]) - email: Optional[str] = Field(default=None) - fax: Optional[str] = Field(default=None) - firstName: Optional[str] = Field(default=None) - lastName: Optional[str] = Field(default=None) - midInitials: Optional[str] = Field(default=None) - phone: Optional[str] = Field(default=None) - roles: Optional[List[OntologyAnnotation]] = Field(default=[]) - - -class Publication(IsaBase): - authorList: Optional[str] = Field(default=None) - comments: Optional[List[Comment]] = Field(default=[]) - doi: Optional[str] = Field(default=None) - pubMedID: Optional[str] = Field(default=None) - status: Optional[OntologyAnnotation] = Field(default=None) - title: Optional[str] = Field(default=None) - - -class StudyMaterialType(IsaBase): - sources: Optional[List[Source]] = Field(default=[]) - samples: Optional[List[Sample]] = Field(default=[]) - otherMaterials: Optional[List[Material]] = Field(default=[]) - - -class MaterialAttribute(IsaBase): - id: Optional[str] = Field(alias="@id", default=None) - 
characteristicType: Optional[OntologyAnnotation] = Field(default=None) - - -class Study(IsaBase): - id: Optional[str] = Field(alias="@id", default=None) - assays: Optional[List[Assay]] = Field(default=[]) - characteristicCategories: Optional[List[MaterialAttribute]] = Field(default=[]) - comments: Optional[List[Comment]] = Field(default=[]) - description: Optional[str] = Field(default=None) - factors: Optional[List[Factor]] = Field(default=[]) - filename: Optional[str] = Field(default=None) - identifier: Optional[str] = Field(default=None) - materials: Optional[StudyMaterialType] - people: Optional[List[Person]] = Field(default=[]) - processSequence: Optional[List[Process]] = Field(default=[]) - protocols: Optional[List[Protocol]] = Field(default=[]) - publicReleaseDate: Optional[str] = Field(default=None) - publications: Optional[List[Publication]] = Field(default=[]) - studyDesignDescriptors: Optional[List[OntologyAnnotation]] = Field(default=[]) - submissionDate: Optional[str] = Field(default=None) - title: Optional[str] = Field(default=None) - unitCategories: Optional[List[OntologyAnnotation]] = Field(default=[]) - - -class Investigation(IsaBase): - id: Optional[str] = Field(alias="@id", default=None) - comments: Optional[List[Comment]] = Field(default=[]) - description: Optional[str] = Field(default=None) - filename: Optional[str] = Field(default=None) - identifier: Optional[str] = Field(default=None) - ontologySourceReferences: Optional[List[OntologySourceReference]] = Field( - default=[] - ) - people: Optional[List[Person]] = Field(default=[]) - publicReleaseDate: Optional[str] = Field(default=None) - publications: Optional[List[Publication]] = Field(default=[]) - studies: Optional[List[Study]] = Field(default=[]) - submissionDate: Optional[str] = Field(default=None) - title: Optional[str] = Field(default=None) - - -class IsaJson(IsaBase): - investigation: Investigation diff --git a/mars-cli/mars_lib/models/__init__.py b/mars-cli/mars_lib/models/__init__.py 
new file mode 100644 index 0000000..e69de29 diff --git a/mars-cli/mars_lib/models/isa_json.py b/mars-cli/mars_lib/models/isa_json.py new file mode 100644 index 0000000..ead6b34 --- /dev/null +++ b/mars-cli/mars_lib/models/isa_json.py @@ -0,0 +1,286 @@ +from __future__ import annotations + +from enum import Enum +from typing import List, Optional, Union + +from pydantic import BaseModel, Field, field_validator, ConfigDict +from mars_lib.target_repo import TargetRepository, TARGET_REPO_KEY + + +class IsaBase(BaseModel): + # model_config = ConfigDict(extra="allow") + model_config = ConfigDict(extra="forbid") + + +class Comment(IsaBase): + id: Optional[str] = Field(alias="@id", default=None) + name: Optional[str] = None + value: Optional[str] = None + + +class OntologySourceReference(IsaBase): + comments: List[Comment] = [] + description: Optional[str] = None + file: Optional[str] = None + name: Optional[str] = None + version: Optional[str] = None + + +# TODO: Question: Should these be case-sensitive? 
class DataTypeEnum(str, Enum):
    """Closed set of strings accepted for the ``type`` field of ``Data``."""

    RAW_DATA_FILE = "Raw Data File"
    DERIVED_DATA_FILE = "Derived Data File"
    IMAGE_FILE = "Image File"
    # TODO: QUESTION: This is not mentioned in the specs (https://isa-specs.readthedocs.io/)
    SPECTRAL_RAW_DATA_FILE = "Spectral Raw Data File"
    # TODO: QUESTION: This is not mentioned in the specs (https://isa-specs.readthedocs.io/)
    FREE_INDUCTION_DECAY_FILE = "Free Induction Decay File"


class Data(IsaBase):
    """ISA-JSON data node: optional ``@id``, name, comments and a file type.

    ``type`` is optional; when a value is supplied it must be one of the
    strings listed in ``DataTypeEnum``.
    """

    id: Optional[str] = Field(alias="@id", default=None)
    comments: List[Comment] = []
    name: Optional[str] = None
    type: Optional[DataTypeEnum] = None

    @field_validator("type")
    @classmethod
    def apply_enum(cls, v: Optional[DataTypeEnum]) -> Optional[DataTypeEnum]:
        """Check that a supplied ``type`` is a known data type.

        Args:
            v: The already-parsed field value (an enum member) or ``None``.

        Returns:
            The value unchanged.

        Raises:
            ValueError: If the value is not one of the ``DataTypeEnum`` values.
        """
        # The field is declared Optional, so an explicit null must be accepted
        # exactly like an omitted key instead of being reported as invalid.
        if v is None:
            return v
        if v not in [item.value for item in DataTypeEnum]:
            raise ValueError("Invalid data type")
        return v


class OntologyAnnotation(IsaBase):
    """ISA-JSON ontology annotation: a value plus its term accession/source."""

    id: Optional[str] = Field(alias="@id", default=None)
    # Same member types as before, written flat like the sibling ``value`` fields.
    annotationValue: Union[str, float, int, None] = None
    comments: List[Comment] = []
    termAccession: Optional[str] = None
    termSource: Optional[str] = Field(
        description="The abbreviated ontology name. It should correspond to one of the sources as specified in the ontologySourceReference section of the Investigation.",
        default=None,
    )


class MaterialAttributeValue(IsaBase):
    """Value of a material characteristic, with optional category and unit."""

    id: Optional[str] = Field(alias="@id", default=None)
    # Forward reference: MaterialAttribute is declared further down the module.
    category: Optional[MaterialAttribute] = None
    value: Union[str, float, int, OntologyAnnotation, None] = None
    unit: Optional[OntologyAnnotation] = None
    # TODO: QUESTION: This is not mentioned in the specs (https://isa-specs.readthedocs.io/en/latest/isajson.html#material-attribute-value-schema-json)
    comments: List[Comment] = []


class Factor(IsaBase):
    """Study factor: a name and an ontology-annotated factor type."""

    id: Optional[str] = Field(alias="@id", default=None)
    comments: List[Comment] = []
    factorName: Optional[str] = None
    factorType: Optional[OntologyAnnotation] = None


class FactorValue(IsaBase):
    """Value recorded for a ``Factor``, with an optional unit."""

    id: Optional[str] = Field(alias="@id", default=None)
    category: Optional[Factor] = None
    value: Union[str, float, int, OntologyAnnotation, None] = None
    unit: Optional[OntologyAnnotation] = None


class Source(IsaBase):
    """ISA-JSON source material: name, characteristics and comments."""

    id: Optional[str] = Field(alias="@id", default=None)
    characteristics: List[MaterialAttributeValue] = []
    name: Optional[str] = None
    # TODO: QUESTION: This is not mentioned in the specs (https://isa-specs.readthedocs.io/en/latest/isajson.html#source-schema-json)
    comments: List[Comment] = []


class Sample(IsaBase):
    """ISA-JSON sample: characteristics, factor values and originating sources."""

    id: Optional[str] = Field(alias="@id", default=None)
    name: Optional[str] = None
    characteristics: List[MaterialAttributeValue] = []
    factorValues: List[FactorValue] = []
    derivesFrom: List[Source] = []
    # TODO: QUESTION: This is not mentioned in the specs (https://isa-specs.readthedocs.io/en/latest/isajson.html#sample-schema-json)
    comments: List[Comment] = []


class ProtocolParameter(IsaBase):
    """Parameter declared by a protocol, named by an ontology annotation."""

    id: Optional[str] = Field(alias="@id", default=None)
    parameterName: Optional[OntologyAnnotation] = None


class ProcessParameterValue(IsaBase):
    """Value given to a ``ProtocolParameter``, with an optional unit."""

    category: Optional[ProtocolParameter] = None
    value: Union[str, float, int, OntologyAnnotation, None] = None
    unit: Optional[OntologyAnnotation] = None


# Helper class for protocol -> components
class Component(IsaBase):
    """Entry of a protocol's ``components`` list: a name and a type."""

    componentName: Optional[str] = None
    componentType: Optional[OntologyAnnotation] = None


class Protocol(IsaBase):
    """ISA-JSON protocol: description, type, parameters and components."""

    id: Optional[str] = Field(alias="@id", default=None)
    comments: List[Comment] = []
    components: List[Component] = []
    description: Optional[str] = None
    name: Optional[str] = None
    parameters: List[ProtocolParameter] = []
    protocolType: Optional[OntologyAnnotation] = None
    uri: Optional[str] = None
    version: Optional[str] = None


# Enum for material -> type
# TODO: Question: Should these be case-sensitive?
class MaterialTypeEnum(str, Enum):
    """Closed set of strings accepted as a material ``type``."""

    EXTRACT_NAME = "Extract Name"
    LABELED_EXTRACT_NAME = "Labeled Extract Name"
    # TODO: QUESTION: This is not mentioned in the specs (https://isa-specs.readthedocs.io/en/latest/isajson.html#material-schema-json) but was found in DataHub ISA-JSON and ARC ISA-JSON.
    LIBRARY_NAME = "library name"
+ + +class Material(IsaBase): + id: Optional[str] = Field(alias="@id", default=None) + characteristics: List[MaterialAttributeValue] = [] + comments: List[Comment] = [] + name: Optional[str] = None + type: Optional[str] = None + derivesFrom: List[Material] = [] + + @field_validator("type") + def apply_enum(cls, v: str) -> str: + if v not in [item.value for item in MaterialTypeEnum]: + raise ValueError("Invalid material type") + return v + + +class Process(IsaBase): + id: Optional[str] = Field(alias="@id", default=None) + comments: List[Comment] = [] + date: Optional[str] = None + executesProtocol: Optional[Protocol] = None + inputs: Optional[Union[List[Source], List[Sample], List[Material], list[Data]]] = [] + name: Optional[str] = None + nextProcess: Optional[Process] = None + outputs: Optional[Union[List[Sample], List[Material], list[Data]]] = Field( + default=[] + ) + parameterValues: List[ProcessParameterValue] = [] + performer: Optional[str] = None + previousProcess: Optional[Process] = None + + +# Helper for assay -> materials +class AssayMaterialType(IsaBase): + samples: List[Sample] = [] + otherMaterials: List[Material] = [] + + +class Assay(IsaBase): + id: Optional[str] = Field(alias="@id", default=None) + characteristicCategories: List[MaterialAttribute] = [] + comments: List[Comment] = [] + dataFiles: List[Data] = [] + filename: Optional[str] = None + materials: Optional[AssayMaterialType] = None + measurementType: Optional[OntologyAnnotation] = None + processSequence: List[Process] = [] + technologyPlatform: Optional[str] = None + technologyType: Optional[OntologyAnnotation] = None + unitCategories: List[OntologyAnnotation] = [] + + @field_validator("comments") + def detect_target_repo_comments(cls, v: List[Comment]) -> Optional[List[Comment]]: + target_repo_comments = [ + comment for comment in v if comment.name == TARGET_REPO_KEY + ] + if len(target_repo_comments) == 0: + raise ValueError("'target repository' comment is missing") + elif 
len(target_repo_comments) > 1: + raise ValueError("Multiple 'target repository' comments found") + else: + if target_repo_comments[0].value in [ + item.value for item in TargetRepository + ]: + return v + else: + raise ValueError( + f"Invalid 'target repository' value: '{target_repo_comments[0].value}'" + ) + + +class Person(IsaBase): + id: Optional[str] = Field(alias="@id", default=None) + address: Optional[str] = None + affiliation: Optional[str] = None + comments: List[Comment] = [] + email: Optional[str] = None + fax: Optional[str] = None + firstName: Optional[str] = None + lastName: Optional[str] = None + midInitials: Optional[str] = None + phone: Optional[str] = None + roles: List[OntologyAnnotation] = [] + + +class Publication(IsaBase): + authorList: Optional[str] = None + comments: List[Comment] = [] + doi: Optional[str] = None + pubMedID: Optional[str] = None + status: Optional[OntologyAnnotation] = None + title: Optional[str] = None + + +class StudyMaterialType(IsaBase): + sources: List[Source] = [] + samples: List[Sample] = [] + otherMaterials: List[Material] = [] + + +class MaterialAttribute(IsaBase): + id: Optional[str] = Field(alias="@id", default=None) + characteristicType: Optional[OntologyAnnotation] = None + + +class Study(IsaBase): + id: Optional[str] = Field(alias="@id", default=None) + assays: List[Assay] = [] + characteristicCategories: List[MaterialAttribute] = [] + comments: List[Comment] = [] + description: Optional[str] = None + factors: List[Factor] = [] + filename: Optional[str] = None + identifier: Optional[str] = None + materials: Optional[StudyMaterialType] + people: List[Person] = [] + processSequence: List[Process] = [] + protocols: List[Protocol] = [] + publicReleaseDate: Optional[str] = None + publications: List[Publication] = [] + studyDesignDescriptors: List[OntologyAnnotation] = [] + submissionDate: Optional[str] = None + title: Optional[str] = None + unitCategories: List[OntologyAnnotation] = [] + + +class 
Investigation(IsaBase): + id: Optional[str] = Field(alias="@id", default=None) + comments: List[Comment] = [] + description: Optional[str] = None + filename: Optional[str] = None + identifier: Optional[str] = None + ontologySourceReferences: List[OntologySourceReference] = Field(default=[]) + people: List[Person] = [] + publicReleaseDate: Optional[str] = None + publications: List[Publication] = [] + studies: List[Study] = [] + submissionDate: Optional[str] = None + title: Optional[str] = None + + +class IsaJson(IsaBase): + investigation: Investigation diff --git a/mars-cli/mars_lib/models/repository_response.py b/mars-cli/mars_lib/models/repository_response.py new file mode 100644 index 0000000..1fd9ec2 --- /dev/null +++ b/mars-cli/mars_lib/models/repository_response.py @@ -0,0 +1,68 @@ +import json +from typing import List, Optional +from pydantic import BaseModel, field_validator, Field, ConfigDict +import pydantic +import pydantic.alias_generators +from mars_lib.target_repo import TargetRepository + + +class Filter(BaseModel): + key: str + value: str + + +class Path(BaseModel): + key: str + where: Optional[Filter] = None + + +class Accession(BaseModel): + path: List[Path] = [] + value: str + + @field_validator("path") + def validate_path(cls, path: List[Path]) -> List[Path]: + keys = [p.key for p in path] + if len(keys) != len(set(keys)): + raise ValueError("Duplicate keys found in path list") + return path + + +class Error(BaseModel): + type: str + message: str + path: List[Path] = [] + + +class Info(BaseModel): + name: Optional[str] = None + message: str + + +class RepositoryResponse(BaseModel): + # This is a Pydantic configuration that will convert the field names to camel case and be accessible as alias. 
+ model_config = ConfigDict(alias_generator=pydantic.alias_generators.to_camel) + + target_repository: str = Field(alias="targetRepository") + accessions: List[Accession] = [] + errors: List[Error] = [] + info: List[Info] = [] + + @field_validator("target_repository") + def validate_target_repository(cls, v: str) -> str: + if v not in [item.value for item in TargetRepository]: + raise ValueError(f"Invalid 'target repository' value: '{v}'") + return v + + @classmethod + def from_json_file(cls, json_file_path: str) -> "RepositoryResponse": + with open(json_file_path, "r") as file: + data = json.load(file) + + return cls.model_validate(data) + + @classmethod + def from_json(cls, json_string: str) -> "RepositoryResponse": + data = json.loads(json_string) + + return cls.model_validate(data) diff --git a/mars-cli/mars_lib/submit.py b/mars-cli/mars_lib/submit.py index 7c095be..bb56b43 100644 --- a/mars-cli/mars_lib/submit.py +++ b/mars-cli/mars_lib/submit.py @@ -1,3 +1,4 @@ +from mars_lib.models.isa_json import Investigation from mars_lib.authentication import get_webin_auth_token from mars_lib.biosamples_external_references import ( get_header, @@ -6,11 +7,36 @@ validate_json_against_schema, input_json_schema_filepath, ) +from mars_lib.isa_json import reduce_isa_json_for_target_repo +from mars_lib.target_repo import TargetRepository +import requests +from typing import Any + + +def submit_to_biosamples( + investiagation: Investigation, + biosamples_credentials: dict[str, str], + url: str, +) -> requests.Response: + bs_input_investiagation = reduce_isa_json_for_target_repo( + investiagation, TargetRepository.BIOSAMPLES + ) + + webin_token = get_webin_auth_token(biosamples_credentials) + s = requests.Session() + s.headers.update({"accept": "application/json", "Content-Type": "application/json"}) + return s.post( + url, + params={"webinjwt": webin_token}, + data=bs_input_investiagation.model_dump_json(), + ) def create_external_references( - biosamples_credentials, 
biosamples_externalReferences, production -): + biosamples_credentials: dict[str, str], + biosamples_externalReferences: dict[str, Any], + production: bool, +) -> None: """ Main function to be executed when script is run. @@ -28,6 +54,8 @@ def create_external_references( json_doc=biosamples_externalReferences, json_schema=input_json_schema_filepath ) token = get_webin_auth_token(biosamples_credentials) + if not token: + raise ValueError("The token could not be generated.") header = get_header(token) for biosample_r in biosamples_externalReferences["biosampleExternalReferences"]: diff --git a/mars-cli/setup.py b/mars-cli/setup.py index 6dd6f34..ab44451 100644 --- a/mars-cli/setup.py +++ b/mars-cli/setup.py @@ -42,6 +42,7 @@ def run(self): "ruff", "pytest", "pytest-cov", + "mypy", ] }, project_urls={ diff --git a/mars-cli/tests/fixtures/json_responses/biosamples_success_reponse.json b/mars-cli/tests/fixtures/json_responses/biosamples_success_reponse.json new file mode 100644 index 0000000..6b8063f --- /dev/null +++ b/mars-cli/tests/fixtures/json_responses/biosamples_success_reponse.json @@ -0,0 +1,62 @@ +{ + "targetRepository": "ena", + "errors": [], + "info": [ + { + "name": "Submission date", + "message": "2024-03-22" + }, + { + "name": "Release date", + "message": "2025-03-22" + } + ], + "accessions": [ + { + "path": [ + { + "key": "studies", + "where": { + "key": "title", + "value": "Arabidopsis thaliana" + } + }, + { + "key": "materials", + "where": null + }, + { + "key": "sources", + "where": { + "key": "@id", + "value": "#source/330" + } + } + ], + "value": "SAMEA130788488" + }, + { + "path": [ + { + "key": "studies", + "where": { + "key": "title", + "value": "Arabidopsis thaliana" + } + }, + { + "key": "materials", + "where": null + }, + { + "key": "samples", + "where": { + "key": "@id", + "value": "#sample/331" + } + } + ], + "value": "SAMEA130788489" + } + ] +} \ No newline at end of file diff --git 
a/mars-cli/tests/fixtures/json_responses/failure_reponse.json b/mars-cli/tests/fixtures/json_responses/failure_reponse.json new file mode 100644 index 0000000..85800a1 --- /dev/null +++ b/mars-cli/tests/fixtures/json_responses/failure_reponse.json @@ -0,0 +1,45 @@ +{ + "targetRepository": "ena", + "errors": [ + { + "type": "INVALID_METADATA", + "message": "Missing required field collection_date", + "path": [ + { + "key": "studies", + "where": { + "key": "title", + "value": "Arabidopsis thaliana" + } + } + ] + }, + { + "type": "INVALID_DATA", + "message": "Could not locate file fake2.bam in the upload location", + "path": [ + { + "key": "studies", + "where": { + "key": "title", + "value": "Arabidopsis thaliana" + } + }, + { + "key": "assays", + "where": { + "key": "@id", + "value": "#assay/18_20_21" + } + }, + { + "key": "dataFiles", + "where": { + "key": "@id", + "value": "#data/334" + } + } + ] + } + ] +} \ No newline at end of file diff --git a/mars-cli/tests/test_biosample_external_references.py b/mars-cli/tests/test_biosample_external_references.py index 9cefe5d..494f986 100644 --- a/mars-cli/tests/test_biosample_external_references.py +++ b/mars-cli/tests/test_biosample_external_references.py @@ -88,14 +88,12 @@ def test_fetch_bs_json(): def test_load_bs_json(): bs_record_from_json = BiosamplesRecord("SAMEA112654119") - bs_record_from_json.load_bs_json( - bs_json_file="./tests/fixtures/SAMEA112654119.json" - ) + bs_record_from_json.load_bs_json("./tests/fixtures/SAMEA112654119.json") assert bs_record_from_json.bs_json["accession"] == "SAMEA112654119" bs_dict = load_json_file("./tests/fixtures/SAMEA112654119.json") bs_record_from_dict = BiosamplesRecord("SAMEA112654119") - bs_record_from_dict.load_bs_json(bs_json=bs_dict) + bs_record_from_dict.load_bs_json(bs_dict) assert bs_record_from_dict.bs_json["accession"] == "SAMEA112654119" bs_record_from_bad_json = BiosamplesRecord("SAMEA112654119") @@ -103,21 +101,19 @@ def test_load_bs_json(): ValueError, match="The 
file content of the given file './tests/fixtures/bad_json.json' is not valid JSON.", ): - bs_record_from_bad_json.load_bs_json( - bs_json_file="./tests/fixtures/bad_json.json" - ) + bs_record_from_bad_json.load_bs_json("./tests/fixtures/bad_json.json") bs_record_from_bad_dict = BiosamplesRecord("SAMEA112654119") with pytest.raises( - TypeError, - match="Given 'bs_json' is of type '' instead of type 'dict'.", + FileNotFoundError, + match="The file 'This is not even a dict!' does not exist.", ): - bs_record_from_bad_dict.load_bs_json(bs_json="This is not even a dict!") + bs_record_from_bad_dict.load_bs_json("This is not even a dict!") def test_extend_externalReferences(): bs_record = BiosamplesRecord("SAMEA112654119") - bs_record.load_bs_json(bs_json_file="./tests/fixtures/SAMEA112654119.json") + bs_record.load_bs_json("./tests/fixtures/SAMEA112654119.json") new_ext_refs_list = [] bs_record.extend_externalReferences(new_ext_refs_list) diff --git a/mars-cli/tests/test_isa_json.py b/mars-cli/tests/test_isa_json.py index 730629c..8c815cd 100644 --- a/mars-cli/tests/test_isa_json.py +++ b/mars-cli/tests/test_isa_json.py @@ -1,11 +1,14 @@ from mars_lib.isa_json import ( reduce_isa_json_for_target_repo, load_isa_json, + update_investigation, ) from mars_lib.target_repo import TargetRepository, TARGET_REPO_KEY import pytest from pydantic import ValidationError -from mars_lib.model import Data, Material, Assay, Person +from mars_lib.models.isa_json import Data, Material, Assay, Person, IsaJson +from mars_lib.models.repository_response import RepositoryResponse +import json def test_load_isa_json(): @@ -42,6 +45,18 @@ def test_reduce_isa_json_for_target_repo(): assert len(filtered_isa_json_study.assays) == 1 +def test_reduce_isa_json_for_biosamples(): + good_isa_json = load_isa_json( + "../test-data/ISA-BH2023-ALL/isa-bh2023-all.json", True + ) + + filtered_isa_json = reduce_isa_json_for_target_repo( + good_isa_json, TargetRepository.BIOSAMPLES + ) + + assert 
len(filtered_isa_json.studies[0].assays) == 0 + + def test_data_type_validator(): valid_data_json = {"@id": "data_001", "name": "data 1", "type": "Image File"} @@ -146,3 +161,65 @@ def test_person_phone_nr_validator(): with pytest.raises(ValidationError, match="Invalid number format"): Person.model_validate(invalid_person_json) + + +def test_update_study_materials_no_accession_categories(): + # This file has no characteristics for accessions + json_path = "../test-data/biosamples-original-isa-no-accesion-char.json" + with open(json_path) as json_file: + json_data = json.load(json_file) + + validated_isa_json = IsaJson.model_validate(json_data) + + respose_file_path = "tests/fixtures/json_responses/biosamples_success_reponse.json" + repo_response = RepositoryResponse.from_json_file(respose_file_path) + + updated_investigation = update_investigation( + validated_isa_json.investigation, repo_response + ) + + # Check the accession number of the source + # Accession characteristic is of type String + assert ( + updated_investigation.studies[0].materials.sources[0].characteristics[-1].value + == repo_response.accessions[0].value + ) + + # Check the accession number of the sample + # Accession characteristic is of type String + assert ( + updated_investigation.studies[0].materials.samples[0].characteristics[-1].value + == repo_response.accessions[1].value + ) + + +def test_update_study_materials_with_accession_categories(): + # This file has no characteristics for accessions + json_path = "../test-data/biosamples-original-isa.json" + with open(json_path) as json_file: + json_data = json.load(json_file) + + validated_isa_json = IsaJson.model_validate(json_data) + + respose_file_path = "tests/fixtures/json_responses/biosamples_success_reponse.json" + repo_response = RepositoryResponse.from_json_file(respose_file_path) + + updated_investigation = update_investigation( + validated_isa_json.investigation, repo_response + ) + # Check the accession number of the source + # 
Accession characteristic is of type OntologyAnnotation + assert ( + updated_investigation.studies[0] + .materials.sources[0] + .characteristics[-1] + .value.annotationValue + == repo_response.accessions[0].value + ) + + # Check the accession number of the sample + # Accession characteristic is of type String + assert ( + updated_investigation.studies[0].materials.samples[0].characteristics[-1].value + == repo_response.accessions[1].value + ) diff --git a/test-data/biosamples-original-isa-no-accesion-char.json b/test-data/biosamples-original-isa-no-accesion-char.json new file mode 100644 index 0000000..4d0e711 --- /dev/null +++ b/test-data/biosamples-original-isa-no-accesion-char.json @@ -0,0 +1,982 @@ +{ + "investigation": { + "identifier": "", + "title": "Bob's investigation", + "description": "", + "submissionDate": "", + "publicReleaseDate": "", + "ontologySourceReferences": [], + "filename": "Bob's investigation.txt", + "comments": [ + { + "name": "ISAjson export time", + "value": "2022-11-07T08:09:59Z" + }, + { + "name": "SEEK Project name", + "value": "Bob's PhD project" + }, + { + "name": "SEEK Project ID", + "value": "http://localhost:3000/single_pages/2" + }, + { + "name": "SEEK Investigation ID", + "value": "19" + } + ], + "publications": [], + "people": [ + { + "@id": "#people/5", + "lastName": "Bob", + "firstName": "Bob", + "midInitials": "", + "email": "bob@testing.com", + "phone": "", + "fax": "", + "address": "", + "affiliation": "", + "roles": [ + { + "termAccession": "", + "termSource": "", + "annotationValue": "" + } + ], + "comments": [ + { + "@id": "", + "value": "", + "name": "" + } + ] + } + ], + "studies": [ + { + "identifier": "", + "title": "Arabidopsis thaliana", + "description": "Nucleic acid sequencing and metabolomics and proteomics of Arabidopsis thaliana in specific experimental conditions to test a specific hypothesis.\r\n", + "submissionDate": "", + "publicReleaseDate": "", + "filename": "Arabidopsis thaliana.txt", + "comments": [ + 
{ + "name": "SEEK Study ID", + "value": "10" + }, + { + "name": "SEEK creation date", + "value": "2022-11-03T16:20:49Z" + } + ], + "publications": [], + "people": [ + { + "@id": "#people/5", + "lastName": "Bob", + "firstName": "Bob", + "midInitials": "", + "email": "bob@testing.com", + "phone": "", + "fax": "", + "address": "", + "affiliation": "", + "roles": [ + { + "termAccession": "", + "termSource": "", + "annotationValue": "" + } + ], + "comments": [ + { + "@id": "", + "value": "", + "name": "" + } + ] + } + ], + "studyDesignDescriptors": [], + "characteristicCategories": [ + { + "@id": "#characteristic_category/Title_317", + "characteristicType": { + "annotationValue": "Title", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/Description_318", + "characteristicType": { + "annotationValue": "Description", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/tax_id_319", + "characteristicType": { + "annotationValue": "tax_id", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/organism_320", + "characteristicType": { + "annotationValue": "organism", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/cell_type_321", + "characteristicType": { + "annotationValue": "cell_type", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/dev_stage_322", + "characteristicType": { + "annotationValue": "dev_stage", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/collection_date_323", + "characteristicType": { + "annotationValue": "collection_date", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/isolation_source_324", + "characteristicType": { + "annotationValue": "isolation_source", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/collected_by_325", + 
"characteristicType": { + "annotationValue": "collected_by", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/geographic_location_(country_and/or_sea)_326", + "characteristicType": { + "annotationValue": "geographic location (country and/or sea)", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/submission_date_327", + "characteristicType": { + "annotationValue": "submission date", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/status_328", + "characteristicType": { + "annotationValue": "status", + "termAccession": "", + "termSource": "" + } + } + ], + "materials": { + "sources": [ + { + "@id": "#source/330", + "name": "plant 1", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/Title_317" + }, + "value": { + "annotationValue": "plant 1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/Description_318" + }, + "value": { + "annotationValue": "plant in the lab", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/tax_id_319" + }, + "value": { + "annotationValue": "NCBI:txid3702", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/organism_320" + }, + "value": { + "annotationValue": "Arabidopsis thaliana", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/cell_type_321" + }, + "value": { + "annotationValue": "na", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + 
"termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/dev_stage_322" + }, + "value": { + "annotationValue": "budding", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/collection_date_323" + }, + "value": { + "annotationValue": "01/01/2022", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/isolation_source_324" + }, + "value": { + "annotationValue": "seed", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/collected_by_325" + }, + "value": { + "annotationValue": "Bob", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/geographic_location_(country_and/or_sea)_326" + }, + "value": { + "annotationValue": "Belgium", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_327" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_328" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + } + ], + "samples": [ + { + "@id": "#sample/331", + "name": "leaf 1", + "derivesFrom": [ + { + "@id": "#source/330" + } + ], + "characteristics": [], + "factorValues": [ + { + 
"category": { + "@id": "" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + } + ] + }, + "protocols": [ + { + "@id": "#protocol/18_10", + "name": "sample collection", + "protocolType": { + "annotationValue": "sample collection", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + }, + { + "@id": "#protocol/19_18", + "name": "nucleic acid extraction", + "protocolType": { + "annotationValue": "nucleic acid extraction", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + }, + { + "@id": "#protocol/20_20", + "name": "library construction", + "protocolType": { + "annotationValue": "library construction", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [ + { + "@id": "#parameter/349", + "parameterName": { + "annotationValue": "library_construction_protocol", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/351", + "parameterName": { + "annotationValue": "design_description", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/352", + "parameterName": { + "annotationValue": "library source", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/353", + "parameterName": { + "annotationValue": "library strategy", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/354", + "parameterName": { + "annotationValue": "library selection", + "termAccession": "", + "termSource": "" 
+ } + }, + { + "@id": "#parameter/355", + "parameterName": { + "annotationValue": "library layout", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/356", + "parameterName": { + "annotationValue": "insert size", + "termAccession": "", + "termSource": "" + } + } + ], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + }, + { + "@id": "#protocol/21_21", + "name": "nucleic acid sequencing", + "protocolType": { + "annotationValue": "nucleic acid sequencing", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [ + { + "@id": "#parameter/363", + "parameterName": { + "annotationValue": "sequencing instrument", + "termAccession": "", + "termSource": "" + } + } + ], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + } + ], + "processSequence": [ + { + "@id": "#process/sample_collection/331", + "name": "", + "executesProtocol": { + "@id": "#protocol/18_10" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [ + { + "@id": "#source/330" + } + ], + "outputs": [ + { + "@id": "#sample/331" + } + ] + } + ], + "assays": [ + { + "@id": "#assay/18_20_21", + "filename": "a_assays.txt", + "measurementType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "technologyType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "technologyPlatform": "", + "characteristicCategories": [ + { + "@id": "#characteristic_category/Title_350", + "characteristicType": { + "annotationValue": "Title", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/submission_date_358", + "characteristicType": { + "annotationValue": "submission date", + "termAccession": "", + 
"termSource": "" + } + }, + { + "@id": "#characteristic_category/status_359", + "characteristicType": { + "annotationValue": "status", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/accession_360", + "characteristicType": { + "annotationValue": "accession", + "termAccession": "", + "termSource": "" + } + } + ], + "materials": { + "samples": [ + { + "@id": "#sample/331" + } + ], + "otherMaterials": [ + { + "@id": "#other_material/332", + "name": "extract 1", + "type": "Extract Name", + "characteristics": [], + "derivesFrom": [ + { + "@id": "#sample/331" + } + ] + }, + { + "@id": "#other_material/333", + "name": "library 1", + "type": "Extract Name", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/Title_350" + }, + "value": { + "annotationValue": "library 1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_358" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_359" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_360" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#other_material/332" + } + ] + } + ] + }, + "processSequence": [ + { + "@id": "#process/nucleic_acid_extraction/332", + "name": "", + "executesProtocol": { + "@id": "#protocol/19_18" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": 
{ + "@id": "#process/sample_collection/332" + }, + "nextProcess": { + "@id": "#process/library_construction/332" + }, + "inputs": [ + { + "@id": "#sample/331" + } + ], + "outputs": [ + { + "@id": "#other_material/332" + } + ] + }, + { + "@id": "#process/library_construction/333", + "name": "", + "executesProtocol": { + "@id": "#protocol/20_20" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/349" + }, + "value": { + "annotationValue": "lib prep", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/351" + }, + "value": { + "annotationValue": "Test", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/352" + }, + "value": { + "annotationValue": "OTHER", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/353" + }, + "value": { + "annotationValue": "OTHER", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/354" + }, + "value": { + "annotationValue": "RT-PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/355" + }, + "value": { + "annotationValue": "SINGLE", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/356" + }, + "value": { + "annotationValue": "100", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": 
"#process/nucleic_acid_extraction/333" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/333" + }, + "inputs": [ + { + "@id": "#other_material/332" + } + ], + "outputs": [ + { + "@id": "#other_material/333" + } + ] + }, + { + "@id": "#process/nucleic_acid_sequencing/334", + "name": "", + "executesProtocol": { + "@id": "#protocol/21_21" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/363" + }, + "value": { + "annotationValue": " MinION", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/library_construction/334" + }, + "nextProcess": {}, + "inputs": [ + { + "@id": "#other_material/333" + } + ], + "outputs": [ + { + "@id": "#data_file/334" + } + ] + } + ], + "dataFiles": [ + { + "@id": "#data/334", + "name": "fake2.bam", + "type": "Raw Data File", + "comments": [ + { + "name": "file type", + "value": "bam" + }, + { + "name": "file checksum", + "value": "9840f585055afc37de353706fd31a377" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "" + }, + { + "name": "accession", + "value": "" + } + ] + } + ], + "unitCategories": [] + } + ], + "factors": [], + "unitCategories": [] + } + ] + } +} \ No newline at end of file diff --git a/test-data/biosamples-original-isa.json b/test-data/biosamples-original-isa.json new file mode 100644 index 0000000..bbd6121 --- /dev/null +++ b/test-data/biosamples-original-isa.json @@ -0,0 +1,1005 @@ +{ + "investigation": { + "identifier": "", + "title": "Bob's investigation", + "description": "", + "submissionDate": "", + "publicReleaseDate": "", + "ontologySourceReferences": [], + "filename": "Bob's investigation.txt", + "comments": [ + { + "name": "ISAjson export time", + "value": "2022-11-07T08:09:59Z" + }, + { + "name": "SEEK Project name", + "value": "Bob's PhD project" + }, + { + "name": "SEEK Project 
ID", + "value": "http://localhost:3000/single_pages/2" + }, + { + "name": "SEEK Investigation ID", + "value": "19" + } + ], + "publications": [], + "people": [ + { + "@id": "#people/5", + "lastName": "Bob", + "firstName": "Bob", + "midInitials": "", + "email": "bob@testing.com", + "phone": "", + "fax": "", + "address": "", + "affiliation": "", + "roles": [ + { + "termAccession": "", + "termSource": "", + "annotationValue": "" + } + ], + "comments": [ + { + "@id": "", + "value": "", + "name": "" + } + ] + } + ], + "studies": [ + { + "identifier": "", + "title": "Arabidopsis thaliana", + "description": "Nucleic acid sequencing and metabolomics and proteomics of Arabidopsis thaliana in specific experimental conditions to test a specific hypothesis.\r\n", + "submissionDate": "", + "publicReleaseDate": "", + "filename": "Arabidopsis thaliana.txt", + "comments": [ + { + "name": "SEEK Study ID", + "value": "10" + }, + { + "name": "SEEK creation date", + "value": "2022-11-03T16:20:49Z" + } + ], + "publications": [], + "people": [ + { + "@id": "#people/5", + "lastName": "Bob", + "firstName": "Bob", + "midInitials": "", + "email": "bob@testing.com", + "phone": "", + "fax": "", + "address": "", + "affiliation": "", + "roles": [ + { + "termAccession": "", + "termSource": "", + "annotationValue": "" + } + ], + "comments": [ + { + "@id": "", + "value": "", + "name": "" + } + ] + } + ], + "studyDesignDescriptors": [], + "characteristicCategories": [ + { + "@id": "#characteristic_category/Title_317", + "characteristicType": { + "annotationValue": "Title", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/Description_318", + "characteristicType": { + "annotationValue": "Description", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/tax_id_319", + "characteristicType": { + "annotationValue": "tax_id", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": 
"#characteristic_category/organism_320", + "characteristicType": { + "annotationValue": "organism", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/cell_type_321", + "characteristicType": { + "annotationValue": "cell_type", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/dev_stage_322", + "characteristicType": { + "annotationValue": "dev_stage", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/collection_date_323", + "characteristicType": { + "annotationValue": "collection_date", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/isolation_source_324", + "characteristicType": { + "annotationValue": "isolation_source", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/collected_by_325", + "characteristicType": { + "annotationValue": "collected_by", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/geographic_location_(country_and/or_sea)_326", + "characteristicType": { + "annotationValue": "geographic location (country and/or sea)", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/submission_date_327", + "characteristicType": { + "annotationValue": "submission date", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/status_328", + "characteristicType": { + "annotationValue": "status", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/accession_329", + "characteristicType": { + "annotationValue": "accession", + "termAccession": "", + "termSource": "" + } + } + ], + "materials": { + "sources": [ + { + "@id": "#source/330", + "name": "plant 1", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/Title_317" + }, + "value": { + "annotationValue": "plant 1", + "termSource": "", + 
"termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/Description_318" + }, + "value": { + "annotationValue": "plant in the lab", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/tax_id_319" + }, + "value": { + "annotationValue": "NCBI:txid3702", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/organism_320" + }, + "value": { + "annotationValue": "Arabidopsis thaliana", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/cell_type_321" + }, + "value": { + "annotationValue": "na", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/dev_stage_322" + }, + "value": { + "annotationValue": "budding", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/collection_date_323" + }, + "value": { + "annotationValue": "01/01/2022", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/isolation_source_324" + }, + "value": { + "annotationValue": "seed", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/collected_by_325" + }, + "value": { + "annotationValue": "Bob", + 
"termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/geographic_location_(country_and/or_sea)_326" + }, + "value": { + "annotationValue": "Belgium", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_327" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_328" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_329" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + } + ], + "samples": [ + { + "@id": "#sample/331", + "name": "leaf 1", + "derivesFrom": [ + { + "@id": "#source/330" + } + ], + "characteristics": [], + "factorValues": [ + { + "category": { + "@id": "" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ] + } + ] + }, + "protocols": [ + { + "@id": "#protocol/18_10", + "name": "sample collection", + "protocolType": { + "annotationValue": "sample collection", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + }, + { + "@id": "#protocol/19_18", + "name": "nucleic acid 
extraction", + "protocolType": { + "annotationValue": "nucleic acid extraction", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + }, + { + "@id": "#protocol/20_20", + "name": "library construction", + "protocolType": { + "annotationValue": "library construction", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", + "parameters": [ + { + "@id": "#parameter/349", + "parameterName": { + "annotationValue": "library_construction_protocol", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/351", + "parameterName": { + "annotationValue": "design_description", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/352", + "parameterName": { + "annotationValue": "library source", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/353", + "parameterName": { + "annotationValue": "library strategy", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/354", + "parameterName": { + "annotationValue": "library selection", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/355", + "parameterName": { + "annotationValue": "library layout", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#parameter/356", + "parameterName": { + "annotationValue": "insert size", + "termAccession": "", + "termSource": "" + } + } + ], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + }, + { + "@id": "#protocol/21_21", + "name": "nucleic acid sequencing", + "protocolType": { + "annotationValue": "nucleic acid sequencing", + "termAccession": "", + "termSource": "" + }, + "description": "", + "uri": "", + "version": "", 
+ "parameters": [ + { + "@id": "#parameter/363", + "parameterName": { + "annotationValue": "sequencing instrument", + "termAccession": "", + "termSource": "" + } + } + ], + "components": [ + { + "componentName": "", + "componentType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + } + } + ] + } + ], + "processSequence": [ + { + "@id": "#process/sample_collection/331", + "name": "", + "executesProtocol": { + "@id": "#protocol/18_10" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": {}, + "nextProcess": {}, + "inputs": [ + { + "@id": "#source/330" + } + ], + "outputs": [ + { + "@id": "#sample/331" + } + ] + } + ], + "assays": [ + { + "@id": "#assay/18_20_21", + "filename": "a_assays.txt", + "measurementType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "technologyType": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "technologyPlatform": "", + "characteristicCategories": [ + { + "@id": "#characteristic_category/Title_350", + "characteristicType": { + "annotationValue": "Title", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/submission_date_358", + "characteristicType": { + "annotationValue": "submission date", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/status_359", + "characteristicType": { + "annotationValue": "status", + "termAccession": "", + "termSource": "" + } + }, + { + "@id": "#characteristic_category/accession_360", + "characteristicType": { + "annotationValue": "accession", + "termAccession": "", + "termSource": "" + } + } + ], + "materials": { + "samples": [ + { + "@id": "#sample/331" + } + ], + "otherMaterials": [ + { + "@id": "#other_material/332", + "name": "extract 1", + "type": "Extract Name", + "characteristics": [], + "derivesFrom": [ + { + "@id": "#sample/331" + } + ] + }, + { + "@id": "#other_material/333", + "name": "library 1", + "type": 
"Extract Name", + "characteristics": [ + { + "category": { + "@id": "#characteristic_category/Title_350" + }, + "value": { + "annotationValue": "library 1", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/submission_date_358" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/status_359" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#characteristic_category/accession_360" + }, + "value": { + "annotationValue": "", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "derivesFrom": [ + { + "@id": "#other_material/332" + } + ] + } + ] + }, + "processSequence": [ + { + "@id": "#process/nucleic_acid_extraction/332", + "name": "", + "executesProtocol": { + "@id": "#protocol/19_18" + }, + "parameterValues": [], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/sample_collection/332" + }, + "nextProcess": { + "@id": "#process/library_construction/332" + }, + "inputs": [ + { + "@id": "#sample/331" + } + ], + "outputs": [ + { + "@id": "#other_material/332" + } + ] + }, + { + "@id": "#process/library_construction/333", + "name": "", + "executesProtocol": { + "@id": "#protocol/20_20" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/349" + }, + "value": { + "annotationValue": "lib prep", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/351" + }, + "value": { + "annotationValue": 
"Test", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/352" + }, + "value": { + "annotationValue": "OTHER", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/353" + }, + "value": { + "annotationValue": "OTHER", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/354" + }, + "value": { + "annotationValue": "RT-PCR", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/355" + }, + "value": { + "annotationValue": "SINGLE", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + }, + { + "category": { + "@id": "#parameter/356" + }, + "value": { + "annotationValue": "100", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": "#process/nucleic_acid_extraction/333" + }, + "nextProcess": { + "@id": "#process/nucleic_acid_sequencing/333" + }, + "inputs": [ + { + "@id": "#other_material/332" + } + ], + "outputs": [ + { + "@id": "#other_material/333" + } + ] + }, + { + "@id": "#process/nucleic_acid_sequencing/334", + "name": "", + "executesProtocol": { + "@id": "#protocol/21_21" + }, + "parameterValues": [ + { + "category": { + "@id": "#parameter/363" + }, + "value": { + "annotationValue": " MinION", + "termSource": "", + "termAccession": "" + }, + "unit": { + "termSource": "", + "termAccession": "", + "comments": [] + } + } + ], + "performer": "", + "date": "", + "previousProcess": { + "@id": 
"#process/library_construction/334" + }, + "nextProcess": {}, + "inputs": [ + { + "@id": "#other_material/333" + } + ], + "outputs": [ + { + "@id": "#data_file/334" + } + ] + } + ], + "dataFiles": [ + { + "@id": "#data/334", + "name": "fake2.bam", + "type": "Raw Data File", + "comments": [ + { + "name": "file type", + "value": "bam" + }, + { + "name": "file checksum", + "value": "9840f585055afc37de353706fd31a377" + }, + { + "name": "submission date", + "value": "" + }, + { + "name": "status", + "value": "" + }, + { + "name": "accession", + "value": "" + } + ] + } + ], + "unitCategories": [] + } + ], + "factors": [], + "unitCategories": [] + } + ] + } +} \ No newline at end of file