Merge branch 'develop' into develop-fix-gh-workflow
linglp authored Sep 20, 2024
2 parents 85eed54 + 21aa2b8 commit 8dcc572
Showing 6 changed files with 249 additions and 53 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/test.yml
@@ -67,13 +67,12 @@ jobs:
| python3 - --version ${{ env.POETRY_VERSION }};
poetry config virtualenvs.create true;
poetry config virtualenvs.in-project true;
#----------------------------------------------
# install dependencies and root project
#----------------------------------------------
- name: Install dependencies and root project
run: poetry install --no-interaction --all-extras

#----------------------------------------------
# perform linting
#----------------------------------------------
17 changes: 12 additions & 5 deletions schematic/store/synapse.py
@@ -1578,12 +1578,14 @@ def process_row_annotations(
(isinstance(anno_v, str) and anno_v.strip() == "")
or (isinstance(anno_v, float) and np.isnan(anno_v))
):
annos.pop(anno_k) if anno_k in annos.keys() else annos
annos["annotations"]["annotations"].pop(anno_k) if anno_k in annos[
"annotations"
]["annotations"].keys() else annos["annotations"]["annotations"]
continue

# Otherwise save annotation as appropriate
if isinstance(anno_v, float) and np.isnan(anno_v):
annos[anno_k] = ""
annos["annotations"]["annotations"][anno_k] = ""
continue

# Handle strings that match the csv_list_regex and pass the validation rule
@@ -1597,10 +1599,11 @@
node_validation_rules = dmge.get_node_validation_rules(**param)

if rule_in_rule_list("list", node_validation_rules):
annos[anno_k] = anno_v.split(",")
annos["annotations"]["annotations"][anno_k] = anno_v.split(",")
continue
# default: assign the original value
annos[anno_k] = anno_v
annos["annotations"]["annotations"][anno_k] = anno_v

return annos

@async_missing_entity_handler
@@ -1656,6 +1659,7 @@ async def format_row_annotations(
annos = await self.get_async_annotation(entityId)

csv_list_regex = comma_separated_list_regex()

annos = self.process_row_annotations(
dmge=dmge,
metadata_syn=metadataSyn,
@@ -1926,7 +1930,10 @@ async def _process_store_annos(self, requests: Set[asyncio.Task]) -> None:
else:
# store annotations if they are not None
if annos:
normalized_annos = {k.lower(): v for k, v in annos.items()}
normalized_annos = {
k.lower(): v
for k, v in annos["annotations"]["annotations"].items()
}
entity_id = normalized_annos["entityid"]
logger.info(
f"Obtained and processed annotations for {entity_id} entity"
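The synapse.py changes above move annotation reads and writes from a flat dict to a nested payload. A minimal sketch of the shape this implies, assuming get_async_annotation() returns the per-entity key/value pairs under annos["annotations"]["annotations"] (the nesting is taken from the diff; the concrete key names below are illustrative only, not part of the commit):

# Hypothetical payload; only the two nested "annotations" levels come from the diff.
annos = {
    "annotations": {
        "annotations": {
            "entityId": "syn00000000",
            "SampleID": "1.0",
            "FileFormat": "",
        }
    }
}

# process_row_annotations() now edits the inner mapping instead of the top level:
inner = annos["annotations"]["annotations"]
inner.pop("FileFormat", None)                       # blank or NaN values are removed from the inner mapping
inner["SampleID"] = "1.0"                           # plain values are stored as-is
inner["GenomeBuild"] = "GRCh38,GRCh37".split(",")   # values matching the "list" rule are split

# _process_store_annos() then lowercases the keys of the same inner mapping:
normalized = {k.lower(): v for k, v in inner.items()}
assert normalized["entityid"] == "syn00000000"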
10 changes: 5 additions & 5 deletions tests/data/mock_manifests/ValidFilenameManifest.csv
@@ -1,5 +1,5 @@
Component,Filename,Id,entityId
MockFilename,schematic - main/TestSubmitMockFilename/txt1.txt,3c4c384b-5c49-4a7c-90a6-43f03a5ddbdc,syn62822369
MockFilename,schematic - main/TestSubmitMockFilename/txt2.txt,3b45f5f3-408f-47ff-945e-9badf0a43195,syn62822368
MockFilename,schematic - main/TestSubmitMockFilename/txt3.txt,2bbc898f-2651-4af3-834a-10c506de0fbd,syn62822366
MockFilename,schematic - main/TestSubmitMockFilename/txt4.txt,5a2d3816-436e-458f-9887-cb8355518e23,syn62822364
Component,Sample ID,Filename,Id,entityId
MockFilename,1.0,schematic - main/TestSubmitMockFilename/txt1.txt,3c4c384b-5c49-4a7c-90a6-43f03a5ddbdc,syn62822369
MockFilename,2.0,schematic - main/TestSubmitMockFilename/txt2.txt,3b45f5f3-408f-47ff-945e-9badf0a43195,syn62822368
MockFilename,3.0,schematic - main/TestSubmitMockFilename/txt3.txt,2bbc898f-2651-4af3-834a-10c506de0fbd,syn62822366
MockFilename,4.0,schematic - main/TestSubmitMockFilename/txt4.txt,5a2d3816-436e-458f-9887-cb8355518e23,syn62822364
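The fixture now carries a Sample ID column whose values are written as floats (1.0 through 4.0). The integration test below loads manifests with a string dtype so these stay as raw text when compared against the annotations pushed to Synapse; a minimal sketch of that behavior using plain pandas (the project's helpers.get_data_frame() wrapper is assumed to behave equivalently):

import pandas as pd

# dtype="string" keeps "1.0" as text instead of coercing it to the float 1.0,
# mirroring load_args = {"dtype": "string"} in the test below.
manifest = pd.read_csv(
    "tests/data/mock_manifests/ValidFilenameManifest.csv", dtype="string"
)
assert manifest["Sample ID"].tolist() == ["1.0", "2.0", "3.0", "4.0"]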
@@ -0,0 +1,3 @@
Filename,Sample ID,File Format,Component,Genome Build,Genome FASTA,Id,entityId
schematic - main/Test Filename Upload/txt1.txt,10.0,,BulkRNA-seqAssay,,,01ded8fc-0915-4959-85ab-64e9644c8787,syn62276954
schematic - main/Test Filename Upload/txt2.txt,20.0,,BulkRNA-seqAssay,,,fd122bb5-3353-4c94-b1f5-0bb93a3e9fc9,syn62276956
243 changes: 210 additions & 33 deletions tests/integration/test_metadata_model.py
@@ -1,9 +1,21 @@
"""
This script contains a test suite for verifying the submission and annotation of
file-based manifests using the `TestMetadataModel` class to communicate with Synapse
and verify the expected behavior of uploading annotation manifest CSVs using the
metadata model.
It utilizes the `pytest` framework along with `pytest-mock` to mock and spy on methods
of the `SynapseStorage` class, which is responsible for handling file uploads and
annotations in Synapse.
"""

import logging
import pytest
import tempfile

from contextlib import nullcontext as does_not_raise

import pytest
from pytest_mock import MockerFixture

from schematic.store.synapse import SynapseStorage
from tests.conftest import metadata_model

@@ -12,36 +24,152 @@


class TestMetadataModel:
# Define the test cases as a class attribute
test_cases = [
# Test 1: Check that a valid manifest can be submitted, and corresponding entities annotated from it
(
"mock_manifests/filepath_submission_test_manifest.csv",
"syn62276880",
None,
"syn62280543",
"syn53011753",
None,
),
# Test 2: Change the Sample ID annotation from the previous test to ensure the manifest file is getting updated
(
"mock_manifests/filepath_submission_test_manifest_sampleidx10.csv",
"syn62276880",
None,
"syn62280543",
"syn53011753",
None,
),
# Test 3: Test manifest file upload with validation based on the MockFilename component and given dataset_scope
(
"mock_manifests/ValidFilenameManifest.csv",
"syn62822337",
"MockFilename",
"syn62822975",
"syn63192751",
"syn62822337",
),
]

def validate_manifest_annotations(
self,
manifest_annotations,
manifest_entity_type,
expected_entity_id,
manifest_file_contents=None,
):
"""
Validates that the annotations on a manifest entity (file or table) were correctly updated
by comparing the annotations on the manifest entity with the contents of the manifest file itself,
and ensuring the eTag annotation is not empty.
This method is wrapped by ``_submit_and_verify_manifest()``
Arguments:
manifest_annotations (pd.DataFrame): manifest annotations
manifest_entity_type (str): type of manifest (file or table)
expected_entity_id (str): expected entity ID of the manifest
manifest_file_contents (pd.DataFrame): manifest file contents
Returns:
None
"""
# Check that the eTag annotation is not empty
assert len(manifest_annotations["eTag"][0]) > 0

# Check that entityId is expected
assert manifest_annotations["entityId"][0] == expected_entity_id

# For manifest files only: Check that all other annotations from the manifest match the annotations in the manifest file itself
if manifest_entity_type.lower() != "file":
return
for annotation in manifest_annotations.keys():
if annotation in ["eTag", "entityId"]:
continue
else:
assert (
manifest_annotations[annotation][0]
== manifest_file_contents[annotation].unique()
)

@pytest.mark.parametrize(
"manifest_path, dataset_id, validate_component, expected_manifest_id, "
"expected_table_id, dataset_scope",
test_cases,
)
def test_submit_filebased_manifest_file_and_entities(
self,
helpers,
manifest_path,
dataset_id,
validate_component,
expected_manifest_id,
expected_table_id,
dataset_scope,
mocker: MockerFixture,
synapse_store,
):
self._submit_and_verify_manifest(
helpers=helpers,
mocker=mocker,
synapse_store=synapse_store,
manifest_path=manifest_path,
dataset_id=dataset_id,
expected_manifest_id=expected_manifest_id,
expected_table_id=expected_table_id,
manifest_record_type="file_and_entities",
validate_component=validate_component,
dataset_scope=dataset_scope,
)

@pytest.mark.parametrize(
"manifest_path, dataset_id, validate_component, expected_manifest_id, dataset_scope",
[
(
"mock_manifests/filepath_submission_test_manifest.csv",
"syn62276880",
None,
"syn62280543",
None,
),
(
"mock_manifests/ValidFilenameManifest.csv",
"syn62822337",
"MockFilename",
"syn62822975",
"syn62822337",
),
],
"manifest_path, dataset_id, validate_component, expected_manifest_id, "
"expected_table_id, dataset_scope",
test_cases,
)
def test_submit_filebased_manifest(
def test_submit_filebased_manifest_table_and_file(
self,
helpers,
manifest_path,
dataset_id,
validate_component,
expected_manifest_id,
expected_table_id,
dataset_scope,
mocker: MockerFixture,
synapse_store,
):
self._submit_and_verify_manifest(
helpers=helpers,
mocker=mocker,
synapse_store=synapse_store,
manifest_path=manifest_path,
dataset_id=dataset_id,
expected_manifest_id=expected_manifest_id,
expected_table_id=expected_table_id,
manifest_record_type="table_and_file",
validate_component=validate_component,
dataset_scope=dataset_scope,
)

def _submit_and_verify_manifest(
self,
helpers,
mocker,
synapse_store,
manifest_path,
dataset_id,
expected_manifest_id,
expected_table_id,
manifest_record_type,
validate_component=None,
dataset_scope=None,
):
# spys
# Spies
spy_upload_file_as_csv = mocker.spy(SynapseStorage, "upload_manifest_as_csv")
spy_upload_file_as_table = mocker.spy(
SynapseStorage, "upload_manifest_as_table"
@@ -54,31 +182,80 @@ def test_submit_filebased_manifest(
# GIVEN a metadata model object using class labels
meta_data_model = metadata_model(helpers, "class_label")

# AND a filebased test manifset
manifest_path = helpers.get_data_path(manifest_path)
# AND a filebased test manifest
load_args = {"dtype": "string"}
manifest = helpers.get_data_frame(
manifest_path, preserve_raw_input=True, **load_args
)
manifest_full_path = helpers.get_data_path(manifest_path)

# WHEN the manifest it submitted
# WHEN the manifest is submitted and files are annotated
# THEN submission should complete without error

with does_not_raise():
manifest_id = meta_data_model.submit_metadata_manifest(
manifest_path=manifest_path,
manifest_path=manifest_full_path,
dataset_id=dataset_id,
manifest_record_type="file_and_entities",
manifest_record_type=manifest_record_type,
restrict_rules=False,
file_annotations_upload=True,
hide_blanks=False,
validate_component=validate_component,
dataset_scope=dataset_scope,
)

# AND the manifest should be submitted to the correct place
assert manifest_id == expected_manifest_id
# AND the files should be annotated
spy_add_annotations.assert_called_once()

# AND the manifest should be uploaded as a CSV
spy_upload_file_as_csv.assert_called_once()
# AND annotations should be added to the files
spy_add_annotations.assert_called_once()
# AND the annotations on the entities should have the correct metadata
for index, row in manifest.iterrows():
entityId = row["entityId"]
expected_sample_id = row["Sample ID"]
annos = synapse_store.syn.get_annotations(entityId)
sample_id = annos["SampleID"][0]
assert str(sample_id) == str(expected_sample_id)

# AND the annotations on the manifest file itself are correct
manifest_file_annotations = synapse_store.syn.get_annotations(
expected_manifest_id
)
self.validate_manifest_annotations(
manifest_annotations=manifest_file_annotations,
manifest_entity_type="file",
expected_entity_id=expected_manifest_id,
manifest_file_contents=manifest,
)

# AND the manifest should not be uploaded as a table or combination of table, file, and entities
if manifest_record_type == "table_and_file":
with tempfile.TemporaryDirectory() as download_dir:
manifest_table = synapse_store.syn.tableQuery(
f"select * from {expected_table_id}", downloadLocation=download_dir
).asDataFrame()

# AND the columns in the manifest table should reflect the ones in the file
table_columns = manifest_table.columns
manifest_columns = [col.replace(" ", "") for col in manifest.columns]
assert set(table_columns) == set(manifest_columns)

# AND the annotations on the manifest table itself are correct
manifest_table_annotations = synapse_store.syn.get_annotations(
expected_table_id
)
self.validate_manifest_annotations(
manifest_annotations=manifest_table_annotations,
manifest_entity_type="table",
expected_entity_id=expected_table_id,
)

# AND the manifest should be submitted to the correct place
assert manifest_id == expected_manifest_id

# AND the correct upload methods were called for the given record type
if manifest_record_type == "file_and_entities":
spy_upload_file_as_csv.assert_called_once()
spy_upload_file_as_table.assert_not_called()
spy_upload_file_combo.assert_not_called()
elif manifest_record_type == "table_and_file":
spy_upload_file_as_table.assert_called_once()
spy_upload_file_as_csv.assert_not_called()
spy_upload_file_combo.assert_not_called()
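The test_cases entries near the top of the class are positional six-tuples; a hedged sketch of what each slot corresponds to, with names taken from the parametrize string (the NamedTuple itself is not part of the commit):

from typing import NamedTuple, Optional

class SubmissionCase(NamedTuple):
    """Named view of one entry in TestMetadataModel.test_cases."""
    manifest_path: str                 # manifest CSV under tests/data/
    dataset_id: str                    # Synapse dataset the manifest is submitted to
    validate_component: Optional[str]  # component used for filename validation, if any
    expected_manifest_id: str          # Synapse ID the uploaded manifest file should get
    expected_table_id: str             # Synapse table holding the manifest contents
    dataset_scope: Optional[str]       # dataset scope passed through for validation

# Example: the third case (MockFilename validation) from the class attribute above.
case = SubmissionCase(
    "mock_manifests/ValidFilenameManifest.csv",
    "syn62822337",
    "MockFilename",
    "syn62822975",
    "syn63192751",
    "syn62822337",
)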
