From e1981fea622f857cdc38f91b56891e8a83d75c64 Mon Sep 17 00:00:00 2001 From: Eugene Clark Date: Fri, 26 Apr 2024 09:47:23 -0400 Subject: [PATCH] Update unit tests to match the new VRS 2.0 digests (#92) * Update expected VRS IDs for VCF tests * Update VRS IDs for variation tests * Update variation test data to match VRS 2.0 changes * Comment out response model to return full VRS objects instead of serialized version * Make get location/variation behave consistently even when the object store does not throw a KeyError on missing key * Remove code added to make debugging easier * Storage implementations should be consistent with MutableMapping API and throw KeyError when an item is not found * Remove VRS model classes from response objects because the serialization used internally is not correct for API responses * Corrected path used for missing allele id test * Get location and get variation should be consistent in behavior when id is not found * Revert unecessary change --- src/anyvar/restapi/main.py | 19 ++++++++---- src/anyvar/restapi/schema.py | 28 +++++++++--------- src/anyvar/storage/postgres.py | 2 ++ src/anyvar/storage/snowflake.py | 2 ++ tests/data/variations.json | 52 +++++++++++++++++++-------------- tests/test_location.py | 5 ++-- tests/test_variation.py | 4 +-- tests/test_vcf.py | 6 ++-- 8 files changed, 68 insertions(+), 50 deletions(-) diff --git a/src/anyvar/restapi/main.py b/src/anyvar/restapi/main.py index 02060c3..f80944c 100644 --- a/src/anyvar/restapi/main.py +++ b/src/anyvar/restapi/main.py @@ -100,11 +100,16 @@ def get_location_by_id( try: location = av.get_object(location_id) except KeyError: - return HTTPException(status_code=HTTPStatus.NOT_FOUND) + raise HTTPException( + status_code=HTTPStatus.NOT_FOUND, detail=f"Location {location_id} not found" + ) + if location: return {"location": location.model_dump(exclude_none=True)} else: - return {"location": None} + raise HTTPException( + status_code=HTTPStatus.NOT_FOUND, detail=f"Location {location_id} not found" + ) @app.put( @@ -273,7 +278,6 @@ def get_variation_by_id( :raise HTTPException: if no variation matches provided ID """ av: AnyVar = request.app.state.anyvar - try: variation = av.get_object(variation_id, deref=True) except KeyError: @@ -281,9 +285,12 @@ def get_variation_by_id( status_code=HTTPStatus.NOT_FOUND, detail=f"Variation {variation_id} not found" ) - result = {"messages": [], "data": variation.model_dump(exclude_none=True)} - - return result + if variation: + return {"messages": [], "data": variation.model_dump(exclude_none=True)} + else: + raise HTTPException( + status_code=HTTPStatus.NOT_FOUND, detail=f"Variation {variation_id} not found" + ) @app.get( diff --git a/src/anyvar/restapi/schema.py b/src/anyvar/restapi/schema.py index 6212555..ddd7e33 100644 --- a/src/anyvar/restapi/schema.py +++ b/src/anyvar/restapi/schema.py @@ -50,7 +50,7 @@ def schema_extra(schema: Dict[str, Any], model: Type["InfoResponse"]) -> None: class GetSequenceLocationResponse(BaseModel): """Describe response for the /locations/ endpoint""" - location: Optional[models.SequenceLocation] + location: Optional[Dict] class RegisterVariationRequest(BaseModel): @@ -124,7 +124,7 @@ class GetVariationResponse(BaseModel): """Describe response for the /variation get endpoint""" messages: List[StrictStr] - data: models.Variation + data: Dict class Config: """Configure GetVariationResponse class""" @@ -139,19 +139,21 @@ def schema_extra(schema: Dict[str, Any], model: Type["GetVariationResponse"]) -> schema["example"] = { "messages": [], "data": { - "_id": "ga4gh:VA.ZDdoQdURgO2Daj2NxLj4pcDnjiiAsfbO", - "type": "Allele", + "digest": "K7akyz9PHB0wg8wBNVlWAAdvMbJUJJfU", + "id": "ga4gh:VA.K7akyz9PHB0wg8wBNVlWAAdvMbJUJJfU", "location": { - "_id": "ga4gh:VSL.2cHIgn7iLKk4x9z3zLkSTTFMV0e48DR4", - "type": "SequenceLocation", - "sequence_id": "ga4gh:SQ.cQvw4UsHHRRlogxbWCB8W-mKD4AraM9y", - "interval": { - "type": "SequenceInterval", - "start": {"type": "Number", "value": 599}, - "end": {"type": "Number", "value": 600}, + "digest": "01EH5o6V6VEyNUq68gpeTwKE7xOo-WAy", + "id": "ga4gh:SL.01EH5o6V6VEyNUq68gpeTwKE7xOo-WAy", + "start": 87894076, + "end": 87894077, + "sequenceReference": { + "refgetAccession": "SQ.ss8r_wB0-b9r44TQTMmVTI92884QvBiB", + "type": "SequenceReference", }, + "type": "SequenceLocation", }, - "state": {"type": "LiteralSequenceExpression", "sequence": "E"}, + "state": {"sequence": "T", "type": "LiteralSequenceExpression"}, + "type": "Allele", }, } @@ -159,7 +161,7 @@ def schema_extra(schema: Dict[str, Any], model: Type["GetVariationResponse"]) -> class SearchResponse(BaseModel): """Describe response for the /search endpoint""" - variations: List[models.Variation] + variations: List[Dict] class VariationStatisticType(str, Enum): diff --git a/src/anyvar/storage/postgres.py b/src/anyvar/storage/postgres.py index 0a9c162..c28df48 100644 --- a/src/anyvar/storage/postgres.py +++ b/src/anyvar/storage/postgres.py @@ -105,6 +105,8 @@ def __getitem__(self, name: str) -> Optional[Any]: return models.SequenceLocation(**result) else: raise NotImplementedError + else: + raise KeyError(name) def __contains__(self, name: str) -> bool: """Check whether VRS objects table contains ID. diff --git a/src/anyvar/storage/snowflake.py b/src/anyvar/storage/snowflake.py index eb035ef..54857c9 100644 --- a/src/anyvar/storage/snowflake.py +++ b/src/anyvar/storage/snowflake.py @@ -203,6 +203,8 @@ def __getitem__(self, name: str) -> Optional[Any]: return models.SequenceLocation(**result) else: raise NotImplementedError + else: + raise KeyError(name) def __contains__(self, name: str) -> bool: """Check whether VRS objects table contains ID. diff --git a/tests/data/variations.json b/tests/data/variations.json index 3849966..f244666 100644 --- a/tests/data/variations.json +++ b/tests/data/variations.json @@ -1,17 +1,19 @@ { "alleles": { - "ga4gh:VA.GbDZmcmmWJngURaODSrbbLWgNEeSYhFB": { + "ga4gh:VA.K7akyz9PHB0wg8wBNVlWAAdvMbJUJJfU": { "params": { "definition": "NC_000010.11:g.87894077C>T" }, "allele_response": { "messages": [], "object": { - "id": "ga4gh:VA.GbDZmcmmWJngURaODSrbbLWgNEeSYhFB", + "digest": "K7akyz9PHB0wg8wBNVlWAAdvMbJUJJfU", + "id": "ga4gh:VA.K7akyz9PHB0wg8wBNVlWAAdvMbJUJJfU", "location": { - "id": "ga4gh:SL.aCMcqLGKClwMWEDx3QWe4XSiGDlKXdB8", - "end": 87894077, + "digest": "01EH5o6V6VEyNUq68gpeTwKE7xOo-WAy", + "id": "ga4gh:SL.01EH5o6V6VEyNUq68gpeTwKE7xOo-WAy", "start": 87894076, + "end": 87894077, "sequenceReference": { "refgetAccession": "SQ.ss8r_wB0-b9r44TQTMmVTI92884QvBiB", "type": "SequenceReference" @@ -25,38 +27,40 @@ "type": "Allele" } }, - "location_id": "ga4gh:SL.aCMcqLGKClwMWEDx3QWe4XSiGDlKXdB8" + "location_id": "ga4gh:SL.01EH5o6V6VEyNUq68gpeTwKE7xOo-WAy" }, - "ga4gh:VA.EB-JEr9jnAjzL_zjxzBqRgGMREVCv31v": { + "ga4gh:VA.1FzYrqG-7jB3Wr46eIL_L5BWElQZEB7i": { "params": { "definition": "NM_000551.3:c.1A>T" }, "allele_response": { "messages": [], "object": { - "id": "ga4gh:VA.EB-JEr9jnAjzL_zjxzBqRgGMREVCv31v", - "type": "Allele", + "digest": "1FzYrqG-7jB3Wr46eIL_L5BWElQZEB7i", + "id": "ga4gh:VA.1FzYrqG-7jB3Wr46eIL_L5BWElQZEB7i", "location": { - "id": "ga4gh:SL.-FaQ9J0LDvasaUpfLKstbyDx8nUld9fl", - "type": "SequenceLocation", + "digest": "WoWgQNl0L6EkWkW_xRl0TSwzmzh-Z3FD", + "id": "ga4gh:SL.WoWgQNl0L6EkWkW_xRl0TSwzmzh-Z3FD", + "start": 213, + "end": 214, "sequenceReference": { "refgetAccession": "SQ.v_QTc1p-MUYdgrRv4LMT6ByXIOsdw3C_", "type": "SequenceReference" }, - "start": 0, - "end": 1 + "type": "SequenceLocation" }, "state": { "type": "LiteralSequenceExpression", "sequence": "T" - } + }, + "type": "Allele" } }, - "location_id": "ga4gh:SL.-FaQ9J0LDvasaUpfLKstbyDx8nUld9fl" + "location_id": "ga4gh:SL.WoWgQNl0L6EkWkW_xRl0TSwzmzh-Z3FD" } }, "copy_numbers": { - "ga4gh:CX.UvIENZGNmCM5j38yHueYdd-BZLn-aLJM": { + "ga4gh:CX.TvAhuGK6HYf53mXoUnon60cZ7DC_UgM3": { "params": { "definition": "NC_000013.11:g.26440969_26443305del", "copy_change": "efo:0030069", @@ -65,9 +69,11 @@ "copy_number_response": { "messages": [], "object": { - "id": "ga4gh:CX.UvIENZGNmCM5j38yHueYdd-BZLn-aLJM", + "digest": "TvAhuGK6HYf53mXoUnon60cZ7DC_UgM3", + "id": "ga4gh:CX.TvAhuGK6HYf53mXoUnon60cZ7DC_UgM3", "location": { - "id": "ga4gh:SL.XBjSbazxe6h8lI14zfXIxqt6J718QEec", + "digest": "4akcjXlbAu4xBKnxjOL_b4DM_20HOCA3", + "id": "ga4gh:SL.4akcjXlbAu4xBKnxjOL_b4DM_20HOCA3", "start": 26440968, "end": 26443305, "sequenceReference": { @@ -80,9 +86,9 @@ "type": "CopyNumberChange" } }, - "location_id": "ga4gh:SL.XBjSbazxe6h8lI14zfXIxqt6J718QEec" + "location_id": "ga4gh:SL.4akcjXlbAu4xBKnxjOL_b4DM_20HOCA3" }, - "ga4gh:CN.tPLSlUBW6j2dVHv0G81U_IFXt4Xawo7J": { + "ga4gh:CN.QnU97C-cRW431O9qWia9UCVRBDvGDH7I": { "params": { "definition": "NC_000013.11:g.26440969_26443305del", "copies": 1, @@ -91,9 +97,11 @@ "copy_number_response": { "messages": [], "object": { - "id": "ga4gh:CN.tPLSlUBW6j2dVHv0G81U_IFXt4Xawo7J", + "digest": "QnU97C-cRW431O9qWia9UCVRBDvGDH7I", + "id": "ga4gh:CN.QnU97C-cRW431O9qWia9UCVRBDvGDH7I", "location": { - "id": "ga4gh:SL.XBjSbazxe6h8lI14zfXIxqt6J718QEec", + "digest": "4akcjXlbAu4xBKnxjOL_b4DM_20HOCA3", + "id": "ga4gh:SL.4akcjXlbAu4xBKnxjOL_b4DM_20HOCA3", "start": 26440968, "end": 26443305, "sequenceReference": { @@ -106,7 +114,7 @@ "type": "CopyNumberCount" } }, - "location_id": "ga4gh:SL.XBjSbazxe6h8lI14zfXIxqt6J718QEec" + "location_id": "ga4gh:SL.4akcjXlbAu4xBKnxjOL_b4DM_20HOCA3" } } } \ No newline at end of file diff --git a/tests/test_location.py b/tests/test_location.py index fadcc23..59f4322 100644 --- a/tests/test_location.py +++ b/tests/test_location.py @@ -11,6 +11,5 @@ def test_location(client, alleles): assert resp.json()["location"] == allele["allele_response"]["object"]["location"] # invalid ID - resp = client.get("/locations/not_a_real_location") - assert resp.status_code == HTTPStatus.OK - assert "location" not in resp.json() + bad_resp = client.get("/locations/not_a_real_location") + assert bad_resp.status_code == HTTPStatus.NOT_FOUND diff --git a/tests/test_variation.py b/tests/test_variation.py index 0b9d281..b974d05 100644 --- a/tests/test_variation.py +++ b/tests/test_variation.py @@ -52,14 +52,12 @@ def test_put_copy_number(client, copy_numbers): def test_put_vrs_variation(client, alleles, copy_numbers): for allele_id, allele in alleles.items(): params = deepcopy(allele["allele_response"]["object"]) - params.pop("id") resp = client.put("/vrs_variation", json=params) assert resp.status_code == HTTPStatus.OK assert resp.json()["object_id"] == allele_id for copy_number_id, copy_number in copy_numbers.items(): params = deepcopy(copy_number["copy_number_response"]["object"]) - params.pop("id") resp = client.put("/vrs_variation", json=params) assert resp.status_code == HTTPStatus.OK assert resp.json()["object_id"] == copy_number_id @@ -71,7 +69,7 @@ def test_get_allele(client, alleles): assert resp.status_code == HTTPStatus.OK assert resp.json()["data"] == allele["allele_response"]["object"] - bad_resp = client.get("/allele/ga4gh:VA.invalid7DSM9KE3Z0LntAukLqm0K2ENn") + bad_resp = client.get("/variation/ga4gh:VA.invalid7DSM9KE3Z0LntAukLqm0K2ENn") assert bad_resp.status_code == HTTPStatus.NOT_FOUND diff --git a/tests/test_vcf.py b/tests/test_vcf.py index c4057d7..c0a9f29 100644 --- a/tests/test_vcf.py +++ b/tests/test_vcf.py @@ -49,7 +49,7 @@ def test_vcf_registration_default_assembly(client, sample_vcf_grch38): resp = client.put("/vcf", files={"vcf": ("test.vcf", sample_vcf_grch38)}) assert resp.status_code == HTTPStatus.OK assert ( - b"VRS_Allele_IDs=ga4gh:VA.8MVWsPFeScEY_19U32k4LVI_3lEOKGUL,ga4gh:VA.l7crC9qxgcDtexejZkfp6yPNevEMnZ5y" + b"VRS_Allele_IDs=ga4gh:VA.ryPubD68BB0D-D78L_kK4993mXmsNNWe,ga4gh:VA._QhHH18HBAIeLos6npRgR-S_0lAX5KR6" in resp.content ) @@ -58,7 +58,7 @@ def test_vcf_registration_grch38(client, sample_vcf_grch38): resp = client.put("/vcf", params={"assembly": "GRCh38"}, files={"vcf": ("test.vcf", sample_vcf_grch38)}) assert resp.status_code == HTTPStatus.OK assert ( - b"VRS_Allele_IDs=ga4gh:VA.8MVWsPFeScEY_19U32k4LVI_3lEOKGUL,ga4gh:VA.l7crC9qxgcDtexejZkfp6yPNevEMnZ5y" + b"VRS_Allele_IDs=ga4gh:VA.ryPubD68BB0D-D78L_kK4993mXmsNNWe,ga4gh:VA._QhHH18HBAIeLos6npRgR-S_0lAX5KR6" in resp.content ) @@ -106,7 +106,7 @@ def test_vcf_registration_grch37(client, sample_vcf_grch37): resp = client.put("/vcf", params={"assembly": "GRCh37"}, files={"vcf": ("test.vcf", sample_vcf_grch37)}) assert resp.status_code == HTTPStatus.OK assert ( - b"VRS_Allele_IDs=ga4gh:VA.R-tU3agvAtsRD59MXZaCymsVE4yjMpt3,ga4gh:VA.f_EzEO6CLZmubm1gbL20F9EV8GZPLAJz" + b"VRS_Allele_IDs=ga4gh:VA.iwk6beQfvJGkeW33NBbSqalr29XkDBE5,ga4gh:VA.CNSRLQlBrly3rRcdldN85dw2Tjos7Cas" in resp.content )