From 03425d33d2c8e132ea7610df3c038d7afc15c7da Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Thu, 18 Jul 2024 09:22:37 -0400 Subject: [PATCH] fix: ensure Entity objects return type as str (#434) close #433 * Add back `use_enum_values` config . I removed in #417 because of a copy/paste I had done in the Adjacency class which left me confused. * `type` field should be a literal string, not enum --- .../getting_started/3_Basic_Models.ipynb | 2 +- .../4_Exploring_the_AlleleTranslator.ipynb | 2 +- .../5_Exploring_the_CnvTranslator.ipynb | 30 ++++++------ .../getting_started/6_Upcoming_features.ipynb | 46 +++++++++---------- src/ga4gh/core/__init__.py | 2 + src/ga4gh/core/domain_models.py | 34 +++++++------- src/ga4gh/core/entity_models.py | 6 ++- src/ga4gh/vrs/__init__.py | 2 + src/ga4gh/vrs/models.py | 36 ++++++++------- tests/validation/test_models.py | 24 +++++++++- 10 files changed, 107 insertions(+), 77 deletions(-) diff --git a/notebooks/getting_started/3_Basic_Models.ipynb b/notebooks/getting_started/3_Basic_Models.ipynb index 87931317..e6310d84 100644 --- a/notebooks/getting_started/3_Basic_Models.ipynb +++ b/notebooks/getting_started/3_Basic_Models.ipynb @@ -347,7 +347,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.1" + "version": "3.12.2" } }, "nbformat": 4, diff --git a/notebooks/getting_started/4_Exploring_the_AlleleTranslator.ipynb b/notebooks/getting_started/4_Exploring_the_AlleleTranslator.ipynb index d8542b1e..26e0ec97 100644 --- a/notebooks/getting_started/4_Exploring_the_AlleleTranslator.ipynb +++ b/notebooks/getting_started/4_Exploring_the_AlleleTranslator.ipynb @@ -513,7 +513,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.1" + "version": "3.12.2" } }, "nbformat": 4, diff --git a/notebooks/getting_started/5_Exploring_the_CnvTranslator.ipynb b/notebooks/getting_started/5_Exploring_the_CnvTranslator.ipynb index 44252c7a..0ab4182d 100644 --- a/notebooks/getting_started/5_Exploring_the_CnvTranslator.ipynb +++ b/notebooks/getting_started/5_Exploring_the_CnvTranslator.ipynb @@ -170,16 +170,16 @@ "data": { "text/plain": [ "{'id': 'ga4gh:CX.0M5VkV5v504_laQURFMEsqzZGcOF9YEw',\n", - " 'type': ,\n", + " 'type': 'CopyNumberChange',\n", " 'digest': '0M5VkV5v504_laQURFMEsqzZGcOF9YEw',\n", " 'location': {'id': 'ga4gh:SL.GSJAEJXFDz7Nq6VlJj5NTEku48MmteUU',\n", - " 'type': ,\n", + " 'type': 'SequenceLocation',\n", " 'digest': 'GSJAEJXFDz7Nq6VlJj5NTEku48MmteUU',\n", - " 'sequenceReference': {'type': ,\n", + " 'sequenceReference': {'type': 'SequenceReference',\n", " 'refgetAccession': 'SQ.eK4D2MosgK_ivBkgi6FVPg5UXs1bYESm'},\n", " 'start': 45002866,\n", " 'end': 45015056},\n", - " 'copyChange': }" + " 'copyChange': 'efo:0030067'}" ] }, "execution_count": 4, @@ -225,16 +225,16 @@ "data": { "text/plain": [ "{'id': 'ga4gh:CX.0BN4vrqPrLPAZYsQEAPnG4IS8AYeBGe1',\n", - " 'type': ,\n", + " 'type': 'CopyNumberChange',\n", " 'digest': '0BN4vrqPrLPAZYsQEAPnG4IS8AYeBGe1',\n", " 'location': {'id': 'ga4gh:SL.tydo6UFL8Y60L5Me3k8AJfljURO9vYn9',\n", - " 'type': ,\n", + " 'type': 'SequenceLocation',\n", " 'digest': 'tydo6UFL8Y60L5Me3k8AJfljURO9vYn9',\n", - " 'sequenceReference': {'type': ,\n", + " 'sequenceReference': {'type': 'SequenceReference',\n", " 'refgetAccession': 'SQ.KEO-4XBcm1cxeo_DIQ8_ofqGUkp4iZhI'},\n", " 'start': 75502957,\n", " 'end': 76045032},\n", - " 'copyChange': }" + " 'copyChange': 'efo:0030070'}" ] }, "execution_count": 5, @@ -292,12 +292,12 @@ "data": { "text/plain": [ "{'id': 'ga4gh:CN.O_QHImmfErh9jDFkJaypPPvUmnj7EM70',\n", - " 'type': ,\n", + " 'type': 'CopyNumberCount',\n", " 'digest': 'O_QHImmfErh9jDFkJaypPPvUmnj7EM70',\n", " 'location': {'id': 'ga4gh:SL.hBVWalem_rNclxjmUuT9CHbEGCdlqW9L',\n", - " 'type': ,\n", + " 'type': 'SequenceLocation',\n", " 'digest': 'hBVWalem_rNclxjmUuT9CHbEGCdlqW9L',\n", - " 'sequenceReference': {'type': ,\n", + " 'sequenceReference': {'type': 'SequenceReference',\n", " 'refgetAccession': 'SQ.HxuclGHh0XCDuF8x6yQrpHUBL7ZntAHc'},\n", " 'start': 85623,\n", " 'end': 57073230},\n", @@ -346,12 +346,12 @@ "data": { "text/plain": [ "{'id': 'ga4gh:CN.WDzlT9oUq4IcQrVRWGH0dZnARnFBotCS',\n", - " 'type': ,\n", + " 'type': 'CopyNumberCount',\n", " 'digest': 'WDzlT9oUq4IcQrVRWGH0dZnARnFBotCS',\n", " 'location': {'id': 'ga4gh:SL.H1Zh5xdBqamBjwVE9orWdY_uBkpEMH1V',\n", - " 'type': ,\n", + " 'type': 'SequenceLocation',\n", " 'digest': 'H1Zh5xdBqamBjwVE9orWdY_uBkpEMH1V',\n", - " 'sequenceReference': {'type': ,\n", + " 'sequenceReference': {'type': 'SequenceReference',\n", " 'refgetAccession': 'SQ.5ZUqxCmDDgN4xTRbaSjN8LwgZironmB8'},\n", " 'start': 46111352,\n", " 'end': 46119948},\n", @@ -385,7 +385,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.1" + "version": "3.12.2" } }, "nbformat": 4, diff --git a/notebooks/getting_started/6_Upcoming_features.ipynb b/notebooks/getting_started/6_Upcoming_features.ipynb index 5f4f31eb..01f6f9bf 100644 --- a/notebooks/getting_started/6_Upcoming_features.ipynb +++ b/notebooks/getting_started/6_Upcoming_features.ipynb @@ -146,17 +146,16 @@ "data": { "text/plain": [ "{'id': 'ga4gh:VA.LK_4rOVxyEwrEpaOVd-BDFV0ocbO5vgV',\n", - " 'type': ,\n", + " 'type': 'Allele',\n", " 'digest': 'LK_4rOVxyEwrEpaOVd-BDFV0ocbO5vgV',\n", " 'location': {'id': 'ga4gh:SL.nA5-KovovkH-5p3LF1657nkkeWFwrInI',\n", - " 'type': ,\n", + " 'type': 'SequenceLocation',\n", " 'digest': 'nA5-KovovkH-5p3LF1657nkkeWFwrInI',\n", - " 'sequenceReference': {'type': ,\n", + " 'sequenceReference': {'type': 'SequenceReference',\n", " 'refgetAccession': 'SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI'},\n", " 'start': 80656509,\n", " 'end': 80656510},\n", - " 'state': {'type': ,\n", - " 'sequence': 'TT'}}" + " 'state': {'type': 'LiteralSequenceExpression', 'sequence': 'TT'}}" ] }, "execution_count": 4, @@ -226,9 +225,9 @@ "data": { "text/plain": [ "{'id': 'ga4gh:SL.nA5-KovovkH-5p3LF1657nkkeWFwrInI',\n", - " 'type': ,\n", + " 'type': 'SequenceLocation',\n", " 'digest': 'nA5-KovovkH-5p3LF1657nkkeWFwrInI',\n", - " 'sequenceReference': {'type': ,\n", + " 'sequenceReference': {'type': 'SequenceReference',\n", " 'refgetAccession': 'SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI'},\n", " 'start': 80656509,\n", " 'end': 80656510}" @@ -261,7 +260,7 @@ { "data": { "text/plain": [ - "{'type': ,\n", + "{'type': 'SequenceReference',\n", " 'refgetAccession': 'SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI'}" ] }, @@ -293,7 +292,7 @@ "data": { "text/plain": [ "{'id': None,\n", - " 'type': ,\n", + " 'type': 'SequenceReference',\n", " 'label': None,\n", " 'description': None,\n", " 'alternativeLabels': None,\n", @@ -399,9 +398,9 @@ "data": { "text/plain": [ "{'id': 'refseq:NC_000005.10',\n", - " 'type': ,\n", - " 'label': 'GRCh38:chr5',\n", - " 'alternativeLabels': ['GRCh38:5'],\n", + " 'type': 'SequenceReference',\n", + " 'label': 'GRCh38:5',\n", + " 'alternativeLabels': ['GRCh38:chr5'],\n", " 'refgetAccession': 'SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI'}" ] }, @@ -435,12 +434,12 @@ "data": { "text/plain": [ "{'id': 'ga4gh:SL.nA5-KovovkH-5p3LF1657nkkeWFwrInI',\n", - " 'type': ,\n", + " 'type': 'SequenceLocation',\n", " 'digest': 'nA5-KovovkH-5p3LF1657nkkeWFwrInI',\n", " 'sequenceReference': {'id': 'refseq:NC_000005.10',\n", - " 'type': ,\n", - " 'label': 'GRCh38:chr5',\n", - " 'alternativeLabels': ['GRCh38:5'],\n", + " 'type': 'SequenceReference',\n", + " 'label': 'GRCh38:5',\n", + " 'alternativeLabels': ['GRCh38:chr5'],\n", " 'refgetAccession': 'SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI'},\n", " 'start': 80656509,\n", " 'end': 80656510}" @@ -465,20 +464,19 @@ "data": { "text/plain": [ "{'id': 'ga4gh:VA.LK_4rOVxyEwrEpaOVd-BDFV0ocbO5vgV',\n", - " 'type': ,\n", + " 'type': 'Allele',\n", " 'digest': 'LK_4rOVxyEwrEpaOVd-BDFV0ocbO5vgV',\n", " 'location': {'id': 'ga4gh:SL.nA5-KovovkH-5p3LF1657nkkeWFwrInI',\n", - " 'type': ,\n", + " 'type': 'SequenceLocation',\n", " 'digest': 'nA5-KovovkH-5p3LF1657nkkeWFwrInI',\n", " 'sequenceReference': {'id': 'refseq:NC_000005.10',\n", - " 'type': ,\n", - " 'label': 'GRCh38:chr5',\n", - " 'alternativeLabels': ['GRCh38:5'],\n", + " 'type': 'SequenceReference',\n", + " 'label': 'GRCh38:5',\n", + " 'alternativeLabels': ['GRCh38:chr5'],\n", " 'refgetAccession': 'SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI'},\n", " 'start': 80656509,\n", " 'end': 80656510},\n", - " 'state': {'type': ,\n", - " 'sequence': 'TT'}}" + " 'state': {'type': 'LiteralSequenceExpression', 'sequence': 'TT'}}" ] }, "execution_count": 11, @@ -507,7 +505,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.1" + "version": "3.12.2" } }, "nbformat": 4, diff --git a/src/ga4gh/core/__init__.py b/src/ga4gh/core/__init__.py index 3941f2e2..196c20ec 100644 --- a/src/ga4gh/core/__init__.py +++ b/src/ga4gh/core/__init__.py @@ -15,6 +15,7 @@ from .pydantic import ( is_pydantic_instance, is_curie_type, pydantic_copy ) +from .domain_models import CommonDomainType from . import entity_models, domain_models __all__ = [ @@ -36,6 +37,7 @@ "is_pydantic_instance", "is_curie_type", "pydantic_copy", + "CommonDomainType", "entity_models", "domain_models" ] diff --git a/src/ga4gh/core/domain_models.py b/src/ga4gh/core/domain_models.py index e9a070c7..853da78b 100644 --- a/src/ga4gh/core/domain_models.py +++ b/src/ga4gh/core/domain_models.py @@ -10,8 +10,8 @@ * `import ga4gh.core`, and refer to models using the fully-qualified module name, e.g., `ga4gh.core.domain_models.Gene` """ -from typing import Literal, Union, List from enum import Enum +from typing import Literal, Union, List from pydantic import Field, RootModel @@ -33,8 +33,8 @@ class CommonDomainType(str, Enum): class Phenotype(DomainEntity): """An observable characteristic or trait of an organism.""" - type: Literal[CommonDomainType.PHENOTYPE] = Field( - CommonDomainType.PHENOTYPE, + type: Literal["Phenotype"] = Field( + CommonDomainType.PHENOTYPE.value, description=f'MUST be "{CommonDomainType.PHENOTYPE.value}".' ) @@ -44,8 +44,8 @@ class Disease(DomainEntity): of all or part of an organism and is not immediately due to any external injury. """ - type: Literal[CommonDomainType.DISEASE] = Field( - CommonDomainType.DISEASE, + type: Literal["Disease"] = Field( + CommonDomainType.DISEASE.value, description=f'MUST be "{CommonDomainType.DISEASE.value}".' ) @@ -53,8 +53,8 @@ class Disease(DomainEntity): class TraitSet(DomainEntity): """A set of phenotype and/or disease concepts that together constitute a condition.""" - type: Literal[CommonDomainType.TRAIT_SET] = Field( - CommonDomainType.TRAIT_SET, + type: Literal["TraitSet"] = Field( + CommonDomainType.TRAIT_SET.value, description=f'MUST be "{CommonDomainType.TRAIT_SET.value}".' ) traits: List[Union[Disease, Phenotype]] = Field( @@ -76,8 +76,8 @@ class Condition(RootModel): class TherapeuticAction(DomainEntity): """A therapeutic action taken that is intended to alter or stop a pathologic process.""" - type: Literal[CommonDomainType.TR_ACTION] = Field( - CommonDomainType.TR_ACTION, + type: Literal["TherapeuticAction"] = Field( + CommonDomainType.TR_ACTION.value, description=f'MUST be "{CommonDomainType.TR_ACTION.value}".' ) @@ -85,8 +85,8 @@ class TherapeuticAction(DomainEntity): class TherapeuticAgent(DomainEntity): """An administered therapeutic agent that is intended to alter or stop a pathologic process.""" - type: Literal[CommonDomainType.TR_AGENT] = Field( - CommonDomainType.TR_AGENT, + type: Literal["TherapeuticAgent"] = Field( + CommonDomainType.TR_AGENT.value, description=f'MUST be "{CommonDomainType.TR_AGENT.value}".' ) @@ -94,8 +94,8 @@ class TherapeuticAgent(DomainEntity): class TherapeuticSubstituteGroup(DomainEntity): """A group of therapeutic procedures that may be treated as substitutes for one another.""" - type: Literal[CommonDomainType.TR_SUB] = Field( - CommonDomainType.TR_SUB, + type: Literal["TherapeuticSubstituteGroup"] = Field( + CommonDomainType.TR_SUB.value, description=f'MUST be "{CommonDomainType.TR_SUB.value}".' ) substitutes: List[Union[TherapeuticAction, TherapeuticAgent]] = Field( @@ -110,8 +110,8 @@ class CombinationTherapy(DomainEntity): performed in combination. """ - type: Literal[CommonDomainType.TR_COMB] = Field( - CommonDomainType.TR_COMB, + type: Literal["CombinationTherapy"] = Field( + CommonDomainType.TR_COMB.value, description=f'MUST be "{CommonDomainType.TR_COMB.value}".' ) components: List[Union[TherapeuticSubstituteGroup, TherapeuticAction, TherapeuticAgent]] = Field( @@ -136,7 +136,7 @@ class TherapeuticProcedure(RootModel): class Gene(DomainEntity): """A basic physical and functional unit of heredity.""" - type: Literal[CommonDomainType.GENE] = Field( - CommonDomainType.GENE, + type: Literal["Gene"] = Field( + CommonDomainType.GENE.value, description=f'MUST be "{CommonDomainType.GENE.value}".' ) diff --git a/src/ga4gh/core/entity_models.py b/src/ga4gh/core/entity_models.py index 0e00ef61..78146b49 100644 --- a/src/ga4gh/core/entity_models.py +++ b/src/ga4gh/core/entity_models.py @@ -14,7 +14,7 @@ from typing import Any, Dict, Annotated, Optional, Union, List from enum import Enum -from pydantic import BaseModel, Field, RootModel, StringConstraints, model_serializer +from pydantic import BaseModel, Field, RootModel, StringConstraints, model_serializer, ConfigDict from ga4gh.core import GA4GH_IR_REGEXP @@ -119,6 +119,8 @@ class Coding(BaseModel): class ConceptMapping(BaseModel): """A mapping to a concept in a terminology or code system.""" + model_config = ConfigDict(use_enum_values=True) + coding: Coding = Field(..., description="A structured representation of a code for a defined concept in a terminology or code system.") relation: Relation = Field(..., description="A mapping relation between concepts as defined by the Simple Knowledge Organization System (SKOS).") @@ -144,6 +146,8 @@ class Expression(BaseModel): variation include the HGVS and ISCN nomenclatures. """ + model_config = ConfigDict(use_enum_values=True) + syntax: Syntax = Field(..., description="The syntax used to describe the variation. The value should be one of the supported syntaxes.") value: str = Field(..., description="The expression of the variation in the specified syntax. The value should be a valid expression in the specified syntax.") syntax_version: Optional[str] = Field(None, description="The version of the syntax used to describe the variation. This is particularly important for HGVS expressions, as the syntax has evolved over time.") diff --git a/src/ga4gh/vrs/__init__.py b/src/ga4gh/vrs/__init__.py index adbec052..997c9a52 100644 --- a/src/ga4gh/vrs/__init__.py +++ b/src/ga4gh/vrs/__init__.py @@ -6,12 +6,14 @@ from .normalize import normalize from .enderef import vrs_deref, vrs_enref +from .models import VrsType from . import models __all__ = [ "normalize", "vrs_deref", "vrs_enref", + "VrsType", "models" ] diff --git a/src/ga4gh/vrs/models.py b/src/ga4gh/vrs/models.py index 6b1bcdc9..e787694b 100644 --- a/src/ga4gh/vrs/models.py +++ b/src/ga4gh/vrs/models.py @@ -26,7 +26,7 @@ ) from ga4gh.core.pydantic import get_pydantic_root -from pydantic import BaseModel, Field, RootModel, StringConstraints, model_serializer +from pydantic import BaseModel, Field, RootModel, StringConstraints, model_serializer, ConfigDict from ga4gh.core.pydantic import ( getattr_in @@ -369,8 +369,8 @@ class SequenceString(RootModel): class LengthExpression(_ValueObject): """A sequence expressed only by its length.""" - type: Literal[VrsType.LEN_EXPR] = Field( - VrsType.LEN_EXPR, description=f'MUST be "{VrsType.LEN_EXPR.value}"' + type: Literal["LengthExpression"] = Field( + VrsType.LEN_EXPR.value, description=f'MUST be "{VrsType.LEN_EXPR.value}"' ) length: Optional[Union[Range, int]] = None @@ -384,8 +384,8 @@ class ga4gh(_ValueObject.ga4gh): class ReferenceLengthExpression(_ValueObject): """An expression of a length of a sequence from a repeating reference.""" - type: Literal[VrsType.REF_LEN_EXPR] = Field( - VrsType.REF_LEN_EXPR, description=f'MUST be "{VrsType.REF_LEN_EXPR.value}"' + type: Literal["ReferenceLengthExpression"] = Field( + VrsType.REF_LEN_EXPR.value, description=f'MUST be "{VrsType.REF_LEN_EXPR.value}"' ) length: Union[Range, int] = Field( ..., description='The number of residues of the expressed sequence.' @@ -408,8 +408,8 @@ class ga4gh(_ValueObject.ga4gh): class LiteralSequenceExpression(_ValueObject): """An explicit expression of a Sequence.""" - type: Literal[VrsType.LIT_SEQ_EXPR] = Field( - VrsType.LIT_SEQ_EXPR, description=f'MUST be "{VrsType.LIT_SEQ_EXPR.value}"' + type: Literal["LiteralSequenceExpression"] = Field( + VrsType.LIT_SEQ_EXPR.value, description=f'MUST be "{VrsType.LIT_SEQ_EXPR.value}"' ) sequence: SequenceString = Field(..., description='the literal sequence') @@ -428,7 +428,9 @@ class ga4gh(_ValueObject.ga4gh): class SequenceReference(_ValueObject): """A sequence of nucleic or amino acid character codes.""" - type: Literal[VrsType.SEQ_REF] = Field(VrsType.SEQ_REF, description=f'MUST be "{VrsType.SEQ_REF.value}"') + model_config = ConfigDict(use_enum_values=True) + + type: Literal["SequenceReference"] = Field(VrsType.SEQ_REF.value, description=f'MUST be "{VrsType.SEQ_REF.value}"') refgetAccession: Annotated[str, StringConstraints(pattern=r'^SQ.[0-9A-Za-z_\-]{32}$')] = Field( ..., description='A `GA4GH RefGet ` identifier for the referenced sequence, using the sha512t24u digest.', @@ -446,7 +448,7 @@ class ga4gh(_ValueObject.ga4gh): class SequenceLocation(_Ga4ghIdentifiableObject): """A `Location` defined by an interval on a referenced `Sequence`.""" - type: Literal[VrsType.SEQ_LOC] = Field(VrsType.SEQ_LOC, description=f'MUST be "{VrsType.SEQ_LOC.value}"') + type: Literal["SequenceLocation"] = Field(VrsType.SEQ_LOC.value, description=f'MUST be "{VrsType.SEQ_LOC.value}"') sequenceReference: Optional[Union[IRI, SequenceReference]] = Field( None, description='A reference to a `Sequence` on which the location is defined.' ) @@ -527,7 +529,7 @@ class _VariationBase(_Ga4ghIdentifiableObject, ABC): class Allele(_VariationBase): """The state of a molecule at a `Location`.""" - type: Literal[VrsType.ALLELE] = Field(VrsType.ALLELE, description=f'MUST be "{VrsType.ALLELE.value}"') + type: Literal["Allele"] = Field(VrsType.ALLELE.value, description=f'MUST be "{VrsType.ALLELE.value}"') location: Union[IRI, SequenceLocation] = Field( ..., description='The location of the Allele' ) @@ -570,7 +572,7 @@ class ga4gh(_Ga4ghIdentifiableObject.ga4gh): class CisPhasedBlock(_VariationBase): """An ordered set of co-occurring `Variation` on the same molecule.""" - type: Literal[VrsType.CIS_PHASED_BLOCK] = Field(VrsType.CIS_PHASED_BLOCK, description=f'MUST be "{VrsType.CIS_PHASED_BLOCK.value}"') + type: Literal["CisPhasedBlock"] = Field(VrsType.CIS_PHASED_BLOCK.value, description=f'MUST be "{VrsType.CIS_PHASED_BLOCK.value}"') members: List[Union[Allele, IRI]] = Field( ..., description='A list of `Alleles` that are found in-cis on a shared molecule.', @@ -603,7 +605,7 @@ class Adjacency(_VariationBase): potentially with an intervening linker sequence. """ - type: Literal[VrsType.ADJACENCY] = Field(VrsType.ADJACENCY, description=f'MUST be "{VrsType.ADJACENCY.value}"') + type: Literal["Adjacency"] = Field(VrsType.ADJACENCY.value, description=f'MUST be "{VrsType.ADJACENCY.value}"') adjoinedSequences: List[Union[IRI, SequenceLocation]] = Field( ..., description="The terminal sequence or pair of adjoined sequences that defines in the adjacency.", @@ -631,7 +633,7 @@ class SequenceTerminus(_VariationBase): is not allowed and it removes the unnecessary array structure. """ - type: Literal[VrsType.SEQ_TERMINUS] = Field(VrsType.SEQ_TERMINUS, description=f'MUST be "{VrsType.SEQ_TERMINUS.value}"') + type: Literal["SequenceTerminus"] = Field(VrsType.SEQ_TERMINUS.value, description=f'MUST be "{VrsType.SEQ_TERMINUS.value}"') location: Union[IRI, SequenceLocation] = Field(..., description="The location of the terminus.") class ga4gh(_Ga4ghIdentifiableObject.ga4gh): @@ -647,7 +649,7 @@ class DerivativeSequence(_VariationBase): sequence composed from multiple sequence adjacencies. """ - type: Literal[VrsType.DERIVATIVE_SEQ] = Field(VrsType.DERIVATIVE_SEQ, description=f'MUST be "{VrsType.DERIVATIVE_SEQ.value}"') + type: Literal["DerivativeSequence"] = Field(VrsType.DERIVATIVE_SEQ.value, description=f'MUST be "{VrsType.DERIVATIVE_SEQ.value}"') components: List[Union[IRI, Adjacency, Allele, SequenceTerminus, CisPhasedBlock]] = Field( ..., description="The sequence components that make up the derivative sequence.", @@ -681,7 +683,7 @@ class CopyNumberCount(_CopyNumber): (e.g. genome, cell, etc.). """ - type: Literal[VrsType.CN_COUNT] = Field(VrsType.CN_COUNT, description=f'MUST be "{VrsType.CN_COUNT.value}"') + type: Literal["CopyNumberCount"] = Field(VrsType.CN_COUNT.value, description=f'MUST be "{VrsType.CN_COUNT.value}"') copies: Union[Range, int] = Field( ..., description='The integral number of copies of the subject in a system' ) @@ -700,7 +702,9 @@ class CopyNumberChange(_CopyNumber): (e.g. genome, cell, etc.) relative to a baseline ploidy. """ - type: Literal[VrsType.CN_CHANGE] = Field(VrsType.CN_CHANGE, description=f'MUST be "{VrsType.CN_CHANGE.value}"') + model_config = ConfigDict(use_enum_values=True) + + type: Literal["CopyNumberChange"] = Field(VrsType.CN_CHANGE.value, description=f'MUST be "{VrsType.CN_CHANGE.value}"') copyChange: CopyChange = Field( ..., description='MUST be one of "efo:0030069" (complete genomic loss), "efo:0020073" (high-level loss), "efo:0030068" (low-level loss), "efo:0030067" (loss), "efo:0030064" (regional base ploidy), "efo:0030070" (gain), "efo:0030071" (low-level gain), "efo:0030072" (high-level gain).', diff --git a/tests/validation/test_models.py b/tests/validation/test_models.py index 50c1ec63..f09f5db8 100644 --- a/tests/validation/test_models.py +++ b/tests/validation/test_models.py @@ -4,11 +4,12 @@ import os +from pydantic import ValidationError import pytest import yaml -from ga4gh.core import ga4gh_serialize, ga4gh_digest, ga4gh_identify, PrevVrsVersion, entity_models -from ga4gh.vrs import models +from ga4gh.core import ga4gh_serialize, ga4gh_digest, ga4gh_identify, PrevVrsVersion, entity_models, CommonDomainType, domain_models +from ga4gh.vrs import models, VrsType def ga4gh_1_3_identify(*args, **kwargs): kwargs['as_version'] = PrevVrsVersion.V1_3 @@ -96,3 +97,22 @@ def test_prev_vrs_version(): with pytest.raises(ValueError, match="Only `LiteralSequenceExpression` and `ReferenceLengthExpression` are supported for previous versions of VRS"): ga4gh_func(allele_le, as_version=PrevVrsVersion.V1_3) + + +def test_valid_types(): + """Ensure that type enums values correct. Values should correspond to class""" + for gks_models, gks_enum in [(models, VrsType), (domain_models, CommonDomainType)]: + for enum_val in gks_enum.__members__.values(): + enum_val = enum_val.value + if hasattr(gks_models, enum_val): + gks_class = getattr(gks_models, enum_val) + try: + assert gks_class(type=enum_val) + except ValidationError as e: + found_type_mismatch = False + for error in e.errors(): + if error["loc"] == ("type",): + found_type_mismatch = True + assert not found_type_mismatch, f"Found mismatch in type literal: {enum_val} vs {error['ctx']['expected']}" + else: + assert False, f"{str(gks_models)} class not found: {enum_val}"