Skip to content

Commit

Permalink
Merge pull request #267 from alliance-genome/variation_cleanup
Browse files Browse the repository at this point in the history
Tweaks to variant model
  • Loading branch information
markquintontulloch authored Oct 9, 2024
2 parents 157f4ab + 4a9f6b3 commit 41b2abd
Showing 4 changed files with 34 additions and 82 deletions.
64 changes: 8 additions & 56 deletions generated/jsonschema/allianceModel.schema.json
Original file line number Diff line number Diff line change
@@ -8892,10 +8892,6 @@
"description": "A high-level grouping for the relationship type. This is analogous to category for nodes. In RDF, this corresponds to rdf:predicate and in Neo4j this corresponds to the relationship type.",
"type": "string"
},
"sequence_of_reference_accession_number": {
"description": "The accession number that describes the assembly and assembly component (chromosome or scaffold) of the reference sequence.",
"type": "string"
},
"start": {
"description": "The start of the feature in positive 1-based integer coordinates relative to the reference landmark.",
"type": "integer"
@@ -8977,8 +8973,8 @@
"description": "The sequence of DNA base pairs removed from the reference sequence by the variant.",
"type": "string"
},
"dna_mutation_type": {
"description": "The type of DNA mutation, for example, insertion, deletion, substitution, or indel.",
"dna_mutation_type_curie": {
"description": "Curie of SOTerm describing the type of DNA mutation, for example, insertion, deletion, substitution, or indel.",
"type": "string"
},
"end": {
@@ -8992,8 +8988,8 @@
},
"type": "array"
},
"gene_localization_type": {
"description": "The type of gene localization, for example, intronic, intergenic, or intragenic.",
"gene_localization_type_curie": {
"description": "Curie of SOTerm describing the type of gene localization, for example, intronic, intergenic, or intragenic.",
"type": "string"
},
"inserted_sequence": {
@@ -9033,10 +9029,6 @@
"description": "Name of VocabularyTerm representing relation of an Association",
"type": "string"
},
"sequence_of_reference_accession_number": {
"description": "The accession number that describes the assembly and assembly component (chromosome or scaffold) of the reference sequence.",
"type": "string"
},
"start": {
"description": "The start of the feature in positive 1-based integer coordinates relative to the reference landmark.",
"type": "integer"
@@ -9156,10 +9148,6 @@
"description": "A high-level grouping for the relationship type. This is analogous to category for nodes. In RDF, this corresponds to rdf:predicate and in Neo4j this corresponds to the relationship type.",
"type": "string"
},
"sequence_of_reference_accession_number": {
"description": "The accession number that describes the assembly and assembly component (chromosome or scaffold) of the reference sequence.",
"type": "string"
},
"start": {
"description": "The start of the feature in positive 1-based integer coordinates relative to the reference landmark.",
"type": "integer"
@@ -9279,10 +9267,6 @@
"description": "Name of VocabularyTerm representing relation of an Association",
"type": "string"
},
"sequence_of_reference_accession_number": {
"description": "The accession number that describes the assembly and assembly component (chromosome or scaffold) of the reference sequence.",
"type": "string"
},
"start": {
"description": "The start of the feature in positive 1-based integer coordinates relative to the reference landmark.",
"type": "integer"
@@ -9391,10 +9375,6 @@
"description": "A high-level grouping for the relationship type. This is analogous to category for nodes. In RDF, this corresponds to rdf:predicate and in Neo4j this corresponds to the relationship type.",
"type": "string"
},
"sequence_of_reference_accession_number": {
"description": "The accession number that describes the assembly and assembly component (chromosome or scaffold) of the reference sequence.",
"type": "string"
},
"start": {
"description": "The start of the feature in positive 1-based integer coordinates relative to the reference landmark.",
"type": "integer"
@@ -9504,10 +9484,6 @@
"description": "Name of VocabularyTerm representing relation of an Association",
"type": "string"
},
"sequence_of_reference_accession_number": {
"description": "The accession number that describes the assembly and assembly component (chromosome or scaffold) of the reference sequence.",
"type": "string"
},
"start": {
"description": "The start of the feature in positive 1-based integer coordinates relative to the reference landmark.",
"type": "integer"
@@ -21980,10 +21956,6 @@
"description": "A high-level grouping for the relationship type. This is analogous to category for nodes. In RDF, this corresponds to rdf:predicate and in Neo4j this corresponds to the relationship type.",
"type": "string"
},
"sequence_of_reference_accession_number": {
"description": "The accession number that describes the assembly and assembly component (chromosome or scaffold) of the reference sequence.",
"type": "string"
},
"single_reference": {
"$ref": "#/$defs/Reference",
"description": "holds between an object and a single reference"
@@ -22067,8 +22039,8 @@
"description": "The sequence of DNA base pairs removed from the reference sequence by the variant.",
"type": "string"
},
"dna_mutation_type": {
"description": "The type of DNA mutation, for example, insertion, deletion, substitution, or indel.",
"dna_mutation_type_curie": {
"description": "Curie of SOTerm describing the type of DNA mutation, for example, insertion, deletion, substitution, or indel.",
"type": "string"
},
"end": {
@@ -22082,8 +22054,8 @@
},
"type": "array"
},
"gene_localization_type": {
"description": "The type of gene localization, for example, intronic, intergenic, or intragenic.",
"gene_localization_type_curie": {
"description": "Curie of SOTerm describing the type of gene localization, for example, intronic, intergenic, or intragenic.",
"type": "string"
},
"inserted_sequence": {
@@ -22127,10 +22099,6 @@
"description": "Name of VocabularyTerm representing relation of an Association",
"type": "string"
},
"sequence_of_reference_accession_number": {
"description": "The accession number that describes the assembly and assembly component (chromosome or scaffold) of the reference sequence.",
"type": "string"
},
"start": {
"description": "The start of the feature in positive 1-based integer coordinates relative to the reference landmark.",
"type": "integer"
@@ -22249,10 +22217,6 @@
"description": "A high-level grouping for the relationship type. This is analogous to category for nodes. In RDF, this corresponds to rdf:predicate and in Neo4j this corresponds to the relationship type.",
"type": "string"
},
"sequence_of_reference_accession_number": {
"description": "The accession number that describes the assembly and assembly component (chromosome or scaffold) of the reference sequence.",
"type": "string"
},
"single_reference": {
"$ref": "#/$defs/Reference",
"description": "holds between an object and a single reference"
@@ -22378,10 +22342,6 @@
"description": "Name of VocabularyTerm representing relation of an Association",
"type": "string"
},
"sequence_of_reference_accession_number": {
"description": "The accession number that describes the assembly and assembly component (chromosome or scaffold) of the reference sequence.",
"type": "string"
},
"start": {
"description": "The start of the feature in positive 1-based integer coordinates relative to the reference landmark.",
"type": "integer"
@@ -22489,10 +22449,6 @@
"description": "A high-level grouping for the relationship type. This is analogous to category for nodes. In RDF, this corresponds to rdf:predicate and in Neo4j this corresponds to the relationship type.",
"type": "string"
},
"sequence_of_reference_accession_number": {
"description": "The accession number that describes the assembly and assembly component (chromosome or scaffold) of the reference sequence.",
"type": "string"
},
"single_reference": {
"$ref": "#/$defs/Reference",
"description": "holds between an object and a single reference"
@@ -22608,10 +22564,6 @@
"description": "Name of VocabularyTerm representing relation of an Association",
"type": "string"
},
"sequence_of_reference_accession_number": {
"description": "The accession number that describes the assembly and assembly component (chromosome or scaffold) of the reference sequence.",
"type": "string"
},
"start": {
"description": "The start of the feature in positive 1-based integer coordinates relative to the reference landmark.",
"type": "integer"
21 changes: 18 additions & 3 deletions model/schema/variantDTO.yaml
Original file line number Diff line number Diff line change
@@ -55,7 +55,6 @@ classes:
- variant_sequence
- consequence_curie
- curated_consequence_curie
- sequence_of_reference_accession_number

VariantGenomicLocationAssociationDTO:
is_a: VariantLocationAssociationDTO
@@ -68,8 +67,8 @@ classes:
- inserted_sequence
- deleted_sequence
- padded_base
- dna_mutation_type
- gene_localization_type
- dna_mutation_type_curie
- gene_localization_type_curie

VariantTranscriptLocationAssociationDTO:
is_a: VariantLocationAssociationDTO
@@ -180,6 +179,22 @@ slots:
domain: VariantLocationAssociationDTO
range: string

dna_mutation_type_curie:
  # DTO slot holding the identifier (CURIE) of the SO term that classifies
  # the DNA mutation; the term object itself is not embedded here.
  description: >-
    Curie of SOTerm describing the type of DNA mutation, for example, insertion, deletion, substitution, or indel.
  # A *_curie slot carries a plain string, consistent with
  # gene_localization_type_curie; SOTerm is the range of the non-DTO
  # dna_mutation_type slot in variation.yaml.
  range: string
  examples:
    - value: SO:1000027  # G_to_T_transversion
  notes: >-
    This is a ZFIN specific field.
gene_localization_type_curie:
  # DTO slot: the SO term for the gene localization is given by its CURIE,
  # carried as a plain string.
  range: string
  description: "Curie of SOTerm describing the type of gene localization, for example, intronic, intergenic, or intragenic."
  examples:
    - value: SO:0000147  # exon

source_general_consequence_dtos:
domain: VariantDTO
range: VariantSourceGeneralConsequenceSlotAnnotationDTO
21 changes: 4 additions & 17 deletions model/schema/variation.yaml
Original file line number Diff line number Diff line change
@@ -82,7 +82,6 @@ classes:
- variant_sequence
- consequence
- curated_consequence
- sequence_of_reference_accession_number # Should this point to a chromosome object instead of a string? Will this be redundant with genomic location association of the subject?

VariantGenomicLocationAssociation:
is_a: VariantLocationAssociation
@@ -257,19 +256,19 @@ slots:
exact_mappings:
- SO:0001889

dna_mutation_type: # Should the range of this be an object (SOTerm / OntologyTerm) rather than just a curie?
dna_mutation_type:
description: >-
The type of DNA mutation, for example, insertion, deletion, substitution, or indel.
range: uriorcurie
range: SOTerm
examples:
- value: SO:1000027 # G_to_T_transversion
notes: >-
This is a ZFIN specific field.
gene_localization_type: # Should the range of this be an object (SOTerm / OntologyTerm) rather than just a curie?
gene_localization_type:
description: >-
The type of gene localization, for example, intronic, intergenic, or intragenic.
range: uriorcurie
range: SOTerm
examples:
- value: SO:0000147 # exon

@@ -380,18 +379,6 @@ slots:
required: true
multivalued: false

sequence_of_reference_accession_number:
description: >-
The accession number that describes the assembly and assembly component
(chromosome or scaffold) of the reference sequence.
notes: >-
With redefined Location Associations to AssemblyComponent, Transcript or
Protein, is this redundant?
required: false
multivalued: false
domain: VariantLocationAssociation
range: string

consequence:
description: >-
SOTerm (child of SO:0001576 - transcript_variant) that describes the
10 changes: 4 additions & 6 deletions test/data/variant_association_ingest_test.json
Original file line number Diff line number Diff line change
@@ -12,7 +12,6 @@
"end": 30341645,
"reference_sequence": "C",
"variant_sequence": "T",
"sequence_of_reference_accession_number": "NC_007118.7",
"internal": false,
"obsolete": false,
"date_created": "2015-04-09T10:15:30+00:00",
@@ -25,8 +24,8 @@
"inserted_sequence": "AT",
"deleted_sequence": "GCGC",
"padded_base": "A",
"dna_mutation_type": "SO:001231",
"gene_localization_type": "SO:121313"
"dna_mutation_type_curie": "SO:001231",
"gene_localization_type_curie": "SO:121313"
}
],
"variant_transcript_location_association_ingest_set": [
@@ -82,7 +81,6 @@
"end": 30341645,
"reference_sequence": "C",
"variant_sequence": "T",
"sequence_of_reference_accession_number": "NC_007118.7",
"internal": false,
"obsolete": false,
"date_created": "2015-04-09T10:15:30+00:00",
@@ -95,8 +93,8 @@
"inserted_sequence": "AT",
"deleted_sequence": "GCGC",
"padded_base": "A",
"dna_mutation_type": "SO:001231",
"gene_localization_type": "SO:121313"
"dna_mutation_type_curie": "SO:001231",
"gene_localization_type_curie": "SO:121313"
}
],
"source_variant_transcript_location_association_ingest_set": [

0 comments on commit 41b2abd

Please sign in to comment.