Reorganize new models

cancervariants · jarbesfeld · Jan 23, 2025 · Jan 15, 2025 · Jan 15, 2025 · Jan 16, 2025
commit e0ae571115adbe8affb7ca379afc277a6f646873
diff --git a/src/fusor/models.py b/src/fusor/models.py
@@ -39,8 +39,10 @@ class FUSORTypes(str, Enum):
     MULTIPLE_POSSIBLE_GENES_ELEMENT = "MultiplePossibleGenesElement"
     BREAKPOINT_COVERAGE = "BreakpointCoverage"
     CONTIG_SEQUENCE = "ContigSequence"
+    ANCHORED_READS = "AnchoredReads"
     SPLIT_READS = "SplitReads"
     SPANNING_READS = "SpanningReads"
+    READ_DATA = "ReadData"
     REGULATORY_ELEMENT = "RegulatoryElement"
     CATEGORICAL_FUSION = "CategoricalFusion"
     ASSAYED_FUSION = "AssayedFusion"
@@ -154,6 +156,18 @@ class ContigSequence(BaseStructuralElement):
     )
 
 
+class AnchoredReads(BaseStructuralElement):
+    """Define AnchoredReads class.
+
+    This class can be used to report the number of reads that span the
+    fusion junction. It is used at the TranscriptSegment level, as it
+    indicates the transcript where the longer segment of the read is found.
+    """
+
+    type: Literal[FUSORTypes.ANCHORED_READS] = FUSORTypes.ANCHORED_READS
+    reads: int = Field(ge=0)
+
+
 class SplitReads(BaseStructuralElement):
     """Define SplitReads class.
 
@@ -184,6 +198,28 @@ class SpanningReads(BaseStructuralElement):
     )
 
 
+class ReadData(BaseStructuralElement):
+    """Define ReadData class.
+
+    This class is used at the AssayedFusion level when a fusion caller reports
+    metadata describing sequencing reads for the fusion event.
+    """
+
+    type: Literal[FUSORTypes.READ_DATA] = FUSORTypes.READ_DATA
+    split: SplitReads | None = None
+    spanning: SpanningReads | None = None
+
+    model_config = ConfigDict(
+        json_schema_extra={
+            "example": {
+                "type": "ReadData",
+                "split": {"type": "SplitReads", "splitReads": 100},
+                "spanning": {"type": "SpanningReads", "spanningReads": 80},
+            }
+        }
+    )
+
+
 class TranscriptSegmentElement(BaseStructuralElement):
     """Define TranscriptSegment class"""
 
@@ -199,6 +235,7 @@ class TranscriptSegmentElement(BaseStructuralElement):
     elementGenomicStart: SequenceLocation | None = None
     elementGenomicEnd: SequenceLocation | None = None
     coverage: BreakpointCoverage | None = None
+    anchoredReads: AnchoredReads | None = None
 
     @model_validator(mode="before")
     def check_exons(cls, values):
@@ -264,6 +301,14 @@ def check_exons(cls, values):
                     },
                     "start": 154170399,
                 },
+                "coverage": {
+                    "type": "BreakpointCoverage",
+                    "fragmentCoverage": 185,
+                },
+                "anchoredReads": {
+                    "type": "AnchoredReads",
+                    "reads": 100,
+                },
             }
         },
     )
@@ -645,7 +690,8 @@ class Assay(BaseModelForbidExtra):
     | TemplatedSequenceElement
     | LinkerElement
     | UnknownGeneElement
-    | ContigSequence,
+    | ContigSequence
+    | ReadData,
     Field(discriminator="type"),
 ]
 
@@ -695,6 +741,7 @@ class AssayedFusion(AbstractFusion):
     causativeEvent: CausativeEvent | None = None
     assay: Assay | None = None
     contig: ContigSequence | None = None
+    readData: ReadData | None = None
 
     model_config = ConfigDict(
         json_schema_extra={
@@ -712,6 +759,21 @@ class AssayedFusion(AbstractFusion):
                     "assayName": "fluorescence in-situ hybridization assay",
                     "fusionDetection": "inferred",
                 },
+                "contig": {
+                    "type": "ContigSequence",
+                    "contig": "GTACTACTGATCTAGCATCTAGTA",
+                },
+                "readData": {
+                    "type": "ReadData",
+                    "split": {
+                        "type": "SplitReads",
+                        "splitReads": 100,
+                    },
+                    "spanning": {
+                        "type": "SpanningReads",
+                        "spanningReads": 80,
+                    },
+                },
                 "structure": [
                     {
                         "type": "GeneElement",

diff --git a/tests/test_models.py b/tests/test_models.py
@@ -6,6 +6,7 @@
 
 from fusor.models import (
     AbstractFusion,
+    AnchoredReads,
     Assay,
     AssayedFusion,
     BreakpointCoverage,
@@ -17,6 +18,7 @@
     GeneElement,
     LinkerElement,
     MultiplePossibleGenesElement,
+    ReadData,
     RegulatoryElement,
     SpanningReads,
     SplitReads,
@@ -178,6 +180,8 @@ def transcript_segments(sequence_locations, gene_examples):
             "gene": gene_examples[0],
             "elementGenomicStart": sequence_locations[2],
             "elementGenomicEnd": sequence_locations[3],
+            "coverage": BreakpointCoverage(fragmentCoverage=100),
+            "anchoredReads": AnchoredReads(reads=85),
         },
         {
             "type": "TranscriptSegmentElement",
@@ -379,13 +383,17 @@ def test_transcript_segment_element(transcript_segments):
     assert test_region_start.type == "SequenceLocation"
     test_region_end = test_element.elementGenomicEnd
     assert test_region_end.type == "SequenceLocation"
+    assert test_element.coverage.fragmentCoverage == 100
+    assert test_element.anchoredReads.reads == 85
 
     test_element = TranscriptSegmentElement(**transcript_segments[3])
     assert test_element.transcript == "refseq:NM_938439.4"
     assert test_element.exonStart == 7
     assert test_element.exonStartOffset == 0
     assert test_element.exonEnd is None
     assert test_element.exonEndOffset is None
+    assert test_element.coverage is None
+    assert test_element.anchoredReads is None
 
     # check CURIE requirement
     with pytest.raises(ValidationError) as exc_info:
@@ -640,6 +648,18 @@ def test_contig():
     check_validation_error(exc_info, msg)
 
 
+def test_anchored_reads():
+    """Test that AnchoredReads class initializes correctly"""
+    test_anchored_reads = AnchoredReads(reads=100)
+    assert test_anchored_reads.reads == 100
+
+    # test enum validation
+    with pytest.raises(ValidationError) as exc_info:
+        assert AnchoredReads(type="anchoredreads")
+    msg = "Input should be <FUSORTypes.ANCHORED_READS: 'AnchoredReads'>"
+    check_validation_error(exc_info, msg)
+
+
 def test_split_reads():
     """Test that SplitReads class initializes correctly"""
     test_split_reads = SplitReads(splitReads=97)
@@ -664,6 +684,21 @@ def test_spanning_reads():
     check_validation_error(exc_info, msg)
 
 
+def test_read_data():
+    """Test that ReadData class initializes correctly"""
+    test_read_data = ReadData(
+        split=SplitReads(splitReads=100), spanning=SpanningReads(spanningReads=90)
+    )
+    assert test_read_data.split.splitReads == 100
+    assert test_read_data.spanning.spanningReads == 90
+
+    # test enum validation
+    with pytest.raises(ValidationError) as exc_info:
+        assert ReadData(type="readata")
+    msg = "Input should be <FUSORTypes.READ_DATA: 'ReadData'>"
+    check_validation_error(exc_info, msg)
+
+
 def test_event():
     """Test Event object initializes correctly"""
     rearrangement = EventType.REARRANGEMENT