Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: Add pydantic classes for output from fusion callers #228

Merged
merged 11 commits into from
Jan 23, 2025
Next commit
Reorganize new models
jarbesfeld committed Jan 15, 2025
commit e0ae571115adbe8affb7ca379afc277a6f646873
64 changes: 63 additions & 1 deletion src/fusor/models.py
Original file line number Diff line number Diff line change
@@ -39,8 +39,10 @@ class FUSORTypes(str, Enum):
MULTIPLE_POSSIBLE_GENES_ELEMENT = "MultiplePossibleGenesElement"
BREAKPOINT_COVERAGE = "BreakpointCoverage"
CONTIG_SEQUENCE = "ContigSequence"
ANCHORED_READS = "AnchoredReads"
SPLIT_READS = "SplitReads"
SPANNING_READS = "SpanningReads"
READ_DATA = "ReadData"
REGULATORY_ELEMENT = "RegulatoryElement"
CATEGORICAL_FUSION = "CategoricalFusion"
ASSAYED_FUSION = "AssayedFusion"
@@ -154,6 +156,18 @@ class ContigSequence(BaseStructuralElement):
)


class AnchoredReads(BaseStructuralElement):
    """Define AnchoredReads class.

    This class can be used to report the number of reads that span the
    fusion junction. This is used at the TranscriptSegment level, as it
    indicates the transcript where the longer segment of the read is found.
    """

    type: Literal[FUSORTypes.ANCHORED_READS] = FUSORTypes.ANCHORED_READS
    # Read count; the ``ge=0`` constraint rejects negative values.
    reads: int = Field(ge=0)

    # Schema example mirrors the ``anchoredReads`` payload used in the
    # TranscriptSegmentElement example.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "type": "AnchoredReads",
                "reads": 100,
            }
        }
    )


class SplitReads(BaseStructuralElement):
"""Define SplitReads class.

@@ -184,6 +198,28 @@ class SpanningReads(BaseStructuralElement):
)


class ReadData(BaseStructuralElement):
    """Define ReadData class.

    Groups the sequencing-read metadata that a fusion caller reports for a
    fusion event. This class is used at the AssayedFusion level. Both the
    split-read and spanning-read entries are optional and default to None.
    """

    type: Literal[FUSORTypes.READ_DATA] = FUSORTypes.READ_DATA
    split: SplitReads | None = None
    spanning: SpanningReads | None = None

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "type": "ReadData",
                "split": {
                    "type": "SplitReads",
                    "splitReads": 100,
                },
                "spanning": {
                    "type": "SpanningReads",
                    "spanningReads": 80,
                },
            }
        }
    )


class TranscriptSegmentElement(BaseStructuralElement):
"""Define TranscriptSegment class"""

@@ -199,6 +235,7 @@ class TranscriptSegmentElement(BaseStructuralElement):
elementGenomicStart: SequenceLocation | None = None
elementGenomicEnd: SequenceLocation | None = None
coverage: BreakpointCoverage | None = None
anchoredReads: AnchoredReads | None = None

@model_validator(mode="before")
def check_exons(cls, values):
@@ -264,6 +301,14 @@ def check_exons(cls, values):
},
"start": 154170399,
},
"coverage": {
"type": "BreakpointCoverage",
"fragmentCoverage": 185,
},
"anchoredReads": {
"type": "AnchoredReads",
"reads": 100,
},
}
},
)
@@ -645,7 +690,8 @@ class Assay(BaseModelForbidExtra):
| TemplatedSequenceElement
| LinkerElement
| UnknownGeneElement
| ContigSequence,
| ContigSequence
| ReadData,
Field(discriminator="type"),
]

@@ -695,6 +741,7 @@ class AssayedFusion(AbstractFusion):
causativeEvent: CausativeEvent | None = None
assay: Assay | None = None
contig: ContigSequence | None = None
readData: ReadData | None = None

model_config = ConfigDict(
json_schema_extra={
@@ -712,6 +759,21 @@ class AssayedFusion(AbstractFusion):
"assayName": "fluorescence in-situ hybridization assay",
"fusionDetection": "inferred",
},
"contig": {
"type": "ContigSequence",
"contig": "GTACTACTGATCTAGCATCTAGTA",
},
"readData": {
"type": "ReadData",
"split": {
"type": "SplitReads",
"splitReads": 100,
},
"spanning": {
"type": "SpanningReads",
"spanningReads": 80,
},
},
"structure": [
{
"type": "GeneElement",
35 changes: 35 additions & 0 deletions tests/test_models.py
Original file line number Diff line number Diff line change
@@ -6,6 +6,7 @@

from fusor.models import (
AbstractFusion,
AnchoredReads,
Assay,
AssayedFusion,
BreakpointCoverage,
@@ -17,6 +18,7 @@
GeneElement,
LinkerElement,
MultiplePossibleGenesElement,
ReadData,
RegulatoryElement,
SpanningReads,
SplitReads,
@@ -178,6 +180,8 @@ def transcript_segments(sequence_locations, gene_examples):
"gene": gene_examples[0],
"elementGenomicStart": sequence_locations[2],
"elementGenomicEnd": sequence_locations[3],
"coverage": BreakpointCoverage(fragmentCoverage=100),
"anchoredReads": AnchoredReads(reads=85),
},
{
"type": "TranscriptSegmentElement",
@@ -379,13 +383,17 @@ def test_transcript_segment_element(transcript_segments):
assert test_region_start.type == "SequenceLocation"
test_region_end = test_element.elementGenomicEnd
assert test_region_end.type == "SequenceLocation"
assert test_element.coverage.fragmentCoverage == 100
assert test_element.anchoredReads.reads == 85

test_element = TranscriptSegmentElement(**transcript_segments[3])
assert test_element.transcript == "refseq:NM_938439.4"
assert test_element.exonStart == 7
assert test_element.exonStartOffset == 0
assert test_element.exonEnd is None
assert test_element.exonEndOffset is None
assert test_element.coverage is None
assert test_element.anchoredReads is None

# check CURIE requirement
with pytest.raises(ValidationError) as exc_info:
@@ -640,6 +648,18 @@ def test_contig():
check_validation_error(exc_info, msg)


def test_anchored_reads():
    """Test that AnchoredReads class initializes correctly"""
    anchored_reads = AnchoredReads(reads=100)
    assert anchored_reads.reads == 100

    # test enum validation; a valid ``reads`` is supplied so the bad ``type``
    # is the only validation failure being exercised
    with pytest.raises(ValidationError) as exc_info:
        AnchoredReads(type="anchoredreads", reads=100)
    msg = "Input should be <FUSORTypes.ANCHORED_READS: 'AnchoredReads'>"
    check_validation_error(exc_info, msg)

    # test that a negative read count is rejected (``reads`` is ``ge=0``)
    with pytest.raises(ValidationError):
        AnchoredReads(reads=-1)


def test_split_reads():
"""Test that SplitReads class initializes correctly"""
test_split_reads = SplitReads(splitReads=97)
@@ -664,6 +684,21 @@ def test_spanning_reads():
check_validation_error(exc_info, msg)


def test_read_data():
    """Test that ReadData class initializes correctly"""
    read_data = ReadData(
        split=SplitReads(splitReads=100), spanning=SpanningReads(spanningReads=90)
    )
    assert read_data.split.splitReads == 100
    assert read_data.spanning.spanningReads == 90

    # both read-count entries are optional and default to None
    empty_read_data = ReadData()
    assert empty_read_data.split is None
    assert empty_read_data.spanning is None

    # test enum validation; the constructor itself raises, so no assert is
    # needed inside the context manager
    with pytest.raises(ValidationError) as exc_info:
        ReadData(type="readata")
    msg = "Input should be <FUSORTypes.READ_DATA: 'ReadData'>"
    check_validation_error(exc_info, msg)


def test_event():
"""Test Event object initializes correctly"""
rearrangement = EventType.REARRANGEMENT