-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
4e00a59
commit a722022
Showing
4 changed files
with
270 additions
and
171 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
"""Harvester methods for output from different fusion callers""" | ||
|
||
import csv | ||
from abc import ABC | ||
from pathlib import Path | ||
from typing import ClassVar | ||
|
||
from fusor.fusion_caller_models import ( | ||
JAFFA, | ||
Arriba, | ||
Cicero, | ||
EnFusion, | ||
FusionCaller, | ||
FusionCatcher, | ||
Genie, | ||
STARFusion, | ||
) | ||
|
||
|
||
class FusionCallerHarvester(ABC):
    """ABC for fusion caller harvesters

    Subclasses set three class attributes that drive ``load_records``:

    * ``fusion_caller``: the class that is instantiated once per row
    * ``column_rename``: mapping from column headers in the file to the
      keyword names passed to ``fusion_caller``
    * ``delimeter``: the field separator handed to ``csv.DictReader`` (the
      spelling is kept because subclasses assign it under this name)
    """

    # Annotations that mention the model type are quoted so they are not
    # evaluated when the class body runs.
    fusion_caller: "type[FusionCaller]"
    column_rename: ClassVar[dict[str, str]]
    delimeter: str

    def load_records(
        self,
        fusion_path: Path,
    ) -> "list[FusionCaller]":
        """Convert rows of fusion caller output to Pydantic classes

        Each row is read with ``csv.DictReader`` using ``self.delimeter``. Its
        keys are renamed through ``self.column_rename`` (keys without a mapping
        are kept unchanged), and only the keys found in
        ``self.fusion_caller.__annotations__`` are passed as keyword arguments
        to ``self.fusion_caller``.

        :param fusion_path: The path to the fusions file
        :raise ValueError: if the file does not exist at the specified path
        :return: A list with one ``self.fusion_caller`` instance per data row
        """
        if not fusion_path.exists():
            statement = f"{fusion_path!s} does not exist"
            raise ValueError(statement)

        # Look the per-class configuration up once instead of once per row.
        model = self.fusion_caller
        rename = self.column_rename
        fields_to_keep = model.__annotations__.keys()

        fusions_list = []
        # The csv module asks for files opened with newline="" so that line
        # breaks embedded in quoted fields reach the parser untranslated.
        with fusion_path.open(newline="") as csvfile:
            reader = csv.DictReader(csvfile, delimiter=self.delimeter)
            for row in reader:
                renamed_row = {rename.get(key, key): value for key, value in row.items()}
                filtered_row = {
                    key: value
                    for key, value in renamed_row.items()
                    if key in fields_to_keep
                }
                fusions_list.append(model(**filtered_row))
        return fusions_list
|
||
|
||
class JAFFAHarvester(FusionCallerHarvester):
    """Harvester configuration for JAFFA output

    Rows are parsed as comma-delimited text and turned into ``JAFFA`` objects.
    """

    fusion_caller = JAFFA
    delimeter = ","
    # File header -> keyword name given to ``JAFFA``; headers that are not
    # listed here keep their original name.
    column_rename: ClassVar[dict[str, str]] = {
        "fusion genes": "fusion_genes",
        "spanning reads": "spanning_reads",
        "spanning pairs": "spanning_pairs",
    }
|
||
|
||
class StarFusionHarvester(FusionCallerHarvester):
    """Harvester configuration for STAR-Fusion output

    Rows are parsed as tab-delimited text and turned into ``STARFusion``
    objects.
    """

    fusion_caller = STARFusion
    delimeter = "\t"
    # File header -> keyword name given to ``STARFusion``; headers that are
    # not listed here keep their original name.
    column_rename: ClassVar[dict[str, str]] = {
        "LeftGene": "left_gene",
        "RightGene": "right_gene",
        "LeftBreakpoint": "left_breakpoint",
        "RightBreakpoint": "right_breakpoint",
        "JunctionReadCount": "junction_read_count",
        "SpanningFragCount": "spanning_frag_count",
    }
|
||
|
||
class FusionCatcherHarvester(FusionCallerHarvester):
    """Harvester configuration for FusionCatcher output

    Rows are parsed as tab-delimited text and turned into ``FusionCatcher``
    objects.
    """

    fusion_caller = FusionCatcher
    delimeter = "\t"
    # File header -> keyword name given to ``FusionCatcher``; headers that are
    # not listed here keep their original name.
    column_rename: ClassVar[dict[str, str]] = {
        "Gene_1_symbol(5end_fusion_partner)": "five_prime_partner",
        "Gene_2_symbol(3end_fusion_partner)": "three_prime_partner",
        "Fusion_point_for_gene_1(5end_fusion_partner)": "five_prime_fusion_point",
        "Fusion_point_for_gene_2(3end_fusion_partner)": "three_prime_fusion_point",
        "Predicted_effect": "predicted_effect",
        "Spanning_unique_reads": "spanning_unique_reads",
        "Spanning_pairs": "spanning_reads",
        "Fusion_sequence": "fusion_sequence",
    }
|
||
|
||
class ArribaHarvester(FusionCallerHarvester):
    """Harvester configuration for Arriba output

    Rows are parsed as tab-delimited text and turned into ``Arriba`` objects.
    """

    fusion_caller = Arriba
    delimeter = "\t"
    # File header -> keyword name given to ``Arriba``; headers that are not
    # listed here keep their original name.
    column_rename: ClassVar[dict[str, str]] = {
        "#gene1": "gene1",
        "strand1(gene/fusion)": "strand1",
        "strand2(gene/fusion)": "strand2",
        "type": "event_type",
        "reading_frame": "rf",
    }
|
||
|
||
class CiceroHarvester(FusionCallerHarvester):
    """Harvester configuration for Cicero output

    Rows are parsed as tab-delimited text and turned into ``Cicero`` objects.
    """

    fusion_caller = Cicero
    delimeter = "\t"
    # File header -> keyword name given to ``Cicero``; headers that are not
    # listed here keep their original name.
    column_rename: ClassVar[dict[str, str]] = {
        "geneA": "gene_5prime",
        "geneB": "gene_3prime",
        "chrA": "chr_5prime",
        "chrB": "chr_3prime",
        "posA": "pos_5prime",
        "posB": "pos_3prime",
        "type": "event_type",
        "readsA": "reads_5prime",
        "readsB": "reads_3prime",
        "coverageA": "coverage_5prime",
        "coverageB": "coverage_3prime",
    }
|
||
|
||
class EnFusionHarvester(FusionCallerHarvester):
    """Harvester configuration for EnFusion output

    Rows are parsed as tab-delimited text and turned into ``EnFusion`` objects.
    """

    fusion_caller = EnFusion
    delimeter = "\t"
    # File header -> keyword name given to ``EnFusion``; headers that are not
    # listed here keep their original name.
    column_rename: ClassVar[dict[str, str]] = {
        "Gene1": "gene_5prime",
        "Gene2": "gene_3prime",
        "Chr1": "chr_5prime",
        "Chr2": "chr_3prime",
        "Break1": "break_5prime",
        "Break2": "break_3prime",
        "FusionJunctionSequence": "fusion_junction_sequence",
    }
|
||
|
||
class GenieHarvester(FusionCallerHarvester):
    """Harvester configuration for Genie output

    Rows are parsed as tab-delimited text and turned into ``Genie`` objects.
    """

    fusion_caller = Genie
    delimeter = "\t"
    # File header -> keyword name given to ``Genie``; headers that are not
    # listed here keep their original name.
    column_rename: ClassVar[dict[str, str]] = {
        "Site1_Hugo_Symbol": "site1_hugo",
        "Site2_Hugo_Symbol": "site2_hugo",
        "Site1_Chromosome": "site1_chrom",
        "Site2_Chromosome": "site2_chrom",
        "Site1_Position": "site1_pos",
        "Site2_Position": "site2_pos",
        "Site2_Effect_On_Frame": "reading_frame",
        "Annotation": "annot",
    }
Oops, something went wrong.