Skip to content

Commit

Permalink
better gtf file support
Browse files: browse the repository at this point in the history
  • Loading branch information
e-sollier committed Jul 7, 2024
1 parent aad6c12 commit b543243
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 11 deletions.
2 changes: 1 addition & 1 deletion figeno/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from figeno.cli import gui, init,make

__version__ = "1.4.3"
__version__ = "1.4.4"

def main():
parser = ArgumentParser("figeno",formatter_class=ArgumentDefaultsHelpFormatter)
Expand Down
21 changes: 16 additions & 5 deletions figeno/genes.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ def read_transcripts_names(file,gene_names):



def read_transcripts(file,chr=None,start=None,end=None,gene_names="auto",collapsed=True,only_protein_coding=True):
def read_transcripts(file,chr=None,start=None,end=None,gene_names="auto",collapsed=True,only_protein_coding=True,warnings=[]):
"""Wrapper depending on the file type"""
if gene_names =="auto" or gene_names=="": gene_names = None
if gene_names is not None:
Expand All @@ -190,7 +190,7 @@ def read_transcripts(file,chr=None,start=None,end=None,gene_names="auto",collaps
elif file.endswith("gff3") or file.endswith("gff3.gz"):
return read_genes_gff3(file,chr,start,end,gene_names,collapsed=collapsed,only_protein_coding=only_protein_coding)
elif file.endswith("gtf") or file.endswith("gtf.gz"):
return read_genes_gtf(file,chr,start,end,gene_names,collapsed=collapsed,only_protein_coding=only_protein_coding)
return read_genes_gtf(file,chr,start,end,gene_names,collapsed=collapsed,only_protein_coding=only_protein_coding,warnings=warnings)
else:
raise KnownException("The extension for genes file "+str(file)+" was not recognized. The filename must end with .txt(.gz) for RefSeq format, .gtf(.gz) for gtf or .gff3(.gz) for gff3.")

Expand Down Expand Up @@ -267,7 +267,7 @@ def read_genes_refseq(file,chr=None,start=None,end=None,gene_names=None,collapse



def read_genes_gtf(gtf_file,chr=None,start=None,end=None,gene_names=None,collapsed=True,only_protein_coding=True):
def read_genes_gtf(gtf_file,chr=None,start=None,end=None,gene_names=None,collapsed=True,only_protein_coding=True,warnings=[]):
if gene_names is not None: gene_names=gene_names.upper()
transcripts={}
name2exons={}
Expand All @@ -294,8 +294,16 @@ def read_genes_gtf(gtf_file,chr=None,start=None,end=None,gene_names=None,collaps
elif x.lstrip(" ").startswith("transcript_name"):
x = x[x.find("\"")+1:]
transcript_name = x[:x.find("\"")]
elif transcript_name=="" and x.lstrip(" ").startswith("transcript_id"):
x = x[x.find("\"")+1:]
transcript_name = x[:x.find("\"")]
if (gene_names is not None) and (not gene_name.upper() in gene_names): continue
if only_protein_coding and (not "protein_coding" in linesplit[8]): continue
if only_protein_coding and (not "protein_coding" in linesplit[8]):
if ("gene_biotype" in linesplit[8]): continue
else:
warn_message="Could not filter for protein coding genes because this information was not provided in the gtf file."
if not warn_message in warnings: warnings.append(warn_message)

if collapsed: name = gene_name
else: name=transcript_name
transcript = Transcript(gene_name,linesplit[0].lstrip("chr"),transcript_start,transcript_end,transcript_orientation,[])
Expand All @@ -310,7 +318,10 @@ def read_genes_gtf(gtf_file,chr=None,start=None,end=None,gene_names=None,collaps
if x.lstrip(" ").startswith("transcript_name"):
x = x[x.find("\"")+1:]
transcript_name = x[:x.find("\"")]
if x.lstrip(" ").startswith("gene_name"):
elif transcript_name=="" and x.lstrip(" ").startswith("transcript_id"):
x = x[x.find("\"")+1:]
transcript_name = x[:x.find("\"")]
elif x.lstrip(" ").startswith("gene_name"):
x = x[x.find("\"")+1:]
gene_name = x[:x.find("\"")]
if (gene_names is not None) and (not gene_name in gene_names): continue
Expand Down
2 changes: 1 addition & 1 deletion figeno/gui/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "figeno",
"version": "1.4.3",
"version": "1.4.4",
"private": true,
"homepage": "./",
"dependencies": {
Expand Down
6 changes: 3 additions & 3 deletions figeno/track_genes.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def __init__(self,reference="custom",genes_file="",style="default",collapsed=Tru
def draw(self, regions, box ,hmargin=0,warnings=[]):
boxes = split_box(box,regions,hmargin)
self.margin_between_genes = 1.5*np.sum([abs(r[0].end-r[0].start) for r in regions]) / abs(box["right"]-box["left"]) # in bp
lines_regions = self.read_transcripts_lines_regions(regions)
lines_regions = self.read_transcripts_lines_regions(regions,warnings=warnings)
max_region_size=0
for i in range(len(regions)):
max_region_size=max(max_region_size,abs(regions[i][0].end-regions[i][0].start))
Expand Down Expand Up @@ -175,7 +175,7 @@ def draw_title(self,box):
box["ax"].text(box["left"] - 1.0,(box["top"]+box["bottom"])/2,
self.label,rotation=rotation,horizontalalignment="right",verticalalignment="center",fontsize=7*self.fontscale)

def read_transcripts_lines_regions(self,regions):
def read_transcripts_lines_regions(self,regions,warnings=[]):
regions = [reg[0] for reg in regions]
lines_regions=[]
max_nlines=0
Expand All @@ -188,7 +188,7 @@ def read_transcripts_lines_regions(self,regions):
raise KnownException("When using a custom reference genome, you have to provide a genes file if you want to display a genes track. See https://figeno.readthedocs.io/en/latest/content/describe_figure.html#general for the format of this file.")
else:
transcripts = read_transcripts(self.genes_file,region.chr,region.start,region.end,self.genes,
collapsed=self.collapsed,only_protein_coding=self.only_protein_coding)
collapsed=self.collapsed,only_protein_coding=self.only_protein_coding,warnings=warnings)

# Assign transcripts to lines, so that they do not overlap
transcripts= sorted(transcripts,key=lambda x:x.start)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ packages = ["figeno", "figeno.data", "figeno.cli", "figeno.gui"]

[project]
name = 'figeno'
version = "1.4.3"
version = "1.4.4"
description = 'Package for generating genomics figures.'
readme = 'README.md'
authors = [
Expand Down

0 comments on commit b543243

Please sign in to comment.