Skip to content

Commit

Permalink
initial support for biopython SeqFeatures
Browse files Browse the repository at this point in the history
see #42
  • Loading branch information
daler committed Jan 18, 2015
1 parent 87c2a37 commit f82fd2e
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 2 deletions.
3 changes: 1 addition & 2 deletions gffutils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,4 @@
from gffutils.helpers import example_filename
from gffutils.exceptions import FeatureNotFoundError, DuplicateIDError
from gffutils.version import version as __version__


from gffutils import biopython_integration
66 changes: 66 additions & 0 deletions gffutils/biopython_integration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
"""
Module for integration with BioPython, specifically SeqRecords and SeqFeature
objects.
"""
try:
from Bio.SeqFeature import SeqFeature, FeatureLocation
except ImportError:
raise ImportError(
"BioPython must be installed to use this module")
from . import Feature

# GFF strand symbol -> BioPython strand integer.
_biopython_strand = {'+': 1, '-': -1, '.': 0}

# Inverse lookup: BioPython strand integer -> GFF strand symbol.
_feature_strand = dict(
    zip(_biopython_strand.values(), _biopython_strand.keys()))


def to_seqfeature(f):
    """
    Converts a gffutils.Feature object to a Bio.SeqFeature object.

    The GFF fields `source`, `score`, `seqid`, and `frame` are stored as
    single-item list qualifiers. GFF `attributes` are then merged into the
    same qualifier dictionary, so an attribute that shares a name with one of
    those four fields replaces it.

    GFF coordinates are 1-based and inclusive, while BioPython locations are
    0-based and end-exclusive. The start is therefore shifted down by one and
    the stop is passed through unchanged; `from_seqfeature` applies the
    inverse shift.

    Parameters
    ----------
    f : gffutils.Feature
        Feature to convert. Its strand must be one of '+', '-' or '.';
        any other value raises KeyError.

    Returns
    -------
    Bio.SeqFeature.SeqFeature
    """
    qualifiers = {
        'source': [f.source],
        'score': [f.score],
        'seqid': [f.seqid],
        'frame': [f.frame],
    }
    qualifiers.update(f.attributes)

    # The strand is attached to the location rather than passed to the
    # SeqFeature constructor: the location is where BioPython stores it, and
    # the constructor argument is not available in recent releases.
    location = FeatureLocation(
        int(f.start) - 1,
        f.stop,
        strand=_biopython_strand[f.strand],
    )
    return SeqFeature(
        location,
        id=f.id,
        type=f.featuretype,
        qualifiers=qualifiers
    )


def from_seqfeature(s, **kwargs):
    """
    Converts a Bio.SeqFeature object to a gffutils.Feature object.

    The GFF fields `source`, `score`, `seqid`, and `frame` are assumed to be
    stored as qualifiers (each a sequence whose first item is the value); a
    missing one falls back to '.'. Any other qualifiers will be assumed to be
    GFF attributes.

    The 0-based, end-exclusive BioPython location is converted back to
    1-based, inclusive GFF coordinates, which is the inverse of the shift
    applied by `to_seqfeature`.

    Parameters
    ----------
    s : Bio.SeqFeature.SeqFeature
        Feature to convert. A strand that is not 1, -1 or 0 (for example
        None, when no strand was set) is written as '.'.
    **kwargs
        Passed through unchanged to the Feature constructor.

    Returns
    -------
    gffutils.Feature

    Notes
    -----
    `s.id` is not forwarded to the returned Feature by this function.
    """
    gff_fields = ('source', 'score', 'seqid', 'frame')

    source = s.qualifiers.get('source', '.')[0]
    score = s.qualifiers.get('score', '.')[0]
    seqid = s.qualifiers.get('seqid', '.')[0]
    frame = s.qualifiers.get('frame', '.')[0]

    # Read the strand from the location and tolerate values that have no GFF
    # symbol of their own instead of failing with KeyError.
    strand = _feature_strand.get(s.location.strand, '.')

    # Back to 1-based inclusive coordinates. int() is used rather than the
    # legacy `.position` attribute so this works across BioPython releases.
    start = int(s.location.start) + 1
    stop = int(s.location.end)
    featuretype = s.type

    # Everything that is not one of the four GFF fields is an attribute. The
    # default on pop() keeps a missing field from raising, matching the
    # tolerant .get() lookups above.
    attributes = dict(s.qualifiers)
    for field in gff_fields:
        attributes.pop(field, None)

    return Feature(seqid, source, featuretype, start, stop, score, strand,
                   frame, attributes, **kwargs)
16 changes: 16 additions & 0 deletions gffutils/test/test_biopython_integration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from gffutils import example_filename, create, parser, feature
import gffutils
import gffutils.biopython_integration as bp

def test_roundtrip():
    """
    Converting a Feature to a SeqFeature and back again should give a Feature
    equal to the one we started with.
    """
    gff_path = gffutils.example_filename("gff_example1.gff3")
    database = gffutils.create_db(gff_path, ':memory:')

    original = database['ENSMUSG00000033845']
    original.keep_order = True
    original_dialect = original.dialect

    as_seqfeature = bp.to_seqfeature(original)
    roundtripped = bp.from_seqfeature(
        as_seqfeature, dialect=original_dialect, keep_order=True)

    assert original == roundtripped

0 comments on commit f82fd2e

Please sign in to comment.