Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge: Fixes issues when ECTyper doesn't find all alleles for Phylotyper #309

Merged
merged 28 commits into from
Jun 20, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
756a24f
debug: catch the stderr
kevinkle Jun 16, 2018
7b71800
FIX: incl graph information from phylotyper ontology
kevinkle Jun 18, 2018
0481ec2
ADD: first draft of sequences.validate()
kevinkle Jun 18, 2018
6776db8
add: file which error w phylotyper to test suite
kevinkle Jun 18, 2018
bc1fadc
debug: wasnt erroring anymore, but returning no loci all the time
kevinkle Jun 18, 2018
493d179
debug: just throw an exception
kevinkle Jun 18, 2018
26140ee
debug: just throw an exception
kevinkle Jun 18, 2018
55539f5
debug: no tuples in query_result?
kevinkle Jun 18, 2018
1e15497
fix?: return found/notfound
kevinkle Jun 18, 2018
7a2d988
fix: nvm was missing a return
kevinkle Jun 18, 2018
c8ace8d
del: exceptions from debugging
kevinkle Jun 18, 2018
e317b85
debug: why call phylotyper
kevinkle Jun 18, 2018
892bfe1
fix?: the _subtype_query wouldve returned true if in ontology, wasnt …
kevinkle Jun 18, 2018
8b4fe78
del: ok for if fasta=None
kevinkle Jun 18, 2018
cf76a0c
debug: stx2 still passing validation, have it query for :VirulenceFac…
kevinkle Jun 18, 2018
e3c4ca9
debug: double check what the query is returning
kevinkle Jun 19, 2018
6497c3b
debug: double check what the query is returning
kevinkle Jun 19, 2018
6ed74cf
debug: double check what the query is returning
kevinkle Jun 19, 2018
d54e6ec
debug: double check what the query is returning
kevinkle Jun 19, 2018
7b4a245
debug: double check what the query is returning
kevinkle Jun 19, 2018
4ffcb06
fix: problem w sig
kevinkle Jun 19, 2018
5af9b5b
fix: problem w sig
kevinkle Jun 19, 2018
75cb412
debug
kevinkle Jun 19, 2018
17d0171
debug: run all vf
kevinkle Jun 19, 2018
a7ff89c
debug: clos
kevinkle Jun 19, 2018
e804237
debug: check bool
kevinkle Jun 19, 2018
4702dad
fix: validation should be ok
kevinkle Jun 19, 2018
0784b94
del: error for debugging
kevinkle Jun 19, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions app/modules/phylotyper/ontology.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,11 @@
typing_ontology_version = '<https://www.github.com/superphy/typing/1.0.0>'
__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))


# Loci required by each Phylotyper subtype scheme, keyed by subtype name.
# The *_graph() builders pass these lists to generate_graph() as its `loci`
# argument, and MarkerSequences.validate() iterates over LOCI[subtype].
LOCI = {
'stx1': [':stx1A', ':stx1B'],
'stx2': [':stx2A', ':stx2B'],
'eae': [':eae']
}

@submit
@prefix
Expand Down Expand Up @@ -158,9 +162,9 @@ def generate_graph(uri, loci, values):
subtype = uri.split(':')[1]

# Check for existence of schema Marker components
for l in loci:
if not find_object(l, ':Marker'):
raise DatabaseError(uri, l)
# for l in loci:
# if not find_object(l, ':Marker'):
# raise DatabaseError(uri, l)

# Proceed with creating subtype schema
graph = Graph()
Expand Down Expand Up @@ -214,7 +218,7 @@ def stx1_graph():

"""

return generate_graph('subt:stx1', [':stx1A',':stx1B'], ['a','c','d','untypeable'])
return generate_graph('subt:stx1', LOCI['stx1'], ['a','c','d','untypeable'])



Expand All @@ -224,7 +228,7 @@ def stx2_graph():

"""

return generate_graph('subt:stx2', [':stx2A',':stx2B'], ['a','b','c','d','e','f','g','untypeable'])
return generate_graph('subt:stx2', LOCI['stx2'], ['a','b','c','d','e','f','g','untypeable'])


def eae_graph():
Expand All @@ -233,7 +237,7 @@ def eae_graph():

"""

return generate_graph('subt:eae', [':eae'],
return generate_graph('subt:eae', LOCI['eae'],
["alpha-1","alpha-2","beta-1","beta-2","epsilon-1","epsilon-2","eta-1","eta-2",
"gamma-1","iota-1","iota-2","kappa-1","lambda-1","mu-1","nu-1","omicron-1","pi-1",
"rho-1","sigma-1","theta-2","xi-1","zeta-1","untypeable"])
Expand Down
63 changes: 32 additions & 31 deletions app/modules/phylotyper/phylotyper.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,14 @@
redis_url = config.REDIS_URL
redis_conn = redis.from_url(redis_url)

def _check_tsv(pt_file):
pt_results = pd.read_table(pt_file)
if pt_results.empty:
raise Exception('_check_tsv() failed as pt_results.empty == true for pt_file: {0} with df content: {1}'.format(pt_file, str(pt_results)))
def _noloci(output_file):
"""Writes out no loci found.
"""
with open(output_file, 'w') as fh:
fh.write('\t'.join(['genome', 'tree_label', 'subtype',
'probability', 'phylotyper_assignment', 'loci']))
fh.write('\t'.join(['lcl|query|', 'not applicable', 'not applicable',
'not applicable', 'Subtype loci not found in genome', 'not applicable']))

def phylotyper(uriIsolate, subtype, result_file, id_file=None, job_id=None, job_turtle=None, job_ectyper_datastruct_vf=None):
""" Wrapper for Phylotyper
Expand Down Expand Up @@ -79,41 +83,38 @@ def phylotyper(uriIsolate, subtype, result_file, id_file=None, job_id=None, job_
loci_results = ontology.schema_query(uri)
loci = [ gu(l['locus']) for l in sorted(loci_results, key=lambda k: k['i'])]

# Get alleles for this genome
markerseqs = MarkerSequences(loci, job_id, job_turtle, job_ectyper_datastruct_vf, redis_conn)
fasta = markerseqs.fasta(uriIsolate)

temp_dir = mkdtemp(prefix='pt'+subtype, dir=config.DATASTORE)
# Temp files.
temp_dir = mkdtemp(prefix='pt' + subtype, dir=config.DATASTORE)
query_file = os.path.join(temp_dir, 'query.fasta')
output_file = os.path.join(temp_dir, 'subtype_predictions.tsv')

if fasta:
# Run phylotyper
with open(query_file, 'w') as fh:
fh.write(fasta)

subprocess.check_call(['phylotyper', 'genome', '--noplots',
subtype,
temp_dir,
query_file])

# Get alleles for this genome
markerseqs = MarkerSequences(loci, job_id, job_turtle, job_ectyper_datastruct_vf, redis_conn)
# Validate that all the alleles required are in this genome.
if not markerseqs.validate(subtype):
_noloci(output_file)
else:
# No loci
# raise Exception('phylotyper.phylotyper() could not retrieve reference sequences for loci: {0}, uriIsolate: {1}, subtype: {2}'.format(
# str(loci),
# str(uriIsolate),
# subtype
# ))
# Report no loci status in output
with open(output_file, 'w') as fh:
fh.write('\t'.join(['genome','tree_label','subtype','probability','phylotyper_assignment','loci']))
fh.write('\t'.join(['lcl|query|','not applicable','not applicable','not applicable','Subtype loci not found in genome','not applicable']))
fasta = markerseqs.fasta(uriIsolate)
if fasta:
# Run phylotyper
with open(query_file, 'w') as fh:
fh.write(fasta)
subprocess.check_call(
['phylotyper',
'genome',
'--noplots',
subtype,
temp_dir,
query_file],
stderr=subprocess.STDOUT)

else:
# No loci
_noloci(output_file)

shutil.move(output_file, result_file)
shutil.rmtree(temp_dir)

# _check_tsv(result_file)

return result_file


Expand Down
54 changes: 53 additions & 1 deletion app/modules/phylotyper/sequences.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from middleware.decorators import submit, prefix, tojson
from middleware.graphers import turtle_utils
from routes.job_utils import fetch_job
from modules.phylotyper.ontology import stx1_graph, stx2_graph, eae_graph, LOCI
from middleware.graphers.turtle_utils import generate_uri as gu

__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))

Expand Down Expand Up @@ -136,8 +138,10 @@ def __init__(self, markers=[':stx2A',':stx2B'], job_id=None, job_turtle=None, jo
g = Graph()
ontology_turtle_file = os.path.join(__location__, 'superphy_subtyping.ttl')
g.parse(ontology_turtle_file, format="turtle")
# Add Phylotyper ontology graphs.
g = g + stx1_graph() + stx2_graph() + eae_graph()
# Retrieve and merge graphs from pre-req. jobs.
self.graph = g + fetch_job(job_id, redis_conn).result + fetch_job(job_turtle, redis_conn).result + fetch_job(job_ectyper_datastruct_vf, redis_conn).result
self.graph = g + fetch_job(job_id, redis_conn).result + fetch_job(job_turtle, redis_conn).result + fetch_job(job_ectyper_datastruct_vf, redis_conn).result

def sequences(self, genome_uri):
"""Retrieve sequences for object alleles
Expand Down Expand Up @@ -197,6 +201,54 @@ def fasta(self, genome_uri):

return fasta_string

@prefix
def _subtype_query(self):
    """
    Builds the SPARQL text selecting every ?subtype that is typed as
    :VirulenceFactor and that some faldo:Region links to through :hasPart.

    The query takes no parameters, so it is not restricted to one URI.

    Returns:
        the query template string defined below
        NOTE(review): this method is wrapped by the @prefix decorator, which
        is defined outside this view, so callers may receive a
        post-processed value - confirm against middleware.decorators.

    """
    # NOTE(review): the braces are doubled, presumably because the text is
    # later passed through str.format() - confirm.
    query = '''
SELECT ?subtype
WHERE {{
?region a faldo:Region ; :hasPart ?subtype .
?subtype a :VirulenceFactor .
}}
'''

    return query

def _find_object(self, uri):
    """
    Reports whether `uri` is among the results of the subtype query when
    that query is run against this instance's graph (self.graph).

    Args:
        uri(str): prefixed URI (e.g. ':stx2A'); it is expanded with
            generate_uri (imported as gu) before being compared

    Returns:
        bool: True when the expanded URI equals one of the ?subtype values
        returned by the query, False otherwise

    """
    rows = self.graph.query(self._subtype_query())

    # Only the first column (?subtype) of each result row is of interest.
    found_uris = tuple(row[0].toPython() for row in rows)
    wanted = str(gu(uri))

    return wanted in found_uris

def validate(self, subtype):
    """Tells whether self.graph holds every allele Phylotyper needs.

    The loci listed for `subtype` in LOCI are each checked with
    _find_object(); checking stops at the first locus that is missing.

    Args:
        subtype(str): key into LOCI (the visible keys are 'stx1', 'stx2'
            and 'eae')

    Returns:
        bool: False if any required locus is absent (Phylotyper should not
        be run in that case), True otherwise.
    """
    required_loci = LOCI[subtype]
    return all(self._find_object(locus) for locus in required_loci)

if __name__=='__main__':
import argparse
Expand Down
Loading