Skip to content

Commit

Permalink
Merge pull request #3 from alyst/alyst_fixes2
Browse files Browse the repository at this point in the history
Switch to Julia v0.6 + other improvements
  • Loading branch information
maximsch2 authored May 17, 2018
2 parents 3960f66 + 6f2a5f3 commit 877dc65
Show file tree
Hide file tree
Showing 8 changed files with 102 additions and 37 deletions.
1 change: 0 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
language: julia
julia:
- 0.5
- 0.6
- nightly
notifications:
Expand Down
2 changes: 1 addition & 1 deletion REQUIRE
Original file line number Diff line number Diff line change
@@ -1 +1 @@
julia 0.5
julia 0.6
11 changes: 6 additions & 5 deletions src/ontology.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@

immutable Ontology
"""
The collection of all ontology terms and their relations.
"""
struct Ontology
header::Dict{String, Vector{String}}
prefix::String
terms::Dict{TermId, Term}
Expand All @@ -18,7 +20,7 @@ function gettermbyname(ontology::Ontology, name)
for term in allterms(ontology)
(lowercase(term.name) == lname) && return term
end
error("Term not found: $name")
throw(KeyError(name))
end

gettermid(ontology::Ontology, id::Integer) = @sprintf("%s:%07d", ontology.prefix, id)
Expand All @@ -39,8 +41,7 @@ Base.length(ontology::Ontology) = length(ontology.terms)
parents(ontology::Ontology, term::Term, rel::Symbol = :is_a) = ontology[relationship(term, rel)]
children(ontology::Ontology, term::Term, rel::Symbol = :is_a) = ontology[rev_relationship(term, rel)]

# FIXME use const when 0.5 compatibility is dropped
typealias VecOrTuple{T} Union{Vector{T}, Tuple{Vararg{T}}}
const VecOrTuple{T} = Union{Vector{T}, Tuple{Vararg{T}}}

# return the set of all nodes of the ontology DAG that could be visited from `term`
# node when traveling along `rels` edges using `rev` direction
Expand Down
53 changes: 40 additions & 13 deletions src/parser.jl
Original file line number Diff line number Diff line change
@@ -1,9 +1,24 @@
# The OBO Flat File parser

immutable Stanza
"""
Represents one entry in the OBO file, e.g.
```
[Term]
id: GO:0000002
namespace: biological_process
def: BBB
name: two
```
is stored as `Stanza` with `Typ` = "Term", `id` = "GO:0000002" and
`tagvalues = Dict("id" => ["GO:0000002"], "namespace" => ["biological_process"], "def" => ["BBB"], "name" => ["two"])`.
"""
struct Stanza
Typ::String # Official ones are: "Term", "Typedef" and "Instance"
id::String
tagvalues::Dict{String, Vector{String}}
tagvalues::TagDict
end

# Exception thrown by the OBO parser when the input cannot be interpreted,
# e.g. a stanza without an ID tag, a line without a tag, a tag that is
# expected to be unique but is repeated, or a malformed relationship field.
struct OBOParseException <: Exception
# human-readable description of the parsing problem
msg::String
end

function find_first_nonescaped(s, ch)
Expand Down Expand Up @@ -34,7 +49,7 @@ function parseOBO(stream::IO)
while nextstanza != ""
prevstanza = nextstanza
vals, nextstanza = parsetagvalues(stream)
haskey(vals, id_tag) || error("Stanza is missing ID tag")
haskey(vals, id_tag) || throw(OBOParseException("Stanza is missing ID tag"))
id = vals[id_tag][1]
push!(stanzas, Stanza(prevstanza, id, vals))
end
Expand All @@ -47,8 +62,9 @@ parseOBO(filepath::AbstractString) = open(parseOBO, filepath, "r")

const r_stanza = r"^\[(.*)\]$"

# returns tagvalues of the current Stanza and the type of the next one
function parsetagvalues(s)
vals = Dict{String, Vector{String}}()
vals = TagDict()

for line in eachline(s)
line = strip(removecomments(line))
Expand All @@ -58,7 +74,7 @@ function parsetagvalues(s)
isempty(line) && continue

tag, value, ok = tagvalue(line)
ok || error("cannot find a tag (position: $(position(s))), empty: $(isempty(line)), line: `$(line)`")
ok || throw(OBOParseException("cannot find a tag (position: $(position(s))), empty: $(isempty(line)), line: `$(line)`"))
push!(get!(()->Vector{String}(), vals, tag), value)
end

Expand Down Expand Up @@ -87,7 +103,7 @@ end
function getuniqueval(st::Stanza, tagname, def::String="")
if haskey(st.tagvalues, tagname)
arr = st.tagvalues[tagname]
(length(arr) > 1) && error("Expect unique tag named $tagname")
(length(arr) > 1) && throw(OBOParseException("Expect unique tag named $tagname"))
return arr[1]
else
return def
Expand All @@ -102,14 +118,25 @@ function getterms(arr::Vector{Stanza})

term_obsolete = getuniqueval(st, "is_obsolete") == "true"
term_name = getuniqueval(st, "name")
term_def = getuniqueval(st, "def")
term_def_and_refs = getuniqueval(st, "def")
term_def_matches = match(r"^\"([^\"]+)\"(?:\s\[(.+)\])?$", term_def_and_refs)
if term_def_matches !== nothing
term_def = term_def_matches[1]
term_refs = RefDict(begin
Pair(split(ref, r"(?<!\\):")...)
end for ref in split(term_def_matches[2], ", "))
else # plain format
term_def = term_def_and_refs
term_refs = RefDict()
end

term_namespace = getuniqueval(st, "namespace")
if haskey(result, st.id)
# term was automatically created, re-create it with the correct properties,
# but preserve the existing relationships
term = result[st.id] = Term(result[st.id], term_name, term_obsolete, term_namespace, term_def)
term = result[st.id] = Term(result[st.id], term_name, term_obsolete, term_namespace, term_def, term_refs)
else # brand new term
term = result[st.id] = Term(st.id, term_name, term_obsolete, term_namespace, term_def)
term = result[st.id] = Term(st.id, term_name, term_obsolete, term_namespace, term_def, term_refs)
end

for otherid in get(st.tagvalues, "is_a", String[])
Expand All @@ -121,7 +148,7 @@ function getterms(arr::Vector{Stanza})
for rel in get(st.tagvalues, "relationship", String[])
rel = strip(rel)
tmp = split(rel)
length(tmp) == 2 || error("Failed to parse relationship field: $rel")
length(tmp) == 2 || throw(OBOParseException("Failed to parse relationship field: $rel"))

rel_type = Symbol(tmp[1])
rel_id = tmp[2]
Expand All @@ -131,8 +158,8 @@ function getterms(arr::Vector{Stanza})
push!(rev_relationship(otherterm, rel_type), st.id)
end

if isobsolete(term) && length(relationship(term ,:is_a)) > 0
error("Obsolete term $term contains is_a relationship")
if isobsolete(term) && length(relationship(term, :is_a)) > 0
throw(OBOParseException("Obsolete term $term contains is_a relationship"))
end

append!(term.synonyms, get(st.tagvalues, "synonym", String[]))
Expand Down
42 changes: 31 additions & 11 deletions src/term.jl
Original file line number Diff line number Diff line change
@@ -1,26 +1,46 @@
const TermId = String

immutable Term
const TagDict = Dict{String, Vector{String}}
const RefDict = Dict{String, String}
const RelDict = Dict{Symbol, Set{TermId}}

"""
Ontology term.
The `Term` object is a node in the directed acyclic ontology graph.
Its outgoing and incoming edges represent the relations with the other nodes and
could be retrieved by
```julia
relationship(term, sym)
```
and
```julia
rev_relationship(term, sym)
```
respectively, where `sym` is the relationship annotation (e.g. `:part_of`, `:is_a`, `:regulates`).
"""
struct Term
id::TermId
name::String

obsolete::Bool
namespace::String
def::String
refs::RefDict
synonyms::Vector{String}
tagvalues::Dict{String, Vector{String}}
tagvalues::TagDict

relationships::Dict{Symbol, Set{TermId}}
rev_relationships::Dict{Symbol, Set{TermId}} # reverse relationships
relationships::RelDict
rev_relationships::RelDict # reverse relationships

Term(id::AbstractString, name::AbstractString="", obsolete::Bool=false,
namespace::AbstractString="", def::AbstractString="") =
new(id, name, obsolete, namespace, def, String[],
Dict{String, Vector{String}}(),
Dict{Symbol, Set{TermId}}(), Dict{Symbol, Set{TermId}}())
namespace::AbstractString="", def::AbstractString="",
refs::RefDict=RefDict()) =
new(id, name, obsolete, namespace, def, refs, String[],
TagDict(), RelDict(), RelDict())
Term(term::Term, name::AbstractString=term.name, obsolete::Bool=term.obsolete,
namespace::AbstractString=term.namespace, def::AbstractString=term.def) =
new(term.id, name, obsolete, namespace, def, term.synonyms,
namespace::AbstractString=term.namespace, def::AbstractString=term.def,
refs::RefDict=term.refs) =
new(term.id, name, obsolete, namespace, def, refs, term.synonyms,
term.tagvalues, term.relationships, term.rev_relationships)
end

Expand Down
11 changes: 6 additions & 5 deletions src/typedef.jl
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
immutable Typedef
# FIXME add description
"""
"""
struct Typedef
id::String
name::String
namespace::String
xref::String
end

import Base: isequal, ==

isequal(td1::Typedef, td2::Typedef) = td1.id == td2.id
==(td1::Typedef, td2::Typedef) = isequal(td1, td2)
Base.isequal(td1::Typedef, td2::Typedef) = td1.id == td2.id
Base.:(==)(td1::Typedef, td2::Typedef) = isequal(td1, td2)
10 changes: 10 additions & 0 deletions test/test_graph.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,22 @@ end
@testset "relationship tests" begin
GO = OBOParse.load("$testdir/data/go_mini.obo", "GO")

@test_throws KeyError gettermbyid(GO, 0)
term1 = gettermbyid(GO, 1)
term2 = gettermbyid(GO, 2)
term4 = gettermbyid(GO, 4)
term5 = gettermbyid(GO, 5)
term6 = gettermbyid(GO, 6)

@test_throws KeyError gettermbyid(GO, OBOParse.gettermid(GO, 0))
@test gettermbyid(GO, OBOParse.gettermid(GO, 1)) == term1
@test gettermbyid(GO, "GO:0000001") == term1
@test gettermbyid(GO, OBOParse.gettermid(GO, 2)) == term2

@test_throws KeyError gettermbyname(GO, "zero")
@test gettermbyname(GO, "one") == term1
@test gettermbyname(GO, "two") == term2

test_isa(GO, term1, term2)
test_isa(GO, term4, term2)
test_isa(GO, term5, term4)
Expand Down
9 changes: 8 additions & 1 deletion test/test_parser.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,11 @@ end

@test length(GO) == 5
@test GO["GO:0000002"].name == "two"
@test GO["GO:0000002"].def == "BBB"
@test GO["GO:0000002"].refs == OBOParse.RefDict()
@test GO["GO:0000001"].name == "one"
@test GO["GO:0000001"].def == "AAA"
@test GO["GO:0000001"].refs == OBOParse.RefDict()
@test GO["GO:0000004"].name == "four"
@test GO["GO:0000005"].name == "five"
@test GO["GO:0000006"].name == "six"
Expand All @@ -32,9 +36,12 @@ end
GO = OBOParse.load("$testdir/data/go.obo", "GO")
@test length(GO) > 71

@test GO["GO:0000009"].name == "alpha-1,6-mannosyltransferase activity"
@test GO["GO:0000009"].def == "Catalysis of the transfer of a mannose residue to an oligosaccharide, forming an alpha-(1->6) linkage."
@test GO["GO:0000009"].refs == OBOParse.RefDict("GOC"=>"mcc", "PMID"=>"2644248")

term1 = gettermbyid(GO, 18)
term2 = gettermbyid(GO, 6310)

@test relationship(term1, :regulates) == Set{OBOParse.TermId}((term2.id,))
@test relationship(term2, :regulates) == Set{OBOParse.TermId}()
end
Expand Down

0 comments on commit 877dc65

Please sign in to comment.