diff --git a/.travis.yml b/.travis.yml index 97b3156..a952cb9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,5 @@ language: julia julia: - - 0.5 - 0.6 - nightly notifications: diff --git a/REQUIRE b/REQUIRE index 94237c0..137767a 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1 +1 @@ -julia 0.5 +julia 0.6 diff --git a/src/ontology.jl b/src/ontology.jl index 84b7d42..00c8f6c 100644 --- a/src/ontology.jl +++ b/src/ontology.jl @@ -1,5 +1,7 @@ - -immutable Ontology +""" +The collection of all ontology terms and their relations. +""" +struct Ontology header::Dict{String, Vector{String}} prefix::String terms::Dict{TermId, Term} @@ -18,7 +20,7 @@ function gettermbyname(ontology::Ontology, name) for term in allterms(ontology) (lowercase(term.name) == lname) && return term end - error("Term not found: $name") + throw(KeyError(name)) end gettermid(ontology::Ontology, id::Integer) = @sprintf("%s:%07d", ontology.prefix, id) @@ -39,8 +41,7 @@ Base.length(ontology::Ontology) = length(ontology.terms) parents(ontology::Ontology, term::Term, rel::Symbol = :is_a) = ontology[relationship(term, rel)] children(ontology::Ontology, term::Term, rel::Symbol = :is_a) = ontology[rev_relationship(term, rel)] -# FIXME use const when 0.5 compatibility is dropped -typealias VecOrTuple{T} Union{Vector{T}, Tuple{Vararg{T}}} +const VecOrTuple{T} = Union{Vector{T}, Tuple{Vararg{T}}} # return the set of all nodes of the ontology DAG that could be visited from `term` # node when traveling along `rels` edges using `rev` direction diff --git a/src/parser.jl b/src/parser.jl index e797f63..eb0875b 100644 --- a/src/parser.jl +++ b/src/parser.jl @@ -1,9 +1,24 @@ # The OBO Flat File parser - -immutable Stanza +""" +Represents one entry in the OBO file, e.g. +``` +[Term] +id: GO:0000002 +namespace: biological_process +def: BBB +name: two +``` +is stored as `Stanza` with `Typ` = "Term", `id` = "GO:0000002" and +`tagvalues = Dict("id" => "GO:0000002", "namespace" => ["biological_process"], "def" => ["BBB"], "name" => "two")`. +""" +struct Stanza Typ::String # Official ones are: "Term", "Typedef" and "Instance" id::String - tagvalues::Dict{String, Vector{String}} + tagvalues::TagDict +end + +struct OBOParseException <: Exception + msg::String end function find_first_nonescaped(s, ch) @@ -34,7 +49,7 @@ function parseOBO(stream::IO) while nextstanza != "" prevstanza = nextstanza vals, nextstanza = parsetagvalues(stream) - haskey(vals, id_tag) || error("Stanza is missing ID tag") + haskey(vals, id_tag) || throw(OBOParseException("Stanza is missing ID tag")) id = vals[id_tag][1] push!(stanzas, Stanza(prevstanza, id, vals)) end @@ -47,8 +62,9 @@ parseOBO(filepath::AbstractString) = open(parseOBO, filepath, "r") const r_stanza = r"^\[(.*)\]$" +# returns tagvalues of the current Stanza and the type of the next one function parsetagvalues(s) - vals = Dict{String, Vector{String}}() + vals = TagDict() for line in eachline(s) line = strip(removecomments(line)) @@ -58,7 +74,7 @@ function parsetagvalues(s) isempty(line) && continue tag, value, ok = tagvalue(line) - ok || error("cannot find a tag (position: $(position(s))), empty: $(isempty(line)), line: `$(line)`") + ok || throw(OBOParseException("cannot find a tag (position: $(position(s))), empty: $(isempty(line)), line: `$(line)`")) push!(get!(()->Vector{String}(), vals, tag), value) end @@ -87,7 +103,7 @@ end function getuniqueval(st::Stanza, tagname, def::String="") if haskey(st.tagvalues, tagname) arr = st.tagvalues[tagname] - (length(arr) > 1) && error("Expect unique tag named $tagname") + (length(arr) > 1) && throw(OBOParseException("Expect unique tag named $tagname")) return arr[1] else return def @@ -102,14 +118,25 @@ function getterms(arr::Vector{Stanza}) term_obsolete = getuniqueval(st, "is_obsolete") == "true" term_name = getuniqueval(st, "name") - term_def = getuniqueval(st, "def") + term_def_and_refs = getuniqueval(st, "def") + term_def_matches = match(r"^\"([^\"]+)\"(?:\s\[(.+)\])?$", term_def_and_refs) + if term_def_matches !== nothing + term_def = term_def_matches[1] + term_refs = RefDict(begin + Pair(split(ref, r"(? 0 - error("Obsolete term $term contains is_a relationship") + if isobsolete(term) && length(relationship(term, :is_a)) > 0 + throw(OBOParseException("Obsolete term $term contains is_a relationship")) end append!(term.synonyms, get(st.tagvalues, "synonym", String[])) diff --git a/src/term.jl b/src/term.jl index 5983034..6d478a1 100644 --- a/src/term.jl +++ b/src/term.jl @@ -1,26 +1,46 @@ const TermId = String - -immutable Term +const TagDict = Dict{String, Vector{String}} +const RefDict = Dict{String, String} +const RelDict = Dict{Symbol, Set{TermId}} + +""" +Ontology term. + +The `Term` object is a node in the direct acyclic ontology graph. +Its outgoing and incoming edges represent the relations with the other nodes and +could be retrieved by +```julia +relationship(term, sym) +``` +and +```julia +rev_relationship(term, sym) +``` +respectively, where `sym` is the relationship annotation (e.g. `:part_of`, `:is_a`, `:regulates`). +""" +struct Term id::TermId name::String obsolete::Bool namespace::String def::String + refs::RefDict synonyms::Vector{String} - tagvalues::Dict{String, Vector{String}} + tagvalues::TagDict - relationships::Dict{Symbol, Set{TermId}} - rev_relationships::Dict{Symbol, Set{TermId}} # reverse relationships + relationships::RelDict + rev_relationships::RelDict # reverse relationships Term(id::AbstractString, name::AbstractString="", obsolete::Bool=false, - namespace::AbstractString="", def::AbstractString="") = - new(id, name, obsolete, namespace, def, String[], - Dict{String, Vector{String}}(), - Dict{Symbol, Set{TermId}}(), Dict{Symbol, Set{TermId}}()) + namespace::AbstractString="", def::AbstractString="", + refs::RefDict=RefDict()) = + new(id, name, obsolete, namespace, def, refs, String[], + TagDict(), RelDict(), RelDict()) Term(term::Term, name::AbstractString=term.name, obsolete::Bool=term.obsolete, - namespace::AbstractString=term.namespace, def::AbstractString=term.def) = - new(term.id, name, obsolete, namespace, def, term.synonyms, + namespace::AbstractString=term.namespace, def::AbstractString=term.def, + refs::RefDict=term.refs) = + new(term.id, name, obsolete, namespace, def, refs, term.synonyms, term.tagvalues, term.relationships, term.rev_relationships) end diff --git a/src/typedef.jl b/src/typedef.jl index bdedaae..9e6a139 100644 --- a/src/typedef.jl +++ b/src/typedef.jl @@ -1,11 +1,12 @@ -immutable Typedef +# FIXME add description +""" +""" +struct Typedef id::String name::String namespace::String xref::String end -import Base: isequal, == - -isequal(td1::Typedef, td2::Typedef) = td1.id == td2.id -==(td1::Typedef, td2::Typedef) = isequal(td1, td2) +Base.isequal(td1::Typedef, td2::Typedef) = td1.id == td2.id +Base.:(==)(td1::Typedef, td2::Typedef) = isequal(td1, td2) diff --git a/test/test_graph.jl b/test/test_graph.jl index 4e5af1b..d8dafdd 100644 --- a/test/test_graph.jl +++ b/test/test_graph.jl @@ -6,12 +6,22 @@ end @testset "relationship tests" begin GO = OBOParse.load("$testdir/data/go_mini.obo", "GO") + @test_throws KeyError gettermbyid(GO, 0) term1 = gettermbyid(GO, 1) term2 = gettermbyid(GO, 2) term4 = gettermbyid(GO, 4) term5 = gettermbyid(GO, 5) term6 = gettermbyid(GO, 6) + @test_throws KeyError gettermbyid(GO, OBOParse.gettermid(GO, 0)) + @test gettermbyid(GO, OBOParse.gettermid(GO, 1)) == term1 + @test gettermbyid(GO, "GO:0000001") == term1 + @test gettermbyid(GO, OBOParse.gettermid(GO, 2)) == term2 + + @test_throws KeyError gettermbyname(GO, "zero") + @test gettermbyname(GO, "one") == term1 + @test gettermbyname(GO, "two") == term2 + test_isa(GO, term1, term2) test_isa(GO, term4, term2) test_isa(GO, term5, term4) diff --git a/test/test_parser.jl b/test/test_parser.jl index db84021..2b93b19 100644 --- a/test/test_parser.jl +++ b/test/test_parser.jl @@ -22,7 +22,11 @@ end @test length(GO) == 5 @test GO["GO:0000002"].name == "two" + @test GO["GO:0000002"].def == "BBB" + @test GO["GO:0000002"].refs == OBOParse.RefDict() @test GO["GO:0000001"].name == "one" + @test GO["GO:0000001"].def == "AAA" + @test GO["GO:0000001"].refs == OBOParse.RefDict() @test GO["GO:0000004"].name == "four" @test GO["GO:0000005"].name == "five" @test GO["GO:0000006"].name == "six" @@ -32,9 +36,12 @@ end GO = OBOParse.load("$testdir/data/go.obo", "GO") @test length(GO) > 71 + @test GO["GO:0000009"].name == "alpha-1,6-mannosyltransferase activity" + @test GO["GO:0000009"].def == "Catalysis of the transfer of a mannose residue to an oligosaccharide, forming an alpha-(1->6) linkage." + @test GO["GO:0000009"].refs == OBOParse.RefDict("GOC"=>"mcc", "PMID"=>"2644248") + term1 = gettermbyid(GO, 18) term2 = gettermbyid(GO, 6310) - @test relationship(term1, :regulates) == Set{OBOParse.TermId}((term2.id,)) @test relationship(term2, :regulates) == Set{OBOParse.TermId}() end