From b710e667c2c82c6d885bdd02eb7ec10d40ceef27 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Wed, 16 May 2018 16:59:20 +0000 Subject: [PATCH 1/9] require julia v0.6 - `immutable` -> `struct` - `typealias A B` -> `A = B` - drop julia v0.5 from TravisCI --- .travis.yml | 1 - REQUIRE | 2 +- src/ontology.jl | 5 ++--- src/parser.jl | 2 +- src/term.jl | 2 +- src/typedef.jl | 2 +- 6 files changed, 6 insertions(+), 8 deletions(-) diff --git a/.travis.yml b/.travis.yml index 97b3156..a952cb9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,5 @@ language: julia julia: - - 0.5 - 0.6 - nightly notifications: diff --git a/REQUIRE b/REQUIRE index 94237c0..137767a 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1 +1 @@ -julia 0.5 +julia 0.6 diff --git a/src/ontology.jl b/src/ontology.jl index 84b7d42..f103127 100644 --- a/src/ontology.jl +++ b/src/ontology.jl @@ -1,5 +1,5 @@ -immutable Ontology +struct Ontology header::Dict{String, Vector{String}} prefix::String terms::Dict{TermId, Term} @@ -39,8 +39,7 @@ Base.length(ontology::Ontology) = length(ontology.terms) parents(ontology::Ontology, term::Term, rel::Symbol = :is_a) = ontology[relationship(term, rel)] children(ontology::Ontology, term::Term, rel::Symbol = :is_a) = ontology[rev_relationship(term, rel)] -# FIXME use const when 0.5 compatibility is dropped -typealias VecOrTuple{T} Union{Vector{T}, Tuple{Vararg{T}}} +const VecOrTuple{T} = Union{Vector{T}, Tuple{Vararg{T}}} # return the set of all nodes of the ontology DAG that could be visited from `term` # node when traveling along `rels` edges using `rev` direction diff --git a/src/parser.jl b/src/parser.jl index e797f63..678c2fa 100644 --- a/src/parser.jl +++ b/src/parser.jl @@ -1,6 +1,6 @@ # The OBO Flat File parser -immutable Stanza +struct Stanza Typ::String # Official ones are: "Term", "Typedef" and "Instance" id::String tagvalues::Dict{String, Vector{String}} diff --git a/src/term.jl b/src/term.jl index 5983034..facf514 100644 --- a/src/term.jl +++ b/src/term.jl @@ 
-1,6 +1,6 @@ const TermId = String -immutable Term +struct Term id::TermId name::String diff --git a/src/typedef.jl b/src/typedef.jl index bdedaae..1e2ef6b 100644 --- a/src/typedef.jl +++ b/src/typedef.jl @@ -1,4 +1,4 @@ -immutable Typedef +struct Typedef id::String name::String namespace::String From 30d385055c9898105830a98157ef5e6f6b052a13 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Thu, 13 Jul 2017 21:12:36 +0000 Subject: [PATCH 2/9] gettermbyname(): throw KeyError + tests --- src/ontology.jl | 2 +- test/test_graph.jl | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/ontology.jl b/src/ontology.jl index f103127..c019ee1 100644 --- a/src/ontology.jl +++ b/src/ontology.jl @@ -18,7 +18,7 @@ function gettermbyname(ontology::Ontology, name) for term in allterms(ontology) (lowercase(term.name) == lname) && return term end - error("Term not found: $name") + throw(KeyError(name)) end gettermid(ontology::Ontology, id::Integer) = @sprintf("%s:%07d", ontology.prefix, id) diff --git a/test/test_graph.jl b/test/test_graph.jl index 4e5af1b..d8dafdd 100644 --- a/test/test_graph.jl +++ b/test/test_graph.jl @@ -6,12 +6,22 @@ end @testset "relationship tests" begin GO = OBOParse.load("$testdir/data/go_mini.obo", "GO") + @test_throws KeyError gettermbyid(GO, 0) term1 = gettermbyid(GO, 1) term2 = gettermbyid(GO, 2) term4 = gettermbyid(GO, 4) term5 = gettermbyid(GO, 5) term6 = gettermbyid(GO, 6) + @test_throws KeyError gettermbyid(GO, OBOParse.gettermid(GO, 0)) + @test gettermbyid(GO, OBOParse.gettermid(GO, 1)) == term1 + @test gettermbyid(GO, "GO:0000001") == term1 + @test gettermbyid(GO, OBOParse.gettermid(GO, 2)) == term2 + + @test_throws KeyError gettermbyname(GO, "zero") + @test gettermbyname(GO, "one") == term1 + @test gettermbyname(GO, "two") == term2 + test_isa(GO, term1, term2) test_isa(GO, term4, term2) test_isa(GO, term5, term4) From 96c407a523cacf982e0189839c892570cacb8162 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 
25 Sep 2017 13:17:11 +0000 Subject: [PATCH 3/9] isequal(Typedef,Typedef): import Base not required --- src/typedef.jl | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/typedef.jl b/src/typedef.jl index 1e2ef6b..4d8561c 100644 --- a/src/typedef.jl +++ b/src/typedef.jl @@ -5,7 +5,5 @@ struct Typedef xref::String end -import Base: isequal, == - -isequal(td1::Typedef, td2::Typedef) = td1.id == td2.id -==(td1::Typedef, td2::Typedef) = isequal(td1, td2) +Base.isequal(td1::Typedef, td2::Typedef) = td1.id == td2.id +Base.:(==)(td1::Typedef, td2::Typedef) = isequal(td1, td2) From aa3703767b785ba17b243dff86b3755fcfdc5728 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 25 Sep 2017 13:19:22 +0000 Subject: [PATCH 4/9] TagDict, RelDict typealiases --- src/parser.jl | 4 ++-- src/term.jl | 11 ++++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/parser.jl b/src/parser.jl index 678c2fa..a1a5c8d 100644 --- a/src/parser.jl +++ b/src/parser.jl @@ -3,7 +3,7 @@ struct Stanza Typ::String # Official ones are: "Term", "Typedef" and "Instance" id::String - tagvalues::Dict{String, Vector{String}} + tagvalues::TagDict end function find_first_nonescaped(s, ch) @@ -48,7 +48,7 @@ parseOBO(filepath::AbstractString) = open(parseOBO, filepath, "r") const r_stanza = r"^\[(.*)\]$" function parsetagvalues(s) - vals = Dict{String, Vector{String}}() + vals = TagDict() for line in eachline(s) line = strip(removecomments(line)) diff --git a/src/term.jl b/src/term.jl index facf514..d56b7ac 100644 --- a/src/term.jl +++ b/src/term.jl @@ -1,4 +1,6 @@ const TermId = String +const TagDict = Dict{String, Vector{String}} +const RelDict = Dict{Symbol, Set{TermId}} struct Term id::TermId @@ -8,16 +10,15 @@ struct Term namespace::String def::String synonyms::Vector{String} - tagvalues::Dict{String, Vector{String}} + tagvalues::TagDict - relationships::Dict{Symbol, Set{TermId}} - rev_relationships::Dict{Symbol, Set{TermId}} # reverse relationships + 
relationships::RelDict + rev_relationships::RelDict # reverse relationships Term(id::AbstractString, name::AbstractString="", obsolete::Bool=false, namespace::AbstractString="", def::AbstractString="") = new(id, name, obsolete, namespace, def, String[], - Dict{String, Vector{String}}(), - Dict{Symbol, Set{TermId}}(), Dict{Symbol, Set{TermId}}()) + TagDict(), RelDict(), RelDict()) Term(term::Term, name::AbstractString=term.name, obsolete::Bool=term.obsolete, namespace::AbstractString=term.namespace, def::AbstractString=term.def) = new(term.id, name, obsolete, namespace, def, term.synonyms, From 542dbad6b324fa7b0585596e4e12a7a7d614453d Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 25 Sep 2017 13:21:44 +0000 Subject: [PATCH 5/9] throw OBOParseException instead of error --- src/parser.jl | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/parser.jl b/src/parser.jl index a1a5c8d..6f7eeb6 100644 --- a/src/parser.jl +++ b/src/parser.jl @@ -6,6 +6,10 @@ struct Stanza tagvalues::TagDict end +struct OBOParseException <: Exception + msg::String +end + function find_first_nonescaped(s, ch) i = searchindex(s, ch) while i > 0 @@ -34,7 +38,7 @@ function parseOBO(stream::IO) while nextstanza != "" prevstanza = nextstanza vals, nextstanza = parsetagvalues(stream) - haskey(vals, id_tag) || error("Stanza is missing ID tag") + haskey(vals, id_tag) || throw(OBOParseException("Stanza is missing ID tag")) id = vals[id_tag][1] push!(stanzas, Stanza(prevstanza, id, vals)) end @@ -58,7 +62,7 @@ function parsetagvalues(s) isempty(line) && continue tag, value, ok = tagvalue(line) - ok || error("cannot find a tag (position: $(position(s))), empty: $(isempty(line)), line: `$(line)`") + ok || throw(OBOParseException("cannot find a tag (position: $(position(s))), empty: $(isempty(line)), line: `$(line)`")) push!(get!(()->Vector{String}(), vals, tag), value) end @@ -87,7 +91,7 @@ end function getuniqueval(st::Stanza, tagname, def::String="") if 
haskey(st.tagvalues, tagname) arr = st.tagvalues[tagname] - (length(arr) > 1) && error("Expect unique tag named $tagname") + (length(arr) > 1) && throw(OBOParseException("Expect unique tag named $tagname")) return arr[1] else return def @@ -121,7 +125,7 @@ function getterms(arr::Vector{Stanza}) for rel in get(st.tagvalues, "relationship", String[]) rel = strip(rel) tmp = split(rel) - length(tmp) == 2 || error("Failed to parse relationship field: $rel") + length(tmp) == 2 || throw(OBOParseException("Failed to parse relationship field: $rel")) rel_type = Symbol(tmp[1]) rel_id = tmp[2] @@ -132,7 +136,7 @@ function getterms(arr::Vector{Stanza}) end if isobsolete(term) && length(relationship(term ,:is_a)) > 0 - error("Obsolete term $term contains is_a relationship") + throw(OBOParseException("Obsolete term $term contains is_a relationship")) end append!(term.synonyms, get(st.tagvalues, "synonym", String[])) From aeac5f88d0b277389314ced11b655b32dac0a8bb Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 25 Sep 2017 13:27:15 +0000 Subject: [PATCH 6/9] test name and def parsing --- test/test_parser.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/test_parser.jl b/test/test_parser.jl index db84021..f00aa57 100644 --- a/test/test_parser.jl +++ b/test/test_parser.jl @@ -22,7 +22,9 @@ end @test length(GO) == 5 @test GO["GO:0000002"].name == "two" + @test GO["GO:0000002"].def == "BBB" @test GO["GO:0000001"].name == "one" + @test GO["GO:0000001"].def == "AAA" @test GO["GO:0000004"].name == "four" @test GO["GO:0000005"].name == "five" @test GO["GO:0000006"].name == "six" @@ -32,9 +34,11 @@ end GO = OBOParse.load("$testdir/data/go.obo", "GO") @test length(GO) > 71 + @test GO["GO:0000009"].name == "alpha-1,6-mannosyltransferase activity" + @test GO["GO:0000009"].def == "\"Catalysis of the transfer of a mannose residue to an oligosaccharide, forming an alpha-(1->6) linkage.\" [GOC:mcc, PMID:2644248]" + term1 = gettermbyid(GO, 18) term2 = 
gettermbyid(GO, 6310) - @test relationship(term1, :regulates) == Set{OBOParse.TermId}((term2.id,)) @test relationship(term2, :regulates) == Set{OBOParse.TermId}() end From 8a1c2ec495e930c72fbe5693eec9f968bb99d95e Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Mon, 25 Sep 2017 13:25:43 +0000 Subject: [PATCH 7/9] parse refs in Term def field helps to better keep track of the term references and makes term definition text more human-readable --- src/parser.jl | 17 ++++++++++++++--- src/term.jl | 12 ++++++++---- test/test_parser.jl | 5 ++++- 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/src/parser.jl b/src/parser.jl index 6f7eeb6..ba1e1d6 100644 --- a/src/parser.jl +++ b/src/parser.jl @@ -106,14 +106,25 @@ function getterms(arr::Vector{Stanza}) term_obsolete = getuniqueval(st, "is_obsolete") == "true" term_name = getuniqueval(st, "name") - term_def = getuniqueval(st, "def") + term_def_and_refs = getuniqueval(st, "def") + term_def_matches = match(r"^\"([^\"]+)\"(?:\s\[(.+)\])?$", term_def_and_refs) + if term_def_matches !== nothing + term_def = term_def_matches[1] + term_refs = RefDict(begin + Pair(split(ref, r"(? 71 @test GO["GO:0000009"].name == "alpha-1,6-mannosyltransferase activity" - @test GO["GO:0000009"].def == "\"Catalysis of the transfer of a mannose residue to an oligosaccharide, forming an alpha-(1->6) linkage.\" [GOC:mcc, PMID:2644248]" + @test GO["GO:0000009"].def == "Catalysis of the transfer of a mannose residue to an oligosaccharide, forming an alpha-(1->6) linkage." 
+ @test GO["GO:0000009"].refs == OBOParse.RefDict("GOC"=>"mcc", "PMID"=>"2644248") term1 = gettermbyid(GO, 18) term2 = gettermbyid(GO, 6310) From a778a00e8f684e1c329ab145cbca4ea57ff5cdbd Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Tue, 28 Nov 2017 15:05:17 +0000 Subject: [PATCH 8/9] fix whitespace --- src/parser.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.jl b/src/parser.jl index ba1e1d6..46dbdd5 100644 --- a/src/parser.jl +++ b/src/parser.jl @@ -146,7 +146,7 @@ function getterms(arr::Vector{Stanza}) push!(rev_relationship(otherterm, rel_type), st.id) end - if isobsolete(term) && length(relationship(term ,:is_a)) > 0 + if isobsolete(term) && length(relationship(term, :is_a)) > 0 throw(OBOParseException("Obsolete term $term contains is_a relationship")) end From 6f2a5f3af5823efc40bbe8829aee4a725332f7ae Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Wed, 16 May 2018 16:58:49 +0000 Subject: [PATCH 9/9] add docstrings --- src/ontology.jl | 4 +++- src/parser.jl | 14 +++++++++++++- src/term.jl | 15 +++++++++++++++ src/typedef.jl | 3 +++ 4 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/ontology.jl b/src/ontology.jl index c019ee1..00c8f6c 100644 --- a/src/ontology.jl +++ b/src/ontology.jl @@ -1,4 +1,6 @@ - +""" +The collection of all ontology terms and their relations. +""" struct Ontology header::Dict{String, Vector{String}} prefix::String diff --git a/src/parser.jl b/src/parser.jl index 46dbdd5..eb0875b 100644 --- a/src/parser.jl +++ b/src/parser.jl @@ -1,5 +1,16 @@ # The OBO Flat File parser - +""" +Represents one entry in the OBO file, e.g. +``` +[Term] +id: GO:0000002 +namespace: biological_process +def: BBB +name: two +``` +is stored as `Stanza` with `Typ` = "Term", `id` = "GO:0000002" and +`tagvalues = Dict("id" => ["GO:0000002"], "namespace" => ["biological_process"], "def" => ["BBB"], "name" => ["two"])`. 
+""" struct Stanza Typ::String # Official ones are: "Term", "Typedef" and "Instance" id::String @@ -51,6 +62,7 @@ parseOBO(filepath::AbstractString) = open(parseOBO, filepath, "r") const r_stanza = r"^\[(.*)\]$" +# returns tagvalues of the current Stanza and the type of the next one function parsetagvalues(s) vals = TagDict() diff --git a/src/term.jl b/src/term.jl index 92947bc..6d478a1 100644 --- a/src/term.jl +++ b/src/term.jl @@ -3,6 +3,21 @@ const TagDict = Dict{String, Vector{String}} const RefDict = Dict{String, String} const RelDict = Dict{Symbol, Set{TermId}} +""" +Ontology term. + +The `Term` object is a node in the directed acyclic ontology graph. +Its outgoing and incoming edges represent the relations with the other nodes and +can be retrieved by +```julia +relationship(term, sym) +``` +and +```julia +rev_relationship(term, sym) +``` +respectively, where `sym` is the relationship annotation (e.g. `:part_of`, `:is_a`, `:regulates`). +""" struct Term id::TermId name::String diff --git a/src/typedef.jl b/src/typedef.jl index 4d8561c..9e6a139 100644 --- a/src/typedef.jl +++ b/src/typedef.jl @@ -1,3 +1,6 @@ +# FIXME add description +""" +""" struct Typedef id::String name::String