A Go package for deriving Universal Dependencies from Dutch sentences parsed with Alpino.
Example input:
<?xml version="1.0" encoding="UTF-8"?>
<alpino_ds version="1.6">
<node begin="0" cat="top" end="8" id="0" rel="top">
<node begin="0" cat="smain" end="7" id="1" rel="--">
<node begin="0" cat="pp" end="3" id="2" rel="mod">
<node begin="0" end="1" frame="preposition(in,[])"
his="normal" his_1="decap" his_1_1="normal" id="3"
lcat="pp" lemma="in" pos="prep" postag="VZ(init)"
pt="vz" rel="hd" root="in" sense="in" vztype="init"
word="In"/>
<node begin="1" cat="np" end="3" id="4" rel="obj1">
<node begin="1" end="2"
frame="determiner(het,nwh,nmod,pro,nparg,wkpro)"
his="normal" his_1="normal" id="5" infl="het"
lcat="detp" lemma="het" lwtype="bep" naamval="stan"
npagr="evon" pos="det" postag="LID(bep,stan,evon)"
pt="lid" rel="det" root="het" sense="het" wh="nwh"
word="het"/>
<node begin="2" end="3" frame="proper_name(sg,'LOC')"
genus="onz" getal="ev" graad="basis" his="normal"
his_1="names_dictionary" id="6" lcat="np"
lemma="Waddengebied" naamval="stan" neclass="LOC"
ntype="eigen" num="sg" pos="name"
postag="N(eigen,ev,basis,onz,stan)" pt="n" rel="hd"
rnum="sg" root="Waddengebied" sense="Waddengebied"
word="Waddengebied"/>
</node>
</node>
<node begin="3" end="4" frame="verb(unacc,sg_heeft,copula)"
his="normal" his_1="normal" id="7" infl="sg_heeft"
lcat="smain" lemma="zijn" pos="verb"
postag="WW(pv,tgw,ev)" pt="ww" pvagr="ev" pvtijd="tgw"
rel="hd" root="ben" sc="copula" sense="ben"
stype="declarative" tense="present" word="is" wvorm="pv"/>
<node begin="4" cat="np" end="6" id="8" rel="su">
<node begin="4" end="5" frame="determiner(de)" his="normal"
his_1="normal" id="9" infl="de" lcat="detp" lemma="de"
lwtype="bep" naamval="stan" npagr="rest" pos="det"
postag="LID(bep,stan,rest)" pt="lid" rel="det" root="de"
sense="de" word="de"/>
<node begin="5" end="6" frame="noun(de,count,sg)" gen="de"
genus="zijd" getal="ev" graad="basis" his="normal"
his_1="normal" id="10" lcat="np" lemma="wind"
naamval="stan" ntype="soort" num="sg" pos="noun"
postag="N(soort,ev,basis,zijd,stan)" pt="n" rel="hd"
rnum="sg" root="wind" sense="wind" word="wind"/>
</node>
<node aform="base" begin="6" buiging="zonder" end="7"
frame="adjective(no_e(nonadv))" graad="basis" his="normal"
his_1="normal" id="11" infl="no_e" lcat="ap"
lemma="veranderlijk" pos="adj" positie="vrij"
postag="ADJ(vrij,basis,zonder)" pt="adj" rel="predc"
root="veranderlijk" sense="veranderlijk" vform="adj"
word="veranderlijk"/>
</node>
<node begin="7" end="8" frame="punct(punt)" his="normal"
his_1="normal" id="12" lcat="punct" lemma="." pos="punct"
postag="LET()" pt="let" rel="--" root="." sense="."
special="punt" word="."/>
</node>
<sentence sentid="knmi.1">In het Waddengebied is de wind veranderlijk .</sentence>
</alpino_ds>
Output (reformatted):
# source = knmi.1.xml
# sent_id = knmi.1
# text = In het Waddengebied is de wind veranderlijk.
# auto = ALUD 1
1 In in ADP VZ|init _ 3 case 3:case _
2 het het DET LID|bep|stan|evon Definite=Def 3 det 3:det _
3 Waddengebied Waddengebied PROPN N|eigen|ev|basis|onz|stan Gender=Neut|Number=Sing 7 obl 7:obl:in _
4 is zijn AUX WW|pv|tgw|ev Number=Sing|Tense=Pres|VerbForm=Fin 7 cop 7:cop _
5 de de DET LID|bep|stan|rest Definite=Def 6 det 6:det _
6 wind wind NOUN N|soort|ev|basis|zijd|stan Gender=Com|Number=Sing 7 nsubj 7:nsubj _
7 veranderlijk veranderlijk ADJ ADJ|vrij|basis|zonder Degree=Pos 0 root 0:root SpaceAfter=No
8 . . PUNCT LET _ 7 punct 7:punct _
For visualisation of the result, see http://www.let.rug.nl/kleiweg/conllu/
- Derive Universal Dependencies and insert into the alpino_ds format
- Insert given Universal Dependencies into the alpino_ds format
The package comes with two example programs, alud
and alud-dact
.
To install the demo program alud-dact
, you need to have
Oracle Berkeley DB XML
installed. With alud-dact
you can process dact files.
Install both:
make bin
... or install alud
only:
make alud
... or install alud-dact
only:
make alud-dact
You will find the programs in one of these directories:
- in
$GOBIN
if variable is set - else in
$GOPATH/bin
if variable is set - else in
~/go/bin