Skip to content

Commit

Permalink
Merge branch 'pcedt2.0_coref'
Browse files Browse the repository at this point in the history
  • Loading branch information
michnov committed Mar 31, 2016
2 parents 528280d + 9a62eaf commit 9757501
Show file tree
Hide file tree
Showing 12 changed files with 175 additions and 41 deletions.
21 changes: 13 additions & 8 deletions lib/Treex/Block/Align/T/Supervised/Resolver.pm
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ with 'Treex::Block::Filter::Node::T';
# Override the default value of the inherited 'node_types' attribute.
has '+node_types' => ( default => 'all_anaph' );
# Read-only string; how it is consumed is not visible in this excerpt.
has 'model_path' => (is => 'ro', isa => 'Str');
# Language code compared against the source node's language in _finalize_links; defaults to this block's own language.
# NOTE(review): the default reads $self->language but the attribute is not lazy; Moose does not
# guarantee attribute initialization order, so 'lazy => 1' may be needed here -- confirm.
has 'align_trg_lang' => ( is => 'ro', isa => 'Treex::Type::LangCode', default => sub {my $self = shift; $self->language } );
# Alignment type passed to Treex::Tool::Align::Utils::add_aligned_node when a new link is stored.
has 'align_name' => ( is => 'ro', isa => 'Str', default => 'supervised' );
# When true, process_filtered_tnode deletes the node's existing alignments matching
# rel_types ['!gold','.*'] (for the aligned language and this block's selector) before adding the new link.
has 'delete_orig_align' => ( is => 'ro', isa => 'Bool', default => 1 );
# When true, process_filtered_tnode returns early for nodes whose 'is_align_coref' attribute is already set.
has 'skip_annotated' => ( is => 'ro', isa => 'Bool', default => 0 );

# Private, lazily built via _build_model_paths: a two-level hash of strings.
has '_model_paths' => (is => 'ro', isa => 'HashRef[HashRef[Str]]', lazy => 1, builder => '_build_model_paths');
# Private, lazily built via _build_rankers: a two-level hash of VowpalWabbit ranker objects.
has '_rankers' => (is => 'ro', isa => 'HashRef[HashRef[Treex::Tool::ML::VowpalWabbit::Ranker]]', builder => '_build_rankers', lazy => 1);
Expand Down Expand Up @@ -97,13 +99,6 @@ sub _finalize_links {

foreach my $from_id (sort keys %$links) {
my $from_node = $bundle->get_document->get_node_by_id($from_id);
if ($self->delete_orig_align) {
$from_node->delete_aligned_nodes_by_filter({
language => $self->_get_align_lang($from_node->language),
selector => $self->selector,
rel_types => ['!gold','.*'],
});
}
foreach my $to_id (sort keys %{$links->{$from_id}}) {
my $to_node = $bundle->get_document->get_node_by_id($to_id);
next if ($from_id ne $to_id && $from_node->language eq $self->align_trg_lang);
Expand All @@ -128,7 +123,7 @@ sub _finalize_links {
else {
if ($from_node != $to_node) {
log_info "[".(ref $self)."] Adding alignment: " . $from_node->id . " --> " . $to_node->id;
Treex::Tool::Align::Utils::add_aligned_node($from_node, $to_node, "supervised");
Treex::Tool::Align::Utils::add_aligned_node($from_node, $to_node, $self->align_name);
}
$covered_ids{$from_node->id} = 1;
$covered_ids{$to_node->id} = 1;
Expand All @@ -151,6 +146,8 @@ sub _get_align_lang {

sub process_filtered_tnode {
my ($self, $tnode) = @_;

return if ($self->skip_annotated && $tnode->get_attr('is_align_coref'));

my $lang = $tnode->language;
my $align_lang = $self->_get_align_lang($lang);
Expand All @@ -175,6 +172,14 @@ sub process_filtered_tnode {
$winner_idx = $ranker->pick_winner($feats);
}

if ($self->delete_orig_align) {
$tnode->delete_aligned_nodes_by_filter({
language => $self->_get_align_lang($tnode->language),
selector => $self->selector,
rel_types => ['!gold','.*'],
});
}
$tnode->set_attr('is_align_coref', 1);
$self->_add_link($tnode, $cands[$winner_idx]);
}

Expand Down
39 changes: 32 additions & 7 deletions lib/Treex/Block/Coref/RemoveLinks.pm
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use Moose;
use Treex::Core::Common;
extends 'Treex::Core::Block';

has 'type' => ( is => 'ro', isa => enum([qw/all gram text/]), default => 'all' );
has 'type' => ( is => 'ro', isa => enum([qw/all gram text all+special/]), default => 'all' );

sub process_tnode {
my ( $self, $tnode ) = @_;
Expand All @@ -18,25 +18,50 @@ sub process_tnode {
elsif ($self->type eq 'gram') {
$tnode->set_attr( 'coref_gram.rf', undef );
}
elsif ($self->type eq 'all') {
$tnode->set_attr( 'coref_gram.rf', undef );
$tnode->set_attr( 'coref_text.rf', undef );
}
else {
$tnode->set_attr( 'coref_gram.rf', undef );
$tnode->set_attr( 'coref_text.rf', undef );
$tnode->set_attr( 'coref_special', undef );
}
}

1;
__END__
=encoding utf-8
=head1 NAME
Treex::Block::Coref::RemoveLinks
=head1 DESCRIPTION
Removes coreference links from tectogrammatical trees.
=head1 PARAMETERS
=over
=item Treex::Block::Coref::RemoveLinks
=item type
Removes coreference links from tectogrammatical trees. Intended for
testing coreference resolution.
Which type of coreference link should be deleted. Possible values:
gram - grammatical coreference,
text - textual coreference,
all - grammatical and textual coreference,
all+special - grammatical, textual and special coreference.
=back
=cut
=head1 AUTHOR
Michal Novák <[email protected]>
=head1 COPYRIGHT AND LICENSE
Copyright © 2011, 2015-2016 by Institute of Formal and Applied Linguistics, Charles University in Prague
# Copyright 2011, 2015 Michal Novak
# This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.
This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
2 changes: 1 addition & 1 deletion lib/Treex/Block/Read/BasePMLReader.pm
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ sub _convert_ttree {
'dispmod', 'iterativeness', 'indeftype', 'person', 'numertype', 'politeness', 'negation', 'typgroup',
);
my @list_attribs = (
'compl.rf', 'coref_text.rf', 'coref_text', 'coref_gram.rf', 'a/aux.rf',
'compl.rf', 'coref_text.rf', 'coref_text', 'coref_gram.rf', 'bridging', 'a/aux.rf',
);

$self->_copy_attr( $pml_node, $treex_node, 'deepord', 'ord' );
Expand Down
11 changes: 9 additions & 2 deletions lib/Treex/Block/Read/PCEDT.pm
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ extends 'Treex::Block::Read::BasePMLReader';
use Treex::PML::Factory;
use Treex::PML::Instance;

has p_layer => ( isa => 'Bool', is => 'ro', default => 1, documentation=> 'Do we have phrase-structure trees? Should we load *.p.gz files?');
# layers: analytical, tectogrammatical, constituent (p-) trees
has '+_layers' => ( default => sub { [ 'a', 't', 'p' ] } );

Expand Down Expand Up @@ -35,7 +36,13 @@ sub _convert_ptree {
}

foreach my $pml_child ( $pml_node->children ) {
my $treex_child = $treex_node->create_child();
my $treex_child;
if ($pml_child->attr('#name') eq 'nonterminal') {
$treex_child = $treex_node->create_nonterminal_child();
}
else {
$treex_child = $treex_node->create_terminal_child();
}
$self->_convert_ptree( $pml_child, $treex_child );
}
return;
Expand Down Expand Up @@ -97,7 +104,7 @@ override '_convert_all_trees' => sub {

$zone->set_sentence( $aroot->get_subtree_string );

if ( $language eq 'en' ) {
if ( $self->p_layer && $language eq 'en' ) {
my $proot = $zone->create_ptree;
$self->_convert_ptree( $pmldoc->{$language}{p}->tree($tree_number), $proot );

Expand Down
2 changes: 2 additions & 0 deletions lib/Treex/Core/Node/Aligned.pm
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,8 @@ sub is_directed_aligned_to {
sub delete_aligned_nodes_by_filter {
my ($node, $filter) = @_;

$filter //= {};
$filter->{directed} //= 0;
my ($nodes, $types) = $node->get_aligned_nodes($filter);
for (my $i = 0; $i < @$nodes; $i++) {
log_debug "[Core::Node::Aligned::delete_aligned_nodes_by_filter]\tremoving: " . $types->[$i] . " " . $nodes->[$i]->id, 1;
Expand Down
29 changes: 28 additions & 1 deletion lib/Treex/Core/Node/T.pm
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ sub get_anodes {
return $self->_process_switches( $arg_ref, @nodes );
}

#------------ coreference nodes -------------------
#------------ coreference and bridging nodes -------------------

sub get_coref_nodes {
my ( $self, $arg_ref ) = @_;
Expand Down Expand Up @@ -274,6 +274,24 @@ sub update_coref_nodes {
return;
}

# Collect the targets of this node's bridging links.
#
# Reads the 'bridging' attribute (treated as an empty list when it is undefined),
# resolves each link's 'target_node.rf' id through the node's document and returns
# two array references in link order: the resolved nodes and the links' 'type' values.
# $arg_ref is accepted but not used by this method.
sub get_bridging_nodes {
    my ($self, $arg_ref) = @_;

    my $links    = $self->get_attr('bridging') // [];
    my $document = $self->get_document;

    my (@targets, @relations);
    foreach my $link (@$links) {
        push @targets,   $document->get_node_by_id( $link->{'target_node.rf'} );
        push @relations, $link->{'type'};
    }

    return (\@targets, \@relations);
}

# Append one bridging link from this node to $node.
#
# The link is a hash with 'target_node.rf' set to $node's id and 'type' set to $type
# (an empty string is stored when $type is undefined, so the record never holds undef).
# The link is pushed onto the current value of the 'bridging' attribute; if that value
# is undefined, the push creates a new list. The result is stored back via set_attr.
# Returns nothing.
sub add_bridging_node {
    my ( $self, $node, $type ) = @_;

    my $bridging = $self->get_attr('bridging');
    my $link     = {
        'target_node.rf' => $node->id,
        'type'           => $type // '',
    };
    push @$bridging, $link;

    $self->set_attr( 'bridging', $bridging );
    return;
}

# ----------- complement nodes -------------

sub get_compl_nodes {
Expand Down Expand Up @@ -455,6 +473,15 @@ Remove the specified nodes from C<coref_gram.rf> or C<coref_text.rf> (if they ar
Remove all invalid coreferences from C<coref_gram.rf> and C<coref_text.rf>.
=item $node->get_bridging_nodes()
Access the nodes referred from the current node by bridging anaphora (in C<bridging> attribute).
The method returns references to two lists of equal length: the referred nodes and the types of the bridging relations.
=item $node->add_bridging_node($node, $type)
Add bridging anaphora to C<$node> of type C<$type> (to C<bridging>).
=back
=head2 Access to source language t-layer (in MT)
Expand Down
10 changes: 9 additions & 1 deletion lib/Treex/Core/TredView.pm
Original file line number Diff line number Diff line change
Expand Up @@ -659,15 +659,23 @@ sub node_style_hook {
my @target_ids;
my @arrow_types;

foreach my $ref_attr ( 'coref_gram', 'coref_text', 'compl' ) {
foreach my $ref_attr ( 'coref_gram', 'coref_text', 'compl', 'bridging' ) {
if ( defined $node->attr( $ref_attr . '.rf' ) ) {
foreach my $target_id ( @{ $node->attr( $ref_attr . '.rf' ) } ) {
push @target_ids, $target_id;
push @arrow_types, $ref_attr;
}
}
elsif ( defined $node->attr( $ref_attr ) ) {
my $links = $node->attr( $ref_attr );
foreach my $link (@$links) {
push @target_ids, $link->{'target_node.rf'};
push @arrow_types, $ref_attr;
}
}
}


# P-layer indexes and coindexes
if ( $node->get_layer eq 'p' ) {
my $coindex;
Expand Down
21 changes: 12 additions & 9 deletions lib/Treex/Core/TredView/Colors.pm
Original file line number Diff line number Diff line change
Expand Up @@ -30,19 +30,22 @@ sub _build_colors {

'coref_gram' => '#c05633',
'coref_text' => '#4c509f',
'bridging' => '#00ccff',
'compl' => '#629f52',
'coindex' => '#ffa500', #orange

# various alignment link types
'alignment' => '#bebebe',
'left' => '#bebebe',
'right' => '#bebebe',
'int' => '#bebebe',
'gdfa' => '#bebebe',
'revgdfa' => '#bebebe',
'rule-based' => '#bebebe',
'monolingual' => '#bebebe',
'copy' => '#bebebe',
'alignment' => '#bebebe',
'left' => '#bebebe',
'right' => '#bebebe',
'int' => '#bebebe',
'gdfa' => '#bebebe',
'revgdfa' => '#bebebe',
'rule-based' => '#bebebe',
'monolingual' => '#bebebe',
'coref_supervised' => '#bebebe',
'coref_gold' => '#bebebe',
'copy' => '#bebebe',

'lex' => '#006400',
'aux' => '#ff8c00',
Expand Down
10 changes: 9 additions & 1 deletion lib/Treex/Core/TredView/Labels.pm
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ sub _tnode_labels {
$line1 .= $self->_colors->get( 'sentmod', 1 ) . '.' . $node->sentmod;
}

foreach my $type ( 'compl', 'coref_text', 'coref_gram' ) {
foreach my $type ( 'compl', 'coref_text', 'coref_gram', 'bridging' ) {
if ( defined $node->{ $type . '.rf' } ) {
foreach my $ref ( TredMacro::ListV( $node->{ $type . '.rf' } ) ) {
my $ref_node = $self->_treex_doc->get_node_by_id($ref);
Expand All @@ -292,6 +292,14 @@ sub _tnode_labels {
}
}
}
elsif ( defined $node->{ $type } ) {
foreach my $ref ( map {$_->{ 'target_node.rf' }} TredMacro::ListV( $node->{ $type } ) ) {
my $ref_node = $self->_treex_doc->get_node_by_id($ref);
if ( $node->get_bundle->get_position() != $ref_node->get_bundle->get_position() ) {
$line1 .= ' ' . $self->_colors->get( $type, 1 ) . $ref_node->{t_lemma};
}
}
}
}

my $line2 = $node->{functor};
Expand Down
20 changes: 11 additions & 9 deletions lib/Treex/Core/TredView/LineStyles.pm
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,17 @@ has '_dash' => (
sub _build_dash {
return {
# just alignment lines are dashed, others are plain
'alignment' => '5,3',
'left' => '5,3',
'right' => '5,3',
'int' => '5,3',
'gdfa' => '5,3',
'revgdfa' => '5,3',
'rule-based' => '5,3',
'monolingual' => '5,3',
'copy' => '5,3',
'alignment' => '5,3',
'left' => '5,3',
'right' => '5,3',
'int' => '5,3',
'gdfa' => '5,3',
'revgdfa' => '5,3',
'rule-based' => '5,3',
'monolingual' => '5,3',
'coref_supervised' => '5,3',
'coref_gold' => '5,3',
'copy' => '5,3',
};
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,9 @@

<!-- References -->
<member name="compl.rf"><list ordered="0"><cdata format="PMLREF"/></list></member>
<member name="bridging" required="0">
<list ordered="0" type="t-bridging-link.type"/>
</member>
<member name="coref_text.rf"><list ordered="0"><cdata format="PMLREF"/></list></member>
<member name="coref_text" required="0">
<list ordered="0" type="t-coref_text-link.type"/>
Expand Down Expand Up @@ -118,6 +121,9 @@
<list ordered="0" type="align-links.type"/>
</member>

<!-- align_coref stuff (mnovak 26.2.2016) -->
<member name="is_align_coref" type="bool.type"/>

<!-- TimeML stuff (ptacek 22.2.2010)-->
<member name="timeml">
<structure>
Expand Down Expand Up @@ -202,6 +208,29 @@
</member>
</structure>
</type>

<type name="t-bridging-link.type">
<structure>
<member name="target_node.rf" required="1">
<cdata format="PMLREF"/>
</member>
<member name="type" type="t-bridging-type.type" required="1"/>
</structure>
</type>

<type name="t-bridging-type.type">
<choice>
<value>CONTRAST</value>
<value>SUB_SET</value>
<value>SET_SUB</value>
<value>PART_WHOLE</value>
<value>WHOLE_PART</value>
<value>FUNCT_P</value>
<value>P_FUNCT</value>
<value>REST</value>
<value>ANAF</value>
</choice>
</type>

<type name="t-coref_text-link.type">
<structure>
Expand Down Expand Up @@ -388,6 +417,8 @@

<type name="t-dispmod.type">
<choice>
<value>disp3</value>
<value>disp2</value>
<value>disp1</value>
<value>disp0</value>
<value>nr</value>
Expand Down
Loading

0 comments on commit 9757501

Please sign in to comment.