LEA-scorer #5

Open · wants to merge 6 commits into master
55 changes: 55 additions & 0 deletions README.md
@@ -0,0 +1,55 @@
# LEA Coreference Scorer

Implementation of the **LEA** coreference evaluation metric integrated into the CoNLL official scorer v8.01.

## Description

LEA is a Link-Based Entity-Aware metric designed to overcome the shortcomings of previous coreference evaluation metrics.
For each entity, **LEA** considers how important the entity is and how well it is resolved, i.e. its score is importance(entity) * resolution-score(entity).

The number of unique coreference links in an entity with n mentions is link(entity) = n*(n-1)/2.
The resolution score of an entity is the fraction of its coreference links that are correctly resolved.

In the provided implementation, the size of an entity is used as its measure of importance, i.e. importance(entity) = |entity|, so the more prominent entities of a text receive higher importance values.
However, depending on the end task or domain, one can choose other importance measures based on factors such as the entity type or the types of the included mentions.
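
The following is a minimal, self-contained Perl sketch of the recall side of this computation on invented toy entities (the mention ids and partitions are made up for illustration, and the singleton handling of the integrated implementation is omitted). Precision is obtained by swapping the roles of key and response, and F1 combines the two as usual.

```perl
#!/usr/bin/perl
# Toy LEA recall computation (illustrative only; mention ids are invented and
# the singleton special case of the integrated implementation is omitted).
use strict;
use warnings;

# Key (gold) and response (system) entities as lists of mention ids.
my @key      = ( [qw(m1 m2 m3)], [qw(m4 m5)] );
my @response = ( [qw(m1 m2)],    [qw(m3 m4 m5)] );

# Map each response mention to the index of the response entity containing it.
my %r_of;
for my $i (0 .. $#response) {
    $r_of{$_} = $i for @{ $response[$i] };
}

my ($num, $den) = (0, 0);
for my $entity (@key) {
    my $size  = scalar @$entity;
    my $links = $size * ($size - 1) / 2;    # n*(n-1)/2 unique links
    my $hits  = 0;
    for my $i (0 .. $#$entity) {
        for my $j ($i + 1 .. $#$entity) {
            my ($ma, $mb) = ($entity->[$i], $entity->[$j]);
            # A key link is resolved if both mentions land in the same response entity.
            $hits++
              if defined $r_of{$ma}
              && defined $r_of{$mb}
              && $r_of{$ma} == $r_of{$mb};
        }
    }
    my $resolution = $links ? $hits / $links : 0;
    $num += $size * $resolution;    # importance(entity) * resolution-score(entity)
    $den += $size;                  # importance(entity) = |entity|
}
printf "LEA recall = %.3f\n", $den ? $num / $den : 0;    # 3/5 = 0.600
```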

## Usage

**LEA** is integrated into the official CoNLL scorer v8.01 available at http://conll.github.io/reference-coreference-scorers.
The usage of the official CoNLL scorer (Pradhan et al., 2014) is as follows:


perl scorer.pl <metric> <key> <response> [<document-id>]


<metric>: the metric to use for scoring. One of the following values:

muc: MUCScorer (Vilain et al, 1995)
bcub: B-Cubed (Bagga and Baldwin, 1998)
ceafm: CEAF (Luo et al., 2005) using mention-based similarity
ceafe: CEAF (Luo et al., 2005) using entity-based similarity
blanc: BLANC (Luo et al., 2014) for both gold and predicted mentions
lea: LEA (Moosavi and Strube, 2016)
all: uses all the metrics to score

<key>: file with expected coreference chains in CoNLL-2011/2012 format

<response>: file with output of coreference system (CoNLL-2011/2012 format)

<document-id>: optional. The name of the document to score. If no name is
               given, all the documents in the dataset are scored. If the given
               name is "none", all the documents are scored but only the total
               results are shown.
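
For example, to score an entire dataset with LEA only and show just the total results, one could run (the key and response file names below are placeholders):

```
perl scorer.pl lea key.conll response.conll none
```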

## References

Nafise Sadat Moosavi and Michael Strube. 2016.
Which Coreference Evaluation Metric Do You Trust? A Proposal for a Link-based Entity Aware Metric.
In Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics.

Sameer Pradhan, Xiaoqiang Luo, Marta Recasens, Eduard Hovy, Vincent Ng, and Michael Strube. 2014.
Scoring coreference partitions of predicted mentions: A reference implementation.
In Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers),
Baltimore, Md., 22–27 June 2014, pages 30–35.

142 changes: 111 additions & 31 deletions lib/CorScorer.pm
@@ -24,7 +24,6 @@ package CorScorer;
# Revised in March, 2014 by Sameer Pradhan (sameer.pradhan <at> childrens.harvard.edu)
# to implement the BLANC metric for predicted mentions


use strict;
use Algorithm::Munkres;
use Data::Dumper;
@@ -33,7 +32,7 @@ use Data::Dumper;
use Math::Combinatorics;
use Cwd;

our $VERSION = '8.01';
our $VERSION = '9.0';
print "version: " . $VERSION . " " . Cwd::realpath(__FILE__) . "\n";

##
@@ -94,7 +93,7 @@ my $KEY_COLUMN = -1;
# F1 = 2 * Recall * Precision / (Recall + Precision)
sub Score {
my ($metric, $kFile, $rFile, $name) = @_;
our $repeated_mentions = 0;
our $repeated_mentions = 0;

if (lc($metric) eq 'blanc') {
return ScoreBLANC($kFile, $rFile, $name);
@@ -128,7 +127,8 @@ sub Score {
foreach my $iname (keys(%{$kIndexNames})) {
my $keys =
GetCoreference($kFile, $KEY_COLUMN, $iname, $kIndexNames->{$iname});
my $response = GetCoreference($rFile, $RESPONSE_COLUMN, $iname,
my $response =
GetCoreference($rFile, $RESPONSE_COLUMN, $iname,
$rIndexNames->{$iname});

print "$iname:\n" if ($VERBOSE);
@@ -233,7 +233,7 @@ sub GetCoreference {
my @sentId;
while (my $l = <F>) {
chomp($l);
$l =~ s/^\s+$//;
$l =~ s/^\s+$//;
next if ($l eq '');
if ($l =~ /\#\s*end document/) {
foreach my $h (@half) {
@@ -339,7 +339,7 @@ sub GetFileNames {
}

sub IdentifMentions {
my ($keys, $response, $totals) = @_;
my ($keys, $response, $totals) = @_;
my @kChains;
my @kChainsWithSingletonsFromResponse;
my @rChains;
@@ -369,21 +369,21 @@ sub IdentifMentions {

my $i = 0;
my @remove;

foreach my $mention (@$entity) {
if (defined($map{"$mention->[0],$mention->[1]"})) {
print "Repeated mention in the response: $mention->[0], $mention->[1] ",
$map{"$mention->[0],$mention->[1]"},
$id{"$mention->[0],$mention->[1]"},
"\n";
push(@remove, $i);
$main::repeated_mentions++;
$main::repeated_mentions++;

if ($main::repeated_mentions > 10)
{
print STDERR "Found too many repeated mentions (> 10) in the response, so refusing to score. Please fix the output.\n";
exit 1;
}
if ($main::repeated_mentions > 10) {
print STDERR
"Found too many repeated mentions (> 10) in the response, so refusing to score. Please fix the output.\n";
exit 1;
}

}
elsif (defined($id{"$mention->[0],$mention->[1]"})
@@ -403,25 +403,21 @@
}
}

# now, lets remove any empty elements in the response array after removing
# potential repeats
my @another_remove = ();
my $ii;

# now, lets remove any empty elements in the response array after removing
# potential repeats
my @another_remove = ();
my $ii;

foreach my $eentity (@$response)
{
if ( @$eentity == 0)
{
push(@another_remove, $ii);
}
$ii++;
}

foreach my $iii (sort { $b <=> $a } (@another_remove)) {
splice(@$response, $iii, 1);
}
foreach my $eentity (@$response) {
if (@$eentity == 0) {
push(@another_remove, $ii);
}
$ii++;
}

foreach my $iii (sort { $b <=> $a } (@another_remove)) {
splice(@$response, $iii, 1);
}

# Partial identificaiton: Inside bounds and including the head
my $part = 0;
@@ -583,12 +579,18 @@ sub Eval {
elsif ($scorer eq 'bcub') {
($nr, $dr, $np, $dp) = BCUBED($keyChainsOrig, $responseChainsOrig);
}
elsif ($scorer eq 'ebcub') {
($nr, $dr, $np, $dp) = EBCUBED($keyChainsOrig, $responseChainsOrig);
}
elsif ($scorer eq 'ceafm') {
($nr, $dr, $np, $dp) = CEAF($keyChainsOrig, $responseChainsOrig, 1);
}
elsif ($scorer eq 'ceafe') {
($nr, $dr, $np, $dp) = CEAF($keyChainsOrig, $responseChainsOrig, 0);
}
elsif ($scorer eq 'lea') {
($nr, $dr, $np, $dp) = LEA($keyChainsOrig, $responseChainsOrig);
}
else {
die "Metric $scorer not implemented yet\n";
}
@@ -703,6 +705,80 @@ sub BCUBED {
return ($acumR, $keymentions, $acumP, $resmentions);
}

sub LEA {
my ($keys, $responses) = @_;

# Computing recall
my ($acumR, $keysImportance) = LEASUB($keys, $responses);

# Computing precision
my ($acumP, $responsesImportance) = LEASUB($responses, $keys);

ShowRPF($acumR, $keysImportance, $acumP, $responsesImportance) if ($VERBOSE);
return ($acumR, $keysImportance, $acumP, $responsesImportance);

}

sub LEASUB {
my ($keys, $responses) = @_;

my $rIndex = Indexa($responses);

my $leaScore = 0;
my $importance = 0;

foreach my $kEntity (@$keys) {
next if (!defined($kEntity));
my $entitySize = scalar(@$kEntity);
my $resolvedLinks = 0;
my @mappedEntities = ();

if ($entitySize == 1) { #singletons
my $cMention = $kEntity->[0];
my $rEntity =
(defined($rIndex->{$cMention}))
? $responses->[$rIndex->{$cMention}]
: [];
my $rEntitySize = scalar(@$rEntity);

if ($rEntitySize == 1)
{ #the source singleton mention is also a singleton in the target entities
$resolvedLinks++;
}
}
else {
for (my $i = 0 ; $i < @$kEntity ; $i++) {
my $cMention = $kEntity->[$i];
for (my $j = $i + 1 ; $j < @$kEntity ; $j++) {
my $nMention = $kEntity->[$j];
if ( defined($rIndex->{$cMention})
&& defined($rIndex->{$nMention})
&& $rIndex->{$cMention} == $rIndex->{$nMention})
{
$resolvedLinks++;
}
}
}
}

my $entityLinks;
if ($entitySize == 1) {
$entityLinks = 1;
}
else {
$entityLinks = ($entitySize * ($entitySize - 1) / 2) if ($entitySize);
}

my $resolutionScore = 0;
$resolutionScore = $resolvedLinks / $entityLinks if ($entityLinks);
$leaScore += ($resolutionScore * $entitySize);
$importance += $entitySize;
}

return ($leaScore, $importance);

}
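
# Usage sketch (illustrative): like the other metrics, LEA is reached through
# the Eval() dispatch above, e.g.
#   my ($nr, $dr, $np, $dp) = LEA($keyChainsOrig, $responseChainsOrig);
# with recall = $nr / $dr and precision = $np / $dp; both denominators are
# sums of entity sizes, i.e. the importance weights.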

# type = 0: Entity-based
# type = 1: Mention-based
sub CEAF {
@@ -788,6 +864,9 @@ sub SIMEntityBased {
}
}

# if ($intersection == 1){
# $intersection = 0
# }
my $r = 0;
my $d = scalar(@$a) + scalar(@$b);
if ($d != 0) {
@@ -866,7 +945,8 @@ sub ScoreBLANC {
foreach my $iname (keys(%{$kIndexNames})) {
my $keys =
GetCoreference($kFile, $KEY_COLUMN, $iname, $kIndexNames->{$iname});
my $response = GetCoreference($rFile, $RESPONSE_COLUMN, $iname,
my $response =
GetCoreference($rFile, $RESPONSE_COLUMN, $iname,
$rIndexNames->{$iname});

print "$name:\n" if ($VERBOSE);
5 changes: 3 additions & 2 deletions scorer.bat
@@ -31,6 +31,7 @@ if (@ARGV < 3) {
bcub: B-Cubed (Bagga and Baldwin, 1998)
ceafm: CEAF (Luo et al, 2005) using mention-based similarity
ceafe: CEAF (Luo et al, 2005) using entity-based similarity
lea: LEA (Moosavi and Strube, 2016)
all: uses all the metrics to score

keys_file: file with expected coreference chains in SemEval format
@@ -47,14 +48,14 @@ if (@ARGV < 3) {
}

my $metric = shift (@ARGV);
if ($metric !~ /^(muc|bcub|ceafm|ceafe|all)/i) {
if ($metric !~ /^(muc|bcub|ceafm|ceafe|lea|all)/i) {
print "Invalid metric\n";
exit;
}


if ($metric eq 'all') {
foreach my $m ('muc', 'bcub', 'ceafm', 'ceafe') {
foreach my $m ('muc', 'bcub', 'ceafm', 'ceafe', 'lea') {
print "\nMETRIC $m:\n";
&CorScorer::Score( $m, @ARGV );
}
5 changes: 3 additions & 2 deletions scorer.pl
@@ -25,6 +25,7 @@ BEGIN
ceafm: CEAF (Luo et al, 2005) using mention-based similarity
ceafe: CEAF (Luo et al, 2005) using entity-based similarity
blanc: BLANC
lea: LEA (Moosavi and Strube, 2016)
all: uses all the metrics to score

keys_file: file with expected coreference chains in SemEval format
@@ -41,13 +42,13 @@ BEGIN
}

my $metric = shift(@ARGV);
if ($metric !~ /^(muc|bcub|ceafm|ceafe|blanc|all)/i) {
if ($metric !~ /^(muc|bcub|ebcub|ceafm|ceafe|blanc|lea|all)/i) {
print "Invalid metric\n";
exit;
}

if ($metric eq 'all') {
foreach my $m ('muc', 'bcub', 'ceafm', 'ceafe', 'blanc') {
foreach my $m ('muc', 'bcub', 'ceafe', 'lea') {
print "\nMETRIC $m:\n";
&CorScorer::Score($m, @ARGV);
}