This repository has been archived by the owner on Jun 20, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
near_agreement_multipi.py
executable file
·80 lines (73 loc) · 2.9 KB
/
near_agreement_multipi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#! /usr/bin/env python
# -*- coding: utf-8 -*-
from argparse import ArgumentParser
from functools import partial
from utils import *
from windowdiff import *
def pairwise_agreement(c, k, judgments):
"""
Given a count `c` of coders, a window size `k`, and counts of label
(number of boundaries) judgments for a window, calculate the
pairwise agreement on that window: the proportion of agreeing
judgment pairs out of the total number of judgment pairs for that
window.
"""
return (sum([ judgments[label]*(judgments[label]-1) for label in range(k+1) ])
/ float(c*(c-1)))
def observed_agreement(c, k, judgments):
"""
Given a count `c` of coders, a window size `k`, and counts of
labels (number of boundaries judgments) for a list of windows,
calculate the observed agreement.
"""
window_agreements = [ pairwise_agreement(c, k, j) for j in judgments ]
return sum(window_agreements) / len(window_agreements)
def expected_agreement_pi(judgments):
"""
Given counts of labels (number of boundaries judgments) for a list
of windows, calculate the expected (due to chance) agreement
according to Fleiss’s multi-π.
"""
label_totals = reduce(lambda x,y: x+y, judgments).values()
return sum([ n**2 for n in label_totals ]) / float(sum(label_totals))**2
def near_agreement(segmentations, k=None,
expected_agreement=expected_agreement_pi):
"""
Given coders' segmentations (represented as segment masses),
calculate agreement on boundary counts within windows.
"""
index_lists, m = masses_to_indexes(segmentations)
if k is None: k = window_size(segmentations)
judgments = count_window_judgments(index_lists, m, k)
c = len(segmentations)
o = observed_agreement(c, k, judgments)
e = expected_agreement_pi(judgments)
return (o-e) / (1-e)
def main():
"Calculates near segmentation agreement using Fleiss’s multi-π."
p = ArgumentParser(description=main.__doc__)
p.add_argument(
'filename', help='name of the JSON segmentation file')
p.add_argument(
'--reference', help='name of the reference annotator used to calculate k')
args = p.parse_args()
o = load_segmentation_data(args.filename)
if args.reference is None:
# Calculate window size per document
agreement = near_agreement
else:
# Use one window size across all documents
ra = 'annotators:{}'.format(args.reference)
reference_segmentations = [ s[ra] for s in o['items'].values() ]
k = int(round(mean_segment_mass(reference_segmentations) / 2))
agreement = partial(near_agreement, k=k)
print '''
Near segmentation agreement (Fleiss’s multi-π), where agreement is
modeled as two annotators counting the same number of boundaries
within a window (i.e. WindowDiff):
'''
print_coefficients(per_document_coefficients(o['items'], agreement))
print '\nOverall agreement: {:.2f}\n'.format(
overall_coefficient(o['items'], agreement))
if __name__ == "__main__":
main()