-
Notifications
You must be signed in to change notification settings - Fork 0
/
NLPTree.py
121 lines (103 loc) · 2.97 KB
/
NLPTree.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!/usr/bin/env python
# [email protected] (Jason Riesa)
# Python tree class for reading and processing PTB-style trees
import re
import weakref
from Tree import Tree
from PartialGridAlignment import PartialGridAlignment
class NLPTree(Tree):
    """Tree node for reading and processing PTB-style trees.

    Adds span bookkeeping, weak references to preterminal nodes, and lists of
    partial alignments on top of the ``Tree`` base class.
    """

    def setup(self, data, children=None):
        """Initialise this node's label, links and bookkeeping fields.

        data     -- value stored on this node (rendered with str() in __str__)
        children -- optional list of child nodes; each child receives a weak
                    reference to this node as ``parent`` and its list position
                    as ``order``
        """
        self.data = data
        self.span = None
        self.parent = None
        self.children = children if children is not None else []
        for ci, child in enumerate(self.children):
            # Weak back-reference: a child does not keep its parent alive.
            child.parent = weakref.ref(self)
            child.order = ci

        # Weak references to preterminal nodes; filled by getPreTerminals().
        self.terminals = []
        # NOTE(review): oracle/hope/fear and i/j are only initialised here;
        # their meaning is defined by code outside this class -- confirm.
        self.oracle = None
        self.hope = None
        self.fear = None
        self.order = 0
        self.i = -1
        self.j = -1
        # Hold list of PartialGridAlignments associated with this node
        self.partialAlignments = []
        self.partialAlignments_hope = []
        self.partialAlignments_fear = []

    def write(self):
        """
        Print a PTB-style string encoding of the tree
        """
        # dfs_write_ptb is not defined in this class; presumably inherited
        # from Tree -- TODO confirm.
        self.dfs_write_ptb()
        # print("") emits the terminating newline under both Python 2 and
        # Python 3; a bare `print` is a no-op expression under Python 3.
        print("")

    def getPreTerminals(self):
        """
        Return a list of preterminal nodes in the span of this node

        The list holds weak references and is also stored on
        ``self.terminals``. It is rebuilt from scratch on every call, so
        calling this method repeatedly does not accumulate duplicates.
        """
        found = []
        for child in self.children:
            if not child.children:
                # A childless child makes this node a preterminal.
                found.append(weakref.ref(self))
            else:
                found.extend(child.getPreTerminals())
        self.terminals = found
        return self.terminals

    def getTerminal(self, i):
        """
        Return terminal with index i.
        Store only weak references to terminals.
        """
        # Call the weak reference to obtain the node itself.
        return self.terminals[i]()

    def getTerminals(self):
        """Yield each node referenced by ``self.terminals``, in order."""
        for ref in self.terminals:
            yield ref()

    def span_start(self):
        """Return the first index covered by this node."""
        if self.span is not None:
            # Dont recompute if we already know the answer
            return self.span[0]
        if self.children:
            return self.children[0].span_start()
        # Childless node: eIndex is not assigned in this class; presumably
        # set elsewhere on leaf nodes -- TODO confirm.
        return self.eIndex

    def get_span(self):
        """Return the cached ``(start, end)`` span, computing it on first use."""
        if self.span is None:
            start = self.span_start()
            end = self.span_end()
            self.span = (start, end)
        return self.span

    def span_end(self):
        """Return the last index covered by this node."""
        if self.span is not None:
            # Dont recompute if we already know the answer
            return self.span[1]
        if self.children:
            return self.children[-1].span_end()
        # Childless node: see the note on eIndex in span_start.
        return self.eIndex

    def isWithinSpan(self, index):
        """Return True if index is within the (inclusive) span of this node."""
        first, last = self.get_span()
        return first <= index <= last

    def detach(self):
        """Remove this node from its parent's child list, if it has a parent.

        Does nothing when ``parent`` is None (setup() leaves it that way and
        delete_child() resets it) or when the weakly referenced parent is no
        longer alive.
        """
        if self.parent is None:
            # Previously this path called None() and raised TypeError.
            return
        parent = self.parent()
        if parent is not None:
            parent.delete_child(self.order)

    def delete_child(self, i):
        """Remove the child at position i and renumber the following siblings.

        The removed child's ``parent`` is reset to None and its ``order`` to 0.
        Raises IndexError when i is out of range.
        """
        removed = self.children[i]  # raises IndexError for a bad index
        if i < 0:
            # Normalise so the deletion and the renumbering below address the
            # same position the lookup above did.
            i += len(self.children)
        removed.parent = None
        removed.order = 0
        del self.children[i]
        for j in range(i, len(self.children)):
            self.children[j].order = j

    def __str__(self):
        """Return the PTB-style bracketed encoding of this subtree.

        Nodes with children render as ``(data child child ...)``. A childless
        node renders as its data with ``(`` and ``)`` replaced by ``-LRB-`` and
        ``-RRB-``.
        """
        if self.children:
            parts = [str(self.data)]
            parts.extend(str(child) for child in self.children)
            return "(" + " ".join(parts) + ")"
        leaf = str(self.data)
        return leaf.replace("(", "-LRB-").replace(")", "-RRB-")