-
Notifications
You must be signed in to change notification settings - Fork 0
/
dynamic.go
232 lines (208 loc) · 4.39 KB
/
dynamic.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
package trietree
import (
"context"
"unicode/utf8"
)
// DTree is dynamic tree.
type DTree struct {
Root DNode
lastEdgeID int
}
// DNode is a node of dynamic tree.
type DNode struct {
// Label is a rune assigned this node. The label is unique among sibling
// nodes
Label rune
// EdgeID indicates the node has a corresponding key or not.
EdgeID int
// Level is equals key length when EdgeID is not zero.
Level int
// Low is sibling nodes which have smaller Label.
Low *DNode
// High is sibling node which have greater Label.
High *DNode
// Child is a top node of children nods.
Child *DNode
// Failure is used as a search destination when the desired Label is not
// found in Child. This will be filled by FillFailure().
Failure *DNode
}
// dig searches for a node with the desired label among its sibling nodes, or
// creates one if it does not exist.
func (dn *DNode) dig(c rune) *DNode {
p := dn.Child
if p == nil {
dn.Child = &DNode{Label: c}
return dn.Child
}
for {
if c == p.Label {
return p
}
if c < p.Label {
if p.Low == nil {
p.Low = &DNode{Label: c}
return p.Low
}
p = p.Low
} else {
if p.High == nil {
p.High = &DNode{Label: c}
return p.High
}
p = p.High
}
}
}
// Get obtains an existing child node for rune.
func (dn *DNode) Get(r rune) *DNode {
p := dn.Child
for p != nil {
if r == p.Label {
return p
}
if r < p.Label {
p = p.Low
} else {
p = p.High
}
}
return nil
}
// Put allocates an edge node for k key and emits ID for it.
// The ID will be greater than zero.
func (dt *DTree) Put(k string) int {
n := &dt.Root
level := 0
for _, r := range k {
n = n.dig(r)
level++
}
if n.EdgeID <= 0 {
dt.lastEdgeID++
n.EdgeID = dt.lastEdgeID
}
n.Level = level
return n.EdgeID
}
// Scan scans a string to find matched words.
func (dt *DTree) Scan(s string, r ScanReporter) error {
return dt.ScanContext(context.Background(), s, r)
}
// ScanContext scans a string to find matched words.
// ScanReporter r will receive reports for each characters when scan.
func (dt *DTree) ScanContext(ctx context.Context, s string, r ScanReporter) error {
sr := newScanReport(r, len(s))
curr := &dt.Root
for i, c := range s {
next := dt.nextNode(curr, c)
// emit a scan event.
sr.reset(i, c)
for n := next; n != nil; n = n.Failure {
if n.EdgeID > 0 {
sr.add(n.EdgeID, n.Level)
}
}
sr.emit()
// prepare for next.
if err := ctx.Err(); err != nil {
return err
}
curr = next
}
return nil
}
func (dt *DTree) nextNode(curr *DNode, c rune) *DNode {
root := &dt.Root
for {
next := curr.Get(c)
if next != nil {
return next
}
if curr == root {
return root
}
curr = curr.Failure
if curr == nil {
curr = root
}
}
}
type procDNode func(*DNode)
// eachSiblings applies the function fn to all sibling nodes.
func (dn *DNode) eachSiblings(fn procDNode) {
if dn == nil {
return
}
dn.Low.eachSiblings(fn)
fn(dn)
dn.High.eachSiblings(fn)
}
// Get retrieve a node for key, otherwise returns nil.
func (dt *DTree) Get(k string) *DNode {
n := &dt.Root
for _, r := range k {
n = n.Get(r)
if n == nil {
return nil
}
}
return n
}
// FillFailure fill Failure field with Aho-Corasick algorithm.
func (dt *DTree) FillFailure() {
dt.Root.Failure = &dt.Root
dt.fillFailure(&dt.Root)
dt.Root.Failure = nil
}
func (dt *DTree) fillFailure(parent *DNode) {
if parent.Child == nil {
return
}
//fmt.Printf("fillFailure: parent(%p)=%+[1]v\n", parent)
pf := parent.Failure
parent.Child.eachSiblings(func(curr *DNode) {
f := dt.nextNode(pf, curr.Label)
if f == curr {
f = &dt.Root
}
curr.Failure = f
//fmt.Printf(" curr(%p)=%+[1]v\n", curr)
dt.fillFailure(curr)
})
}
// CountChild counts child nodes.
func (dn *DNode) CountChild() int {
c := 0
dn.Child.eachSiblings(func(*DNode) { c++ })
return c
}
// CountAll counts all descended nodes.
func (dn *DNode) CountAll() int {
c := 1
dn.Child.eachSiblings(func(n *DNode) {
c += n.CountAll()
})
return c
}
// LongestPrefix finds a longest prefix node/edge matches given s string.
func (dt *DTree) LongestPrefix(s string) (prefix string, edgeID int) {
var last *DNode
ilast := 0
curr := &dt.Root
for i, r := range s {
next := curr.Get(r)
if next == nil {
break
}
if next.EdgeID > 0 {
last = next
ilast = i
}
curr = next
}
if last == nil {
return "", 0
}
return s[:ilast+utf8.RuneLen(last.Label)], last.EdgeID
}