This repository was archived by the owner on Aug 25, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathgroup_by.go
121 lines (105 loc) · 3.76 KB
/
group_by.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
package iter
import "github.com/barweiss/go-tuple"
// GroupBy returns a new iterator which yields tuples whose first field is a key
// returned by f, and whose second field is a sub-iterator yielding a group of
// consecutive values from the input iterator for which f returned the key in
// the first field.
func GroupBy[K comparable, V any](i Iter[V], f func(value V) (key K)) Iter[tuple.T2[K, Iter[V]]] {
next, ok := i()
if !ok {
return Empty[tuple.T2[K, Iter[V]]]()
}
currentKey := f(next)
// The outer pointer of recentCached is nil when there was no previous
// sub-iterator. The inner slice is nil when the previous sub-iterator is
// still being lazily evaluated and non-nil (but possibly of length 0)
// otherwise.
var recentCached *[]V
return func() (tuple.T2[K, Iter[V]], bool) {
if recentCached != nil && *recentCached == nil {
// In this case, we've returned a sub-iterator in the past, and the
// most recent one of those is still being lazily evaluated. In
// order to determine whether we can return another sub-iterator, we
// have to evaluate the input iterator until we get a new key.
// Initialize this to mark the sub-iterator as not requiring any
// further evaluation of the input iterator in case we exit the loop
// on the first iteration.
*recentCached = []V{}
for {
var ok bool
next, ok = i()
if !ok {
// The input iterator is no longer returning values so there
// cannot be another sub-iterator, so we return that the
// outer iterator is exhausted.
return tuple.T2[K, Iter[V]]{}, false
}
nextKey := f(next)
if nextKey != currentKey {
currentKey = nextKey
break
}
*recentCached = append(*recentCached, next)
}
}
// If we make it here then there's a next sub-iterator which corresponds
// to currentKey, and next is the value that should be returned first
// from this sub-iterator.
// currentCached is nil when the sub-iterator that we're about to return
// is still being evaluated lazily, and non-nil (but possibly of length
// 0) otherwise.
var currentCached []V
recentCached = ¤tCached
// first saves the current value of next, which is always a value that
// hasn't been returned by any iterator yet. We can't just use next
// directly because this iterator might return its first value after
// next has already been re-assigned.
first := next
firstReturned := false
return tuple.T2[K, Iter[V]]{
V1: currentKey,
V2: func() (V, bool) {
// Make sure we've returned the first value.
if !firstReturned {
firstReturned = true
return first, true
}
if currentCached != nil {
// If we're no longer being lazily evaluated...
if len(currentCached) == 0 {
// ...then if there are no more cached items, return
// that the iterator is exhausted.
var z V
return z, false
}
// ...then if there are more cached items, return the next
// cached value and remove it from the cached list.
res := currentCached[0]
currentCached = currentCached[1:]
return res, true
}
// Otherwise, evaluate the input iterator.
var ok bool
next, ok = i()
if !ok {
// Don't set currentCached in this case, because when
// evaluating the outer iterator, we should take the slow
// path then realize that the input iterator is exhausted.
var z V
return z, false
}
nextKey := f(next)
if nextKey != currentKey {
// Update currentKey, and do set currentCached in this case,
// because taking the slow path in the other iterator would
// be incorrect since we'd miss the current value of next.
currentKey = nextKey
currentCached = []V{}
var z V
return z, false
}
return next, true
},
}, true
}
}