-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathein_reduce.h
487 lines (434 loc) · 21 KB
/
ein_reduce.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/** \file ein_reduce.h
* \brief Optional helper for computing Einstein reductions on arrays.
*/
#ifndef NDARRAY_EIN_REDUCE_H
#define NDARRAY_EIN_REDUCE_H
#include "array/array.h"
namespace nda {
namespace internal {
// TODO: Find a way to enable operations with non-op types? e.g. scalars.
template <class T>
using enable_if_ein_op =
std::enable_if_t<std::is_same<typename T::is_ein_op, std::true_type>::value>;
template <class T>
using enable_if_ein_assign =
std::enable_if_t<std::is_same<typename T::is_assign, std::true_type>::value>;
// Briefly, the high level goal of the next few classes is to enable construction of
// expressions describing Einstein summations or other reductions. This is done using
// a small expression template system. Normally, expression templates are troublesome
// due to overwhemling the compiler's ability to do CSE and other optimziations. In
// the case of Einstein reductions, the expressions will usually be very small...
template <class Derived>
struct ein_op_base {
using is_ein_op = std::true_type;
using is_assign = std::false_type;
// We need to be able to get the derived type when creating binary operations using
// this operation as an operand.
const Derived& derived() const { return *static_cast<const Derived*>(this); }
auto operator-() const { return make_ein_op_negate(derived()); }
template <class T, class = enable_if_ein_op<T>>
auto operator+(const T& r) const {
return make_ein_op_add(derived(), r);
}
template <class T, class = enable_if_ein_op<T>>
auto operator-(const T& r) const {
return make_ein_op_sub(derived(), r);
}
template <class T, class = enable_if_ein_op<T>>
auto operator*(const T& r) const {
return make_ein_op_mul(derived(), r);
}
template <class T, class = enable_if_ein_op<T>>
auto operator/(const T& r) const {
return make_ein_op_div(derived(), r);
}
};
// A leaf operand of an Einstein reduction expression. The Is... indicate the
// dimension of the reduction to use to address this operand.
template <class Op, size_t... Is>
struct ein_op : public ein_op_base<ein_op<Op, Is...>> {
Op op;
ein_op(Op op) : op(std::move(op)) {}
// The largest dimension used by this operand.
static constexpr index_t MaxIndex = sizeof...(Is) == 0 ? -1 : variadic_max(Is...);
// auto doesn't work here because it doesn't include the reference type of operator() when we
// need it, but it writing it includes it when we can't, e.g. if op(...) doesn't return a
// reference.
template <class Idx>
NDARRAY_INLINE decltype(op(Is...)) operator()(const Idx& i) const {
return op(std::get<Is>(i)...);
}
// Only ein_op has assignment operators.
template <class T, class = enable_if_ein_op<T>>
auto operator=(const T& r) const {
return make_ein_op_assign(*this, r);
}
template <class T, class = enable_if_ein_op<T>>
auto operator+=(const T& r) const {
return make_ein_op_add_assign(*this, r);
}
template <class T, class = enable_if_ein_op<T>>
auto operator-=(const T& r) const {
return make_ein_op_sub_assign(*this, r);
}
template <class T, class = enable_if_ein_op<T>>
auto operator*=(const T& r) const {
return make_ein_op_mul_assign(*this, r);
}
};
// A unary operation on an Einstein operand.
template <class Op, class Derived>
struct ein_unary_op : public ein_op_base<Derived> {
Op op;
ein_unary_op(const Op& op) : op(op) {}
static constexpr index_t MaxIndex = Op::MaxIndex;
};
// Unary negate.
template <class Op>
struct ein_negate_op : public ein_unary_op<Op, ein_negate_op<Op>> {
using base = ein_unary_op<Op, ein_negate_op<Op>>;
ein_negate_op(const Op& op) : base(op) {}
template <class Idx>
NDARRAY_INLINE auto operator()(const Idx& i) const {
return -base::op(i);
}
};
template <class Op>
auto make_ein_op_negate(const Op& op) {
return ein_negate_op<Op>(op);
}
// Cast to a different type.
template <class Type, class Op>
struct ein_cast_op : public ein_unary_op<Op, ein_cast_op<Type, Op>> {
using base = ein_unary_op<Op, ein_cast_op<Type, Op>>;
ein_cast_op(const Op& op) : base(op) {}
template <class Idx>
NDARRAY_INLINE auto operator()(const Idx& i) const {
return static_cast<Type>(base::op(i));
}
};
// A binary operation of two operands.
template <class OpA, class OpB, class Derived>
struct ein_binary_op : public ein_op_base<Derived> {
OpA op_a;
OpB op_b;
ein_binary_op(const OpA& a, const OpB& b) : op_a(a), op_b(b) {}
static constexpr index_t MaxIndex = internal::max(OpA::MaxIndex, OpB::MaxIndex);
};
#define NDARRAY_MAKE_EIN_BINARY_HELPERS(name, op) \
template <class OpA, class OpB> \
auto make_##name(const OpA& a, const OpB& b) { \
return name<OpA, OpB>(a, b); \
}
#define NDARRAY_MAKE_EIN_BINARY_OP(name, op, is_assign_) \
template <class OpA, class OpB> \
struct name : public ein_binary_op<OpA, OpB, name<OpA, OpB>> { \
using base = ein_binary_op<OpA, OpB, name>; \
name(const OpA& a, const OpB& b) : base(a, b) {} \
using is_assign = is_assign_; \
template <class Idx> \
NDARRAY_INLINE auto operator()(const Idx& i) const { \
return base::op_a(i) op base::op_b(i); \
} \
}; \
NDARRAY_MAKE_EIN_BINARY_HELPERS(name, op)
#define NDARRAY_MAKE_EIN_BINARY_FN(name, fn) \
template <class OpA, class OpB> \
struct name : public ein_binary_op<OpA, OpB, name<OpA, OpB>> { \
using base = ein_binary_op<OpA, OpB, name>; \
name(const OpA& a, const OpB& b) : base(a, b) {} \
template <class Idx> \
NDARRAY_INLINE auto operator()(const Idx& i) const { \
using internal::min; \
using internal::max; \
return fn(base::op_a(i), base::op_b(i)); \
} \
}; \
NDARRAY_MAKE_EIN_BINARY_HELPERS(name, op)
// Define the expression types for the operations we support.
NDARRAY_MAKE_EIN_BINARY_OP(ein_op_add, +, std::false_type);
NDARRAY_MAKE_EIN_BINARY_OP(ein_op_sub, -, std::false_type);
NDARRAY_MAKE_EIN_BINARY_OP(ein_op_mul, *, std::false_type);
NDARRAY_MAKE_EIN_BINARY_OP(ein_op_div, /, std::false_type);
NDARRAY_MAKE_EIN_BINARY_FN(ein_op_min, min);
NDARRAY_MAKE_EIN_BINARY_FN(ein_op_max, max);
NDARRAY_MAKE_EIN_BINARY_OP(ein_op_assign, =, std::true_type);
NDARRAY_MAKE_EIN_BINARY_OP(ein_op_add_assign, +=, std::true_type);
NDARRAY_MAKE_EIN_BINARY_OP(ein_op_sub_assign, -=, std::true_type);
NDARRAY_MAKE_EIN_BINARY_OP(ein_op_mul_assign, *=, std::true_type);
#undef NDARRAY_MAKE_EIN_BINARY_FN
#undef NDARRAY_MAKE_EIN_BINARY_OP
#undef NDARRAY_MAKE_EIN_BINARY_HELPERS
} // namespace internal
/** Cast an Einstein summation operand to a different type `Type`.
* The cast is performed using `static_cast<Type>`. */
template <class Type, class Op>
auto cast(const internal::ein_op_base<Op>& op) {
return internal::ein_cast_op<Type, Op>(op.derived());
}
/** The min or max of two Einstein summation operands. */
template <class OpA, class OpB>
auto min(const internal::ein_op_base<OpA>& a, const internal::ein_op_base<OpB>& b) {
return internal::make_ein_op_min(a.derived(), b.derived());
}
template <class OpA, class OpB>
auto max(const internal::ein_op_base<OpA>& a, const internal::ein_op_base<OpB>& b) {
return internal::make_ein_op_max(a.derived(), b.derived());
}
namespace internal {
// Let these be found via ADL too.
using nda::cast;
using nda::max;
using nda::min;
// If multiple operands provide the same dim, we need to reconcile them
// to one dim. If you follow a compiler or runtime error here, your
// Einstein expression tries to address two dimensions that have different
// bounds with the same loop variable.
// TODO: It would be nice if this error would appear when constructing the
// Einstein expression when possible, but that's really hard to do.
template <class Dim0, class... Dims,
class = std::enable_if_t<!any(not_equal(Dim0::Min, Dims::Min)...)>,
class = std::enable_if_t<!any(not_equal(Dim0::Extent, Dims::Extent)...)>>
NDARRAY_INLINE const Dim0& reconcile_dim(const Dim0& dim0, const Dims&... dims) {
if (dim0.stride() != 0) {
// If the first dim is an output dimension, just require the other dims
// to be in-bounds. This is a slightly relaxed requirement compared to the
// compile-time constraints.
assert(all(dims.is_in_range(dim0)...));
} else {
// If this is a reduction dimension, all of the dims must match. For
// example, this is the constraint that checks that the inner dimensions of
// a matrix multiplication are compatible.
assert(all(dim0.min() == dims.min()...));
assert(all(dim0.extent() == dims.extent()...));
}
return dim0;
}
// If we have zero dims, the user skipped a dim index, so we need a dummy
// loop.
NDARRAY_INLINE dim<0, 1, 0> reconcile_dim() { return {}; }
template <class... Dims, size_t... Is>
NDARRAY_INLINE auto reconcile_dim(const std::tuple<Dims...>& dims, index_sequence<Is...>) {
return reconcile_dim(std::get<Is>(dims)...);
}
template <class... Dims>
NDARRAY_INLINE auto reconcile_dim(const std::tuple<Dims...>& dims) {
return reconcile_dim(dims, make_index_sequence<sizeof...(Dims)>());
}
// Get the shape of an ein_reduce operand, or an empty shape if not an array.
template <class T, class Shape>
NDARRAY_INLINE const auto& dims_of(const array_ref<T, Shape>& op) {
return op.shape().dims();
}
template <class T>
NDARRAY_INLINE std::tuple<> dims_of(const T& op) {
return std::tuple<>();
}
// Helper to reinterpret a dim with a new stride.
template <index_t NewStride, index_t Min, index_t Extent, index_t Stride>
NDARRAY_INLINE auto with_stride(const dim<Min, Extent, Stride>& d) {
return dim<Min, Extent, NewStride>(d.min(), d.extent());
}
template <index_t NewStride, class Dim>
NDARRAY_INLINE auto with_stride(const std::tuple<Dim>& maybe_dim) {
return std::make_tuple(with_stride<NewStride>(std::get<0>(maybe_dim)));
}
template <index_t NewStride>
NDARRAY_INLINE std::tuple<> with_stride(std::tuple<> maybe_dim) {
return maybe_dim;
}
// These types are flags that let us overload behavior based on these 3 options.
class is_inferred_shape {};
class is_result_shape {};
class is_operand_shape {};
// Get a dim from an operand, depending on the intended use of the shape.
template <size_t Dim, class Dims, size_t... Is>
NDARRAY_INLINE auto gather_dims(is_result_shape, const ein_op<Dims, Is...>& op) {
// If this is part of the result, we want to keep its strides.
return get_or_empty<index_of<Dim, Is...>()>(dims_of(op.op));
}
template <size_t Dim, class Dims, size_t... Is>
NDARRAY_INLINE auto gather_dims(is_inferred_shape, const ein_op<Dims, Is...>& op) {
// For inferred shapes, we want shapes without any constexpr strides, so it can be reshaped.
return with_stride<dynamic>(get_or_empty<index_of<Dim, Is...>()>(dims_of(op.op)));
}
template <size_t Dim, class Dims, size_t... Is>
NDARRAY_INLINE auto gather_dims(is_operand_shape, const ein_op<Dims, Is...>& op) {
// If this is an operand shape, we want all of its dimensions to be stride 0.
return with_stride<0>(get_or_empty<index_of<Dim, Is...>()>(dims_of(op.op)));
}
template <size_t Dim, class Kind, class Op, class X>
NDARRAY_INLINE auto gather_dims(Kind kind, const ein_unary_op<Op, X>& op) {
return gather_dims<Dim>(kind, op.op);
}
template <size_t Dim, class Kind, class OpA, class OpB, class X>
NDARRAY_INLINE auto gather_dims(Kind kind, const ein_binary_op<OpA, OpB, X>& op) {
return std::tuple_cat(gather_dims<Dim>(kind, op.op_a), gather_dims<Dim>(kind, op.op_b));
}
template <size_t Dim, class Kind0, class Op0, class Kind1, class Op1>
NDARRAY_INLINE auto gather_dims(Kind0 kind0, const Op0& op0, Kind1 kind1, const Op1& op1) {
return std::tuple_cat(gather_dims<Dim>(kind0, op0), gather_dims<Dim>(kind1, op1));
}
template <size_t... Is, class... KindAndOps>
NDARRAY_UNIQUE auto make_ein_reduce_shape(
index_sequence<Is...>, const KindAndOps&... kind_and_ops) {
return make_shape(reconcile_dim(gather_dims<Is>(kind_and_ops...))...);
}
} // namespace internal
/** Operand for an Einstein summation, which is an array or other
* callable object, along with a set of dimension indices.
* `ein<i, j, ...>(a)` means the dimensions `i, j, ...` of the
* summation index are used to address `a` during Einstein
* summation. The number of dimensions must match the number of
* arguments of `a`. See `ein_reduce()` for more details. */
template <size_t... Is, class Op, class = internal::enable_if_callable<Op, decltype(Is)...>>
auto ein(Op op) {
return internal::ein_op<Op, Is...>(std::move(op));
}
template <size_t... Is, class T, class Shape, class Alloc,
class = std::enable_if_t<sizeof...(Is) == Shape::rank()>>
auto ein(array<T, Shape, Alloc>& op) {
return ein<Is...>(op.ref());
}
template <size_t... Is, class T, class Shape, class Alloc,
class = std::enable_if_t<sizeof...(Is) == Shape::rank()>>
auto ein(const array<T, Shape, Alloc>& op) {
return ein<Is...>(op.ref());
}
/** Define an Einstein summation operand for a scalar. The scalar
* is broadcasted as needed during the summation. Because this
* operand does not provide a shape, the dimensions of the sum
* must be inferred from other operands. See `ein_reduce()` for more
* details. */
template <class T>
auto ein(T& scalar) {
return ein<>(array_ref<T, shape<>>(&scalar, {}));
}
/** Define an Einstein summation operand for a pointer and size. */
template <size_t I0, class T>
auto ein(T* x, size_t N) {
return ein<I0>(make_array_ref(&x[0], shape<dim<0, dynamic, 1>>(static_cast<index_t>(N))));
}
/** Define an Einstein summation operand for a C array. */
template <size_t I0, class T, size_t N>
auto ein(T (&x)[N]) {
return ein<I0>(make_array_ref(&x[0], shape<dim<0, N, 1>>()));
}
/** Compute an Einstein reduction. This function allows one to specify
* many kinds of array transformations and reductions using
* <a href="https://en.wikipedia.org/wiki/Einstein_notation">Einstein notation</a>.
*
* This function accepts an expression `expr` constructed using operators
* on `ein<i, j, ...>(op)` operands. These operands describe which
* dimensions of the reduction index should be used to address that
* operand. The rank of the reduction operation is inferred from the
* number of dimensions used in the expression. The reduction operation
* is executed in the order of the indices, with the lowest numbered
* dimension executed as the innermost loop.
*
* If `expr` is a reduction operator, the result must be initialized to
* some useful value, typically the identity value for the reduction
* operator, e.g. `0` for `+=`. Not initializing the result allows
* successive `ein_reduce` operations to be applied to the same result.
*
* This function does not optimize the associative order in which the
* operations are performed. It evaluates the expression for each element
* of the final result reduction. This can be efficient for expansion
* operations, but it may be inefficient for contractions. Contractions
* may need to be reassociated manually for efficient computation.
*
* This function does not optimize the loop ordering within each operation.
* The goal of this function is to provide a low-overhead and expressive
* reduction that can be composed with other explicit loop transformations
* to achieve good performance. Various optimization strategies can be
* implemented by splitting loops appropriately and by controlling the
* order of the loops with the reduction indices.
*
* Examples:
* - `ein_reduce(ein<>(tr_A) += ein<i, i>(A))`, the trace of `A`.
* - `ein_reduce(ein<>(dot) += (ein<i>(x) + ein<i>(y)) * ein<i>(z))`,
* the dot product `(x + y)*z`.
* - `ein_reduce(ein<i, j>(AB) += ein<i, k>(A) * ein<k, j>(B))`, the matrix product `A*B`
* - `ein_reduce(ein<i>(Ax) += ein<i, j>(A) * ein<j>(x))`, the matrix-vector product `A*x`
* - `ein_reduce(ein<i>(diag_A) = ein<i, i>(A))`, the diagonal of `A`.
*
* where:
* - `A`, `B`, `AB` are matrices (rank 2 arrays)
* - `x`, `y`, `z`, `Ax` are vectors (rank 1 arrays)
* - `tr_A`, `dot` are scalar (rank 0 arrays)
* - `i`, `j`, `k` are unique constexpr integers. */
template <class Expr, class = internal::enable_if_ein_assign<Expr>>
NDARRAY_INLINE auto ein_reduce(const Expr& expr) {
constexpr index_t LoopRank = Expr::MaxIndex + 1;
// Gather the dimensions identified by the indices. gather_dims keeps the
// first dimension it finds, so we want that to be the result dimension if it
// is present. If not, this selects one of the operand dimensions, which are
// given stride 0.
auto reduction_shape = internal::make_ein_reduce_shape(internal::make_index_sequence<LoopRank>(),
internal::is_result_shape(), expr.op_a, internal::is_operand_shape(), expr.op_b);
// TODO: Try to compile-time optimize reduction_shape? :)
// This is maybe actually somewhat doable, simply moving the for_each_index
// call below into a function that could be overloaded based on the type of
// the shape would enable different optimizations. This function could be a
// member of a template parameter/parameter of this function, enabling the
// caller to customize the optimization. However, it's still very difficult
// to make useful optimizations without making some assumptions about the
// dimensions of the shape.
// Perform the reduction.
for_each_index_in_order(reduction_shape, expr);
// Assume the expr is an assignment, and return the left-hand side.
return expr.op_a.op;
}
/** Infer the shape of the result of `make_ein_reduce`. */
template <size_t... ResultIs, class Expr, class = internal::enable_if_ein_op<Expr>>
NDARRAY_UNIQUE auto make_ein_reduce_shape(const Expr& expr) {
auto result_shape = internal::make_ein_reduce_shape(
internal::index_sequence<ResultIs...>(), internal::is_inferred_shape(), expr);
return make_compact(result_shape);
}
/** Compute an Einstein summation using `ein_reduce` and return the result. The
* `value_type` of the result will be `T`, and the result shape will be inferred
* from the shape of the operands. The result is initialized to `init` prior to
* computing the summation. The Einstein summation indices for the result operand
* are `ResultIs...`.
*
* Examples:
* - `trace_A = make_ein_sum<T>(ein<i, i>(A))`
* - `dot = make_ein_sum<T>((ein<i>(x) + ein<i>(y)) * ein<i>(z))`
* - `AB = make_ein_sum<T, i, j>(ein<i, k>(A) * ein<k, j>(B))`
* - `Ax = make_ein_sum<T, i>(ein<i, j>(A) * ein<1>(x))`
*
* where:
* - `A`, `B` are matrices (rank 2 arrays)
* - `x`, `y`, `z` are vectors (rank 1 arrays)
* - `i`, `j`, `k` are unique constexpr integers.
*
* See `ein_reduce()` for more details.
**/
// TODO: It would be nice to be able to express reductions other than sums.
// TODO: Add an overload with a default ResultIs... = 0, 1, 2, ... This requires
// also inferring the rank of the result.
template <class T, size_t... ResultIs, class Expr, class Alloc = std::allocator<T>,
class = internal::enable_if_ein_op<Expr>>
NDARRAY_INLINE auto make_ein_sum(
const Expr& expr, const T& init = T(), const Alloc& alloc = Alloc()) {
auto result_shape = make_ein_reduce_shape<ResultIs...>(expr);
auto result = make_array<T>(result_shape, init, alloc);
ein_reduce(ein<ResultIs...>(result) += expr);
return result;
}
} // namespace nda
#endif // NDARRAY_EIN_REDUCE_H