From 51d9b6bebd8a7f2f780075a4d125e7261752facf Mon Sep 17 00:00:00 2001
From: Joost van Zwieten
Date: Fri, 8 Mar 2024 13:15:43 +0100
Subject: [PATCH] rewrite evaluable loops using Loop base class

Currently there are 2.5 implementations for evaluable loops: `LoopSum`,
`LoopConcatenate` and `LoopConcatenateCombined`. The first loop is the most
common one (before sparsification). The second appears after sparsification.
The last loop is created only during the 'optimize for numpy' stage and is a
combination of several concatenates in a single loop for performance reasons.

This patch introduces a base class for evaluable loops and rewrites `LoopSum`
and `LoopConcatenate` as implementations of the base class. The base class
requires two methods to be implemented: one for initializing the output value
and one for updating the output value for each iteration.

Due to the generic nature of the base class, the method for updating the
output value is guarded with a multiprocessing lock if the loop is evaluated
in parallel, even when this is not necessary, e.g. for `LoopConcatenate`. To
minimize the impact of locking, the lock used to increment the loop index is
reused for updating the output value.

In addition, this patch replaces `LoopConcatenateCombined` with `LoopTuple`,
which supports any combination of loop evaluables, not just `LoopConcatenate`.
---
 nutils/evaluable.py     | 415 ++++++++++++++++++++--------------------
 tests/test_evaluable.py |  77 ++------
 2 files changed, 226 insertions(+), 266 deletions(-)

diff --git a/nutils/evaluable.py b/nutils/evaluable.py
index 6617343b0..e8f981e5a 100644
--- a/nutils/evaluable.py
+++ b/nutils/evaluable.py
@@ -50,6 +50,7 @@
 import contextlib
 import subprocess
 import os
+import multiprocessing
 
 graphviz = os.environ.get('NUTILS_GRAPHVIZ')
 
@@ -302,10 +303,10 @@ def _simplified(self):
 
     @cached_property
     def optimized_for_numpy(self):
-        return self.simplified \
-            ._optimized_for_numpy1 \
-            ._deep_flatten_constants() \
-            ._combine_loop_concatenates(frozenset())
+        retval = self.simplified \
+            ._optimized_for_numpy1 \
+            ._deep_flatten_constants()
+        return retval._combine_loops(loop for loop in retval._loop_deps if loop is not retval)
 
     @util.deep_replace_property
     def _optimized_for_numpy1(obj):
@@ -323,49 +324,37 @@ def _deep_flatten_constants(self):
         return self._flatten_constant()
 
     @cached_property
-    def _loop_concatenate_deps(self):
+    def _loop_deps(self) -> typing.Tuple['Loop', ...]:
         deps = []
-        for arg in self.__args:
-            deps += [dep for dep in arg._loop_concatenate_deps if dep not in deps]
+        deps.extend(loop for arg in self.__args for loop in arg._loop_deps if loop not in deps)
         return tuple(deps)
 
-    def _combine_loop_concatenates(self, outer_exclude):
+    def _combine_loops(self, candidates):
+        candidates = list(candidates)
         while True:
-            exclude = set(outer_exclude)
+            exclude = set()
             combine = {}
-            # Collect all top-level `LoopConcatenate` instances in `combine` and all
-            # their dependent `LoopConcatenate` instances in `exclude`.
-            for lc in self._loop_concatenate_deps:
-                lcs = combine.setdefault(lc.index, [])
-                if lc not in lcs:
-                    lcs.append(lc)
-                exclude.update(set(lc._loop_concatenate_deps) - {lc})
-            # Combine top-level `LoopConcatenate` instances excluding those in
-            # `exclude`.
+            # Collect all top-level loops in `combine` and all their dependent
+            # loop instances in `exclude`.
+            for loop in candidates:
+                loops = combine.setdefault(loop.index, [])
+                if loop not in loops:
+                    loops.append(loop)
+                exclude.update(set(loop._loop_deps) - {loop})
+            # Combine top-level loop instances excluding those in `exclude`.
             replacements = {}
-            for index, lcs in combine.items():
-                lcs = [lc for lc in lcs if lc not in exclude]
-                if not lcs:
+            for index, loops in combine.items():
+                loops = tuple(loop for loop in loops if loop not in exclude)
+                candidates = [loop for loop in candidates if loop not in loops]
+                if len(loops) <= 1:
                     continue
-                # We're extracting data from `LoopConcatenate` in favor of using
-                # `loop_concatenate_combined(lcs, ...)` because the later requires
-                # reapplying simplifications that are already applied in the former.
-                # For example, in `loop_concatenate_combined` the offsets (used by
-                # start, stop and the concatenation length) are formed by
-                # `loop_concatenate`-ing `func.shape[-1]`. If the shape is constant,
-                # this can be simplified to a `Range`.
-                data = Tuple(tuple(Tuple(lc.funcdata) for lc in lcs))
-                # Combine `LoopConcatenate` instances in `data` excluding
-                # `outer_exclude` and those that will be processed in a subsequent loop
-                # (the remainder of `exclude`). The latter consists of loops that are
-                # invariant w.r.t. the current loop `index`.
-                data = data._combine_loop_concatenates(exclude)
-                combined = LoopConcatenateCombined(tuple(map(tuple, data)), index._name, index.length)
-                for i, lc in enumerate(lcs):
-                    intbounds = dict(zip(('_lower', '_upper'), lc._intbounds)) if lc.dtype == int else {}
-                    replacements[lc] = ArrayFromTuple(combined, i, lc.shape, lc.dtype, **intbounds)
+                combined = LoopTuple(loops, index.name, index.length)
+                combined = combined._combine_loops(combined._nested_loops)
+                for i, loop in enumerate(loops):
+                    intbounds = dict(zip(('_lower', '_upper'), loop._intbounds)) if loop.dtype == int else {}
+                    replacements[loop] = ArrayFromTuple(combined, i, loop.shape, loop.dtype, **intbounds)
             if replacements:
-                self = util.shallow_replace(lambda key: replacements.get(key) if isinstance(key, LoopConcatenate) else None)(self)
+                self = util.shallow_replace(lambda key: replacements.get(key) if isinstance(key, Loop) else None)(self)
             else:
                 return self
@@ -4091,6 +4080,7 @@ class _LoopIndex(Argument):
 
     def __init__(self, name: str, length: Array):
         assert isinstance(name, str), f'name={name!r}'
         assert _isindex(length), f'length={length!r}'
+        self.name = name
         self.length = length
         super().__init__(name, (), int)
@@ -4116,72 +4106,175 @@ def _simplified(self):
         return Zeros((), int)
 
 
-class LoopSum(Array):
+class Loop(Evaluable):
+    '''Base class for evaluable loops.
 
-    def __init__(self, func: Array, shape: typing.Tuple[Array, ...], index_name: str, length: Array):
-        assert isinstance(func, Array) and func.dtype != bool, f'func={func!r}'
-        assert isinstance(shape, tuple) and all(_isindex(n) for n in shape), f'shape={shape!r}'
+    Subclasses must implement
+
+    * method ``evalf_loop_init(init_arg)``, which creates and returns the output value, and
+    * method ``evalf_loop_body(output, body_arg)``, which updates the output value for each iteration.
+    '''
+
+    def __init__(self, index_name: str, length: Array, init_arg: Evaluable, body_arg: Evaluable, *args, **kwargs):
         assert isinstance(index_name, str), f'index_name={index_name!r}'
-        assert _isindex(length), f'length={length!r}'
-        assert func.ndim == len(shape)
-        self.index = loop_index(index_name, length)
-        if any(self.index in n.arguments for n in shape):
-            raise ValueError('the shape of the function must not depend on the index')
-        self.func = func
-        self._invariants, self._dependencies = _dependencies_sans_invariants(func, self.index)
-        super().__init__(args=(Tuple(shape), length, *self._invariants), shape=shape, dtype=func.dtype)
+        assert isinstance(length, Array), f'length={length!r}'
+        assert isinstance(init_arg, Evaluable), f'init_arg={init_arg!r}'
+        assert isinstance(body_arg, Evaluable), f'body_arg={body_arg!r}'
+        self.index_name = index_name
+        self.length = length
+        self.index = _LoopIndex(index_name, length)
+        self.init_arg = init_arg
+        self.body_arg = body_arg
+        if self.index in init_arg.arguments:
+            raise ValueError('the loop initialization arguments must not depend on the index')
+        self._invariants, self._dependencies = _dependencies_sans_invariants(body_arg, self.index)
+        super().__init__(args=(length, init_arg, *self._invariants), *args, **kwargs)
 
     @cached_property
     def _serialized_loop(self):
         indices = {d: i for i, d in enumerate(itertools.chain([self.index], self._invariants, self._dependencies))}
         return tuple((dep, tuple(map(indices.__getitem__, dep._Evaluable__args))) for dep in self._dependencies)
 
-    # This property is a derivation of `_serialized` where the `Evaluable`
-    # instances are mapped to the `evalf` methods of the instances. Asserting
-    # that functions are immutable is difficult and currently
-    # `types._isimmutable` marks all functions as mutable. Since the
-    # `types.CacheMeta` machinery asserts immutability of the property, we have
-    # to resort to a regular `functools.cached_property`. Nevertheless, this
-    # property should be treated as if it is immutable.
@cached_property def _serialized_loop_evalf(self): return tuple((dep.evalf, indices) for dep, indices in self._serialized_loop) - def evalf(self, shape, length, *args): + def evalf(self, length, init_arg, *invariants): serialized_evalf = self._serialized_loop_evalf - result = numpy.zeros(shape, self.dtype) - for index in range(length): - values = [numpy.array(index)] - values.extend(args) - values.extend(op_evalf(*[values[i] for i in indices]) for op_evalf, indices in serialized_evalf) - result += values[-1] - return result - - def evalf_withtimes(self, times, shape, length, *args): + output = self.evalf_loop_init(init_arg) + length = length.__index__() + values = [None] + list(invariants) + [None] * len(serialized_evalf) + with log.context(f'loop {self.index.name}'.replace('{', '{{').replace('}', '}}') + ' {:3.0f}%', 0) as log_ctx: + fork = parallel.fork(length) + if fork: + raw_index = multiprocessing.RawValue('i', 0) + lock = multiprocessing.Lock() + with fork as pid: + with lock: + index = raw_index.value + raw_index.value = index + 1 + while index < length: + if not pid: + log_ctx(100*index/length) + values[0] = numpy.array(index) + for o, (op_evalf, indices) in enumerate(serialized_evalf, len(invariants) + 1): + values[o] = op_evalf(*[values[i] for i in indices]) + with lock: + self.evalf_loop_body(output, values[-1]) + index = raw_index.value + raw_index.value = index + 1 + else: + for index in range(length): + values[0] = numpy.array(index) + for o, (op_evalf, indices) in enumerate(serialized_evalf, len(invariants) + 1): + values[o] = op_evalf(*[values[i] for i in indices]) + self.evalf_loop_body(output, values[-1]) + log_ctx(100*(index+1)/length) + return output + + def evalf_withtimes(self, times, length, init_arg, *invariants): serialized = self._serialized_loop subtimes = times.setdefault(self, collections.defaultdict(_Stats)) - result = numpy.zeros(shape, self.dtype) + output = self.evalf_loop_init(init_arg) + values = [None] + list(invariants) + [None] * len(serialized) for index in range(length): - values = [numpy.array(index)] - values.extend(args) - values.extend(op.evalf_withtimes(subtimes, *[values[i] for i in indices]) for op, indices in serialized) - result += values[-1] - return result + values[0] = numpy.array(index) + for o, (op, indices) in enumerate(serialized, len(invariants) + 1): + values[o] = op.evalf_withtimes(subtimes, *[values[i] for i in indices]) + self.evalf_loop_body_withtimes(subtimes, output, values[-1]) + return output - def _derivative(self, var, seen): - return loop_sum(derivative(self.func, var, seen), self.index) + def evalf_loop_body_withtimes(self, times, output, body_arg): + with times[self]: + self.evalf_loop_body(output, body_arg) def _node(self, cache, subgraph, times): - if self in cache: - return cache[self] - subcache = {} - for arg in self._Evaluable__args: - subcache[arg] = arg._node(cache, subgraph, times) + if (cached := cache.get(self)) is not None: + return cached + for arg in itertools.chain(self._invariants, (self.init_arg,)): + arg._node(cache, subgraph, times) + loopcache = cache.copy() + loopcache.pop(self.index, None) loopgraph = Subgraph('Loop', subgraph) - subtimes = times.get(self, collections.defaultdict(_Stats)) - sum_kwargs = {'shape[{}]'.format(i): n._node(cache, subgraph, times) for i, n in enumerate(self.shape)} - sum_kwargs['func'] = self.func._node(subcache, loopgraph, subtimes) - cache[self] = node = RegularNode('LoopSum', (), sum_kwargs, (type(self).__name__, subtimes['sum']), loopgraph) + looptimes = 
times.get(self, collections.defaultdict(_Stats))
+        cache[self] = node = self._node_loop_body(loopcache, loopgraph, looptimes)
+        return node
+
+    @property
+    def _loop_deps(self) -> typing.Tuple['Loop', ...]:
+        deps = [self]
+        args = itertools.chain(self._invariants, (self.init_arg,))
+        deps.extend(loop for arg in args for loop in arg._loop_deps if loop not in deps)
+        return tuple(deps)
+
+    @cached_property
+    def _nested_loops(self) -> typing.Tuple['Loop', ...]:
+        nested = []
+        nested.extend(loop for arg in self._dependencies for loop in arg._loop_deps if loop not in nested)
+        deps = self._loop_deps
+        return tuple(loop for loop in nested if loop not in deps)
+
+
+class LoopTuple(Loop):
+
+    def __init__(self, loops: typing.Tuple[Loop, ...], index_name: str, length: Array):
+        assert isinstance(loops, tuple) and all(isinstance(loop, Loop) and loop.index_name == index_name and loop.length == length for loop in loops), f'loops={loops}'
+        self.loops = loops
+        super().__init__(
+            index_name=index_name,
+            length=length,
+            init_arg=Tuple(tuple(loop.init_arg for loop in loops)),
+            body_arg=Tuple(tuple(loop.body_arg for loop in loops)),
+        )
+
+    def evalf_loop_init(self, args):
+        return tuple(loop.evalf_loop_init(arg) for loop, arg in zip(self.loops, args))
+
+    def evalf_loop_body(self, outputs, args):
+        for loop, output, arg in zip(self.loops, outputs, args):
+            loop.evalf_loop_body(output, arg)
+
+    def evalf_loop_body_withtimes(self, times, outputs, args):
+        for loop, output, arg in zip(self.loops, outputs, args):
+            loop.evalf_loop_body_withtimes(times, output, arg)
+
+    def _node_loop_body(self, cache, subgraph, times):
+        if (cached := cache.get(self)) is not None:
+            return cached
+        cache[self] = node = TupleNode(tuple(item._node_loop_body(cache, subgraph, times) for item in self.loops), metadata=(type(self).__name__, times[self]), subgraph=subgraph)
+        return node
+
+    @property
+    def _loop_deps(self) -> typing.Tuple['Loop', ...]:
+        deps = []
+        deps.extend(dep for loop in self.loops for dep in loop._loop_deps)
+        return tuple(deps)
+
+
+class LoopSum(Loop, Array):
+
+    def __init__(self, func: Array, shape: typing.Tuple[Array, ...], index_name: str, length: Array):
+        assert isinstance(func, Array) and func.dtype != bool, f'func={func!r}'
+        assert func.ndim == len(shape)
+        self.func = func
+        super().__init__(init_arg=Tuple(shape), body_arg=func, index_name=index_name, length=length, shape=shape, dtype=func.dtype)
+
+    def evalf_loop_init(self, shape):
+        return parallel.shzeros(tuple(n.__index__() for n in shape), dtype=self.dtype)
+
+    @staticmethod
+    def evalf_loop_body(output, func):
+        output += func
+
+    def _derivative(self, var, seen):
+        return loop_sum(derivative(self.func, var, seen), self.index)
+
+    def _node_loop_body(self, cache, subgraph, times):
+        if (cached := cache.get(self)) is not None:
+            return cached
+        kwargs = {'shape[{}]'.format(i): n._node(cache, subgraph, times) for i, n in enumerate(self.shape)}
+        kwargs['func'] = self.func._node(cache, subgraph, times)
+        cache[self] = node = RegularNode('LoopSum', (), kwargs, (type(self).__name__, times[self]), subgraph)
         return node
 
     def _simplified(self):
@@ -4261,39 +4354,40 @@ def _intbounds_impl(self):
         return 0, (0 if n == 0 or m == 0 else n * m)
 
 
-class LoopConcatenate(Array):
+class LoopConcatenate(Loop, Array):
 
-    def __init__(self, funcdata: typing.Tuple[Array, ...], index_name: str, length: Array):
-        assert isinstance(funcdata, tuple) and all(isinstance(d, Array) for d in funcdata), f'funcdata={funcdata!r}'
-        assert isinstance(index_name, str), 
f'index_name={index_name!r}' - assert _isindex(length), f'length={length!r}' - self.funcdata = funcdata - self.func, self.start, stop, *shape = funcdata - self.index = loop_index(index_name, length) + def __init__(self, func: Array, start: Array, stop: Array, shape: typing.Tuple[Array, ...], index_name: str, length: Array): + assert isinstance(func, Array), f'func={func}' + assert _isindex(start), f'start={start}' + assert _isindex(stop), f'stop={stop}' + assert isinstance(shape, tuple) and all(map(_isindex, shape)), f'shape={shape}' + self.func = func + self.start = start + self.stop = stop if not self.func.ndim: raise ValueError('expected an array with at least one axis') - if any(self.index in n.arguments for n in shape): - raise ValueError('the shape of the function must not depend on the index') - self._lcc = LoopConcatenateCombined((self.funcdata,), index_name, length) - super().__init__(args=(self._lcc,), shape=tuple(shape), dtype=self.func.dtype) + super().__init__(init_arg=Tuple(shape), body_arg=Tuple((func, start, stop)), index_name=index_name, length=length, shape=shape, dtype=func.dtype) - @staticmethod - def evalf(arg): - return arg[0] + def evalf_loop_init(self, shape): + return parallel.shempty(tuple(n.__index__() for n in shape), dtype=self.dtype) - def evalf_withtimes(self, times, arg): - with times[self]: - return arg[0] + @staticmethod + def evalf_loop_body(output, arg): + func, start, stop = arg + output[..., start:stop] = func def _derivative(self, var, seen): return Transpose.from_end(loop_concatenate(Transpose.to_end(derivative(self.func, var, seen), self.ndim-1), self.index), self.ndim-1) - def _node(self, cache, subgraph, times): - if self in cache: - return cache[self] - else: - cache[self] = node = self._lcc._node(cache, subgraph, times)[0] - return node + def _node_loop_body(self, cache, subgraph, times): + if (cached := cache.get(self)) is not None: + return cached + kwargs = {'shape[{}]'.format(i): n._node(cache, subgraph, times) for i, n in enumerate(self.shape)} + kwargs['start'] = self.start._node(cache, subgraph, times) + kwargs['stop'] = self.stop._node(cache, subgraph, times) + kwargs['func'] = self.func._node(cache, subgraph, times) + cache[self] = node = RegularNode('LoopConcatenate', (), kwargs, (type(self).__name__, times[self]), subgraph) + return node def _simplified(self): if iszero(self.func): @@ -4340,93 +4434,10 @@ def _assparse(self): chunks.append(tuple(loop_concatenate(_flat(arr), self.index) for arr in (*indices, last_index, values))) return tuple(chunks) - @property - def _loop_concatenate_deps(self): - return (self,) + super()._loop_concatenate_deps - def _intbounds_impl(self): return self.func._intbounds -class LoopConcatenateCombined(Evaluable): - - def __init__(self, funcdatas: typing.Tuple[typing.Tuple[Array, ...], ...], index_name: str, length: Array): - assert isinstance(funcdatas, tuple) and all(isinstance(funcdata, tuple) and all(isinstance(d, Array) for d in funcdata) for funcdata in funcdatas), f'funcdatas={funcdatas!r}' - assert isinstance(index_name, str), f'index_name={index_name}' - assert _isindex(length), f'length={length!r}' - self._funcdatas = funcdatas - self._funcs = tuple(func for func, start, stop, *shape in funcdatas) - self._index_name = index_name - self._index = loop_index(index_name, length) - if any(not func.ndim for func in self._funcs): - raise ValueError('expected an array with at least one axis') - shapes = tuple(Tuple(tuple(shape)) for func, start, stop, *shape in funcdatas) - if any(self._index in 
shape.arguments for shape in shapes): - raise ValueError('the shape of the function must not depend on the index') - self._invariants, self._dependencies = _dependencies_sans_invariants( - Tuple(tuple(Tuple((start, stop, func)) for func, start, stop, *shape in funcdatas)), self._index) - super().__init__(args=(Tuple(shapes), length, *self._invariants)) - - @cached_property - def _serialized_loop(self): - indices = {d: i for i, d in enumerate(itertools.chain([self._index], self._invariants, self._dependencies))} - return tuple((dep, tuple(map(indices.__getitem__, dep._Evaluable__args))) for dep in self._dependencies) - - # This property is a derivation of `_serialized` where the `Evaluable` - # instances are mapped to the `evalf` methods of the instances. Asserting - # that functions are immutable is difficult and currently - # `types._isimmutable` marks all functions as mutable. Since the - # `types.CacheMeta` machinery asserts immutability of the property, we have - # to resort to a regular `functools.cached_property`. Nevertheless, this - # property should be treated as if it is immutable. - @cached_property - def _serialized_loop_evalf(self): - return tuple((dep.evalf, indices) for dep, indices in self._serialized_loop) - - def evalf(self, shapes, length, *args): - serialized_evalf = self._serialized_loop_evalf - results = [parallel.shempty(tuple(map(int, shape)), dtype=func.dtype) for func, shape in zip(self._funcs, shapes)] - with parallel.ctxrange('loop {}'.format(self._index_name), int(length)) as indices: - for index in indices: - values = [numpy.array(index)] - values.extend(args) - values.extend(op_evalf(*[values[i] for i in indices]) for op_evalf, indices in serialized_evalf) - for result, (start, stop, block) in zip(results, values[-1]): - result[..., start:stop] = block - return tuple(results) - - def evalf_withtimes(self, times, shapes, length, *args): - serialized = self._serialized_loop - subtimes = times.setdefault(self, collections.defaultdict(_Stats)) - results = [parallel.shempty(tuple(map(int, shape)), dtype=func.dtype) for func, shape in zip(self._funcs, shapes)] - for index in range(length): - values = [numpy.array(index)] - values.extend(args) - values.extend(op.evalf_withtimes(subtimes, *[values[i] for i in indices]) for op, indices in serialized) - for func, result, (start, stop, block) in zip(self._funcs, results, values[-1]): - with subtimes['concat', func]: - result[..., start:stop] = block - return tuple(results) - - def _node(self, cache, subgraph, times): - if (self, 'tuple') in cache: - return cache[self, 'tuple'] - subcache = {} - for arg in self._invariants: - subcache[arg] = arg._node(cache, subgraph, times) - loopgraph = Subgraph('Loop', subgraph) - subtimes = times.get(self, collections.defaultdict(_Stats)) - concats = [] - for func, start, stop, *shape in self._funcdatas: - concat_kwargs = {'shape[{}]'.format(i): n._node(cache, subgraph, times) for i, n in enumerate(shape)} - concat_kwargs['start'] = start._node(subcache, loopgraph, subtimes) - concat_kwargs['stop'] = stop._node(subcache, loopgraph, subtimes) - concat_kwargs['func'] = func._node(subcache, loopgraph, subtimes) - concats.append(RegularNode('LoopConcatenate', (), concat_kwargs, (type(self).__name__, subtimes['concat', func]), loopgraph)) - cache[self, 'tuple'] = node = TupleNode(tuple(concats), (type(self).__name__, times[self]), subgraph) - return node - - class SearchSorted(Array): '''Find index of evaluable array into sorted numpy array.''' @@ -4916,10 +4927,10 @@ def loop_sum(func, 
index): func = asarray(func) if not isinstance(index, _LoopIndex): raise TypeError(f'expected _LoopIndex, got {index!r}') - return LoopSum(func, func.shape, index._name, index.length) + return LoopSum(func, func.shape, index.name, index.length) -def _loop_concatenate_data(func, index): +def loop_concatenate(func, index): func = asarray(func) if not isinstance(index, _LoopIndex): raise TypeError(f'expected _LoopIndex, got {index!r}') @@ -4931,20 +4942,8 @@ def _loop_concatenate_data(func, index): offsets = _SizesToOffsets(chunk_sizes) start = Take(offsets, index) stop = Take(offsets, index+1) - return (func, start, stop, *func.shape[:-1], Take(offsets, index.length)) - - -def loop_concatenate(func, index): - funcdata = _loop_concatenate_data(func, index) - return LoopConcatenate(funcdata, index._name, index.length) - - -def loop_concatenate_combined(funcs, index): - unique_funcs = [] - unique_funcs.extend(func for func in funcs if func not in unique_funcs) - unique_func_data = tuple(_loop_concatenate_data(func, index) for func in unique_funcs) - loop = LoopConcatenateCombined(unique_func_data, index._name, index.length) - return tuple(ArrayFromTuple(loop, unique_funcs.index(func), tuple(shape), func.dtype) for func, start, stop, *shape in unique_func_data) + shape = *func.shape[:-1], Take(offsets, index.length) + return LoopConcatenate(func, start, stop, shape, index.name, index.length) @util.shallow_replace diff --git a/tests/test_evaluable.py b/tests/test_evaluable.py index 3a5022c7c..c6eef8c20 100644 --- a/tests/test_evaluable.py +++ b/tests/test_evaluable.py @@ -593,7 +593,6 @@ def _check(name, op, n_op, *arg_values, hasgrad=True, zerograd=False, ndim=2): _check('loopsum6', lambda: evaluable.loop_sum(evaluable.Guard(evaluable.constant(1) + evaluable.loop_index('index', 4)), evaluable.loop_index('index', 4)) * evaluable.loop_sum(evaluable.loop_index('index', 4), evaluable.loop_index('index', 4)), lambda: numpy.array(60)) _check('loopconcatenate1', lambda a: evaluable.loop_concatenate(a+evaluable.prependaxes(evaluable.astype(evaluable.loop_index('index', 3), float), a.shape), evaluable.loop_index('index', 3)), lambda a: a+numpy.arange(3)[None], ANY(3, 1)) _check('loopconcatenate2', lambda: evaluable.loop_concatenate(evaluable.Elemwise(tuple(types.arraydata(numpy.arange(48).reshape(4, 4, 3)[:, :, a:b]) for a, b in util.pairwise([0, 2, 3])), evaluable.loop_index('index', 2), int), evaluable.loop_index('index', 2)), lambda: numpy.arange(48).reshape(4, 4, 3)) -_check('loopconcatenatecombined', lambda a: evaluable.loop_concatenate_combined([a+evaluable.prependaxes(evaluable.astype(evaluable.loop_index('index', 3), float), a.shape)], evaluable.loop_index('index', 3))[0], lambda a: a+numpy.arange(3)[None], ANY(3, 1), hasgrad=False) _check('legendre', lambda a: evaluable.Legendre(evaluable.asarray(a), 5), lambda a: numpy.moveaxis(numpy.polynomial.legendre.legval(a, numpy.eye(6)), 0, -1), ANY(3, 4, 3)) _check('polyval_1d_p0', lambda c, x: evaluable.Polyval(c, x), poly.eval_outer, POS(1), ANY(4, 1), ndim=1) @@ -935,35 +934,6 @@ def test_loop_concatenate(self): ' ├ %B2\n' ' └ 1\n') - @unittest.skipIf(sys.version_info < (3, 6), 'test requires dicts maintaining insertion order') - def test_loop_concatenatecombined(self): - i = evaluable.loop_index('i', 2) - f, = evaluable.loop_concatenate_combined([evaluable.InsertAxis(i, evaluable.constant(1))], i) - self.assertEqual(f.asciitree(richoutput=True), - 'SUBGRAPHS\n' - 'A\n' - '└ B = Loop\n' - 'NODES\n' - '%B0 = LoopConcatenate\n' - '├ shape[0] = %A0 = 
Take; i:; [2,2]\n' - '│ ├ %A1 = _SizesToOffsets; i:3; [0,2]\n' - '│ │ └ %A2 = InsertAxis; i:(2); [1,1]\n' - '│ │ ├ 1\n' - '│ │ └ 2\n' - '│ └ 2\n' - '├ start = %B1 = Take; i:; [0,2]\n' - '│ ├ %A1\n' - '│ └ %B2 = LoopIndex\n' - '│ └ length = 2\n' - '├ stop = %B3 = Take; i:; [0,2]\n' - '│ ├ %A1\n' - '│ └ %B4 = Add; i:; [1,2]\n' - '│ ├ %B2\n' - '│ └ 1\n' - '└ func = %B5 = InsertAxis; i:(1); [0,1]\n' - ' ├ %B2\n' - ' └ 1\n') - class simplify(TestCase): @@ -1105,51 +1075,42 @@ def _simplified(self): t.simplified -class combine_loop_concatenates(TestCase): +class combine_loops(TestCase): def test_same_index(self): i = evaluable.loop_index('i', 3) - A = evaluable.LoopConcatenate((evaluable.InsertAxis(i, evaluable.constant(1)), i, i+1, evaluable.constant(3)), i._name, i.length) - B = evaluable.LoopConcatenate((evaluable.InsertAxis(i, evaluable.constant(2)), i*2, i*2+2, evaluable.constant(6)), i._name, i.length) - actual = evaluable.Tuple((A, B))._combine_loop_concatenates(set()) - L = evaluable.LoopConcatenateCombined(((evaluable.InsertAxis(i, evaluable.constant(1)), i, i+1, evaluable.constant(3)), (evaluable.InsertAxis(i, evaluable.constant(2)), i*2, i*2+2, evaluable.constant(6))), i._name, i.length) - desired = evaluable.Tuple((evaluable.ArrayFromTuple(L, 0, (evaluable.constant(3),), int, **dict(zip(('_lower', '_upper'), A._intbounds))), evaluable.ArrayFromTuple(L, 1, (evaluable.constant(6),), int, **dict(zip(('_lower', '_upper'), B._intbounds))))) + A = evaluable.loop_concatenate(evaluable.InsertAxis(i, evaluable.constant(1)), i) + B = evaluable.loop_concatenate(evaluable.InsertAxis(i, evaluable.constant(2)), i) + together = evaluable.Tuple((A, B)) + actual = together._combine_loops(together._loop_deps) + L = evaluable.LoopTuple((A, B), i.name, i.length) + desired = evaluable.Tuple((evaluable.ArrayFromTuple(L, 0, A.shape, A.dtype, **dict(zip(('_lower', '_upper'), A._intbounds))), evaluable.ArrayFromTuple(L, 1, B.shape, B.dtype, **dict(zip(('_lower', '_upper'), B._intbounds))))) self.assertEqual(actual, desired) def test_different_index(self): i = evaluable.loop_index('i', 3) j = evaluable.loop_index('j', 3) - A = evaluable.LoopConcatenate((evaluable.InsertAxis(i, evaluable.constant(1)), i, i+1, evaluable.constant(3)), i._name, i.length) - B = evaluable.LoopConcatenate((evaluable.InsertAxis(j, evaluable.constant(1)), j, j+1, evaluable.constant(3)), j._name, j.length) - actual = evaluable.Tuple((A, B))._combine_loop_concatenates(set()) - L1 = evaluable.LoopConcatenateCombined(((evaluable.InsertAxis(i, evaluable.constant(1)), i, i+evaluable.constant(1), evaluable.constant(3)),), i._name, i.length) - L2 = evaluable.LoopConcatenateCombined(((evaluable.InsertAxis(j, evaluable.constant(1)), j, j+evaluable.constant(1), evaluable.constant(3)),), j._name, j.length) - desired = evaluable.Tuple((evaluable.ArrayFromTuple(L1, 0, (evaluable.constant(3),), int, **dict(zip(('_lower', '_upper'), A._intbounds))), evaluable.ArrayFromTuple(L2, 0, (evaluable.constant(3),), int, **dict(zip(('_lower', '_upper'), B._intbounds))))) + A = evaluable.loop_concatenate(evaluable.InsertAxis(i, evaluable.constant(1)), i) + B = evaluable.loop_concatenate(evaluable.InsertAxis(j, evaluable.constant(1)), j) + desired = evaluable.Tuple((A, B)) + actual = desired._combine_loops(desired._loop_deps) self.assertEqual(actual, desired) def test_nested_invariant(self): i = evaluable.loop_index('i', 3) - A = evaluable.LoopConcatenate((evaluable.InsertAxis(i, evaluable.constant(1)), i, i+1, evaluable.constant(3)), i._name, i.length) - B = 
evaluable.LoopConcatenate((A, i*3, i*3+3, evaluable.constant(9)), i._name, i.length) - actual = evaluable.Tuple((A, B))._combine_loop_concatenates(set()) - L1 = evaluable.LoopConcatenateCombined(((evaluable.InsertAxis(i, evaluable.constant(1)), i, i+1, evaluable.constant(3)),), i._name, i.length) - A_ = evaluable.ArrayFromTuple(L1, 0, (evaluable.constant(3),), int, **dict(zip(('_lower', '_upper'), A._intbounds))) - L2 = evaluable.LoopConcatenateCombined(((A_, i*3, i*3+3, evaluable.constant(9)),), i._name, i.length) - self.assertIn(A_, L2._Evaluable__args) - desired = evaluable.Tuple((A_, evaluable.ArrayFromTuple(L2, 0, (evaluable.constant(9),), int, **dict(zip(('_lower', '_upper'), B._intbounds))))) + A = evaluable.loop_concatenate(evaluable.InsertAxis(i, evaluable.constant(1)), i) + B = evaluable.loop_concatenate(A, i) + desired = evaluable.Tuple((A, B)) + actual = desired._combine_loops(desired._loop_deps) self.assertEqual(actual, desired) def test_nested_variant(self): i = evaluable.loop_index('i', 3) j = evaluable.loop_index('j', 3) - A = evaluable.LoopConcatenate((evaluable.InsertAxis(i+j, evaluable.constant(1)), i, i+1, evaluable.constant(3)), i._name, i.length) - B = evaluable.LoopConcatenate((A, j*3, j*3+3, evaluable.constant(9)), j._name, j.length) - actual = evaluable.Tuple((A, B))._combine_loop_concatenates(set()) - L1 = evaluable.LoopConcatenateCombined(((evaluable.InsertAxis(i+j, evaluable.constant(1)), i, i+1, evaluable.constant(3)),), i._name, i.length) - A_ = evaluable.ArrayFromTuple(L1, 0, (evaluable.constant(3),), int, **dict(zip(('_lower', '_upper'), A._intbounds))) - L2 = evaluable.LoopConcatenateCombined(((A_, j*3, j*3+3, evaluable.constant(9)),), j._name, j.length) - self.assertNotIn(A_, L2._Evaluable__args) - desired = evaluable.Tuple((A_, evaluable.ArrayFromTuple(L2, 0, (evaluable.constant(9),), int, **dict(zip(('_lower', '_upper'), B._intbounds))))) + A = evaluable.loop_concatenate(evaluable.InsertAxis(i+j, evaluable.constant(1)), i) + B = evaluable.loop_concatenate(A, j) + desired = evaluable.Tuple((A, B)) + actual = desired._combine_loops(desired._loop_deps) self.assertEqual(actual, desired)
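
For illustration of the new two-method contract, here is a minimal sketch of a
further `Loop` subclass. `LoopMax` is hypothetical and not part of this patch;
it mirrors the `LoopSum` implementation above and assumes the `Loop`, `Tuple`,
`Array` and `parallel.shempty` names exactly as they appear in the diff (a
float dtype is assumed for the `-inf` initial fill):

    import numpy

    from nutils import parallel
    from nutils.evaluable import Array, Loop, Tuple


    class LoopMax(Loop, Array):
        '''Hypothetical elementwise running maximum over loop iterations.'''

        def __init__(self, func, shape, index_name, length):
            assert isinstance(func, Array) and func.dtype == float, f'func={func!r}'
            self.func = func
            # like LoopSum: init_arg carries the index-invariant output shape,
            # body_arg the per-iteration value
            super().__init__(init_arg=Tuple(shape), body_arg=func,
                             index_name=index_name, length=length,
                             shape=shape, dtype=func.dtype)

        def evalf_loop_init(self, shape):
            # allocate shared memory so that parallel workers update the same
            # array, and fill with -inf, the identity of the maximum
            output = parallel.shempty(tuple(n.__index__() for n in shape), dtype=self.dtype)
            output[...] = -numpy.inf
            return output

        @staticmethod
        def evalf_loop_body(output, func):
            # called once per iteration; Loop.evalf serializes these calls
            # behind the shared index lock, so the in-place update is safe
            numpy.maximum(output, func, out=output)

Note that `evalf_loop_body` needs no synchronization of its own: as described
in the commit message, `Loop.evalf` reuses the lock that guards the loop index
for every update of the output value.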