diff --git a/nutils/evaluable.py b/nutils/evaluable.py index 6617343b0..e8f981e5a 100644 --- a/nutils/evaluable.py +++ b/nutils/evaluable.py @@ -50,6 +50,7 @@ import contextlib import subprocess import os +import multiprocessing graphviz = os.environ.get('NUTILS_GRAPHVIZ') @@ -302,10 +303,10 @@ def _simplified(self): @cached_property def optimized_for_numpy(self): - return self.simplified \ - ._optimized_for_numpy1 \ - ._deep_flatten_constants() \ - ._combine_loop_concatenates(frozenset()) + retval = self.simplified \ + ._optimized_for_numpy1 \ + ._deep_flatten_constants() + return retval._combine_loops(loop for loop in retval._loop_deps if loop is not retval) @util.deep_replace_property def _optimized_for_numpy1(obj): @@ -323,49 +324,37 @@ def _deep_flatten_constants(self): return self._flatten_constant() @cached_property - def _loop_concatenate_deps(self): + def _loop_deps(self) -> typing.Tuple['Loop', ...]: deps = [] - for arg in self.__args: - deps += [dep for dep in arg._loop_concatenate_deps if dep not in deps] + deps.extend(loop for arg in self.__args for loop in arg._loop_deps if loop not in deps) return tuple(deps) - def _combine_loop_concatenates(self, outer_exclude): + def _combine_loops(self, candidates): + candidates = list(candidates) while True: - exclude = set(outer_exclude) + exclude = set() combine = {} - # Collect all top-level `LoopConcatenate` instances in `combine` and all - # their dependent `LoopConcatenate` instances in `exclude`. - for lc in self._loop_concatenate_deps: - lcs = combine.setdefault(lc.index, []) - if lc not in lcs: - lcs.append(lc) - exclude.update(set(lc._loop_concatenate_deps) - {lc}) - # Combine top-level `LoopConcatenate` instances excluding those in - # `exclude`. + # Collect all top-level loops in `combine` and all their dependent + # loops instances in `exclude`. + for loop in candidates: + loops = combine.setdefault(loop.index, []) + if loop not in loops: + loops.append(loop) + exclude.update(set(loop._loop_deps) - {loop}) + # Combine top-level loop instances excluding those in `exclude`. replacements = {} - for index, lcs in combine.items(): - lcs = [lc for lc in lcs if lc not in exclude] - if not lcs: + for index, loops in combine.items(): + loops = tuple(loop for loop in loops if loop not in exclude) + candidates = [loop for loop in candidates if loop not in loops] + if len(loops) <= 1: continue - # We're extracting data from `LoopConcatenate` in favor of using - # `loop_concatenate_combined(lcs, ...)` because the later requires - # reapplying simplifications that are already applied in the former. - # For example, in `loop_concatenate_combined` the offsets (used by - # start, stop and the concatenation length) are formed by - # `loop_concatenate`-ing `func.shape[-1]`. If the shape is constant, - # this can be simplified to a `Range`. - data = Tuple(tuple(Tuple(lc.funcdata) for lc in lcs)) - # Combine `LoopConcatenate` instances in `data` excluding - # `outer_exclude` and those that will be processed in a subsequent loop - # (the remainder of `exclude`). The latter consists of loops that are - # invariant w.r.t. the current loop `index`. - data = data._combine_loop_concatenates(exclude) - combined = LoopConcatenateCombined(tuple(map(tuple, data)), index._name, index.length) - for i, lc in enumerate(lcs): - intbounds = dict(zip(('_lower', '_upper'), lc._intbounds)) if lc.dtype == int else {} - replacements[lc] = ArrayFromTuple(combined, i, lc.shape, lc.dtype, **intbounds) + combined = LoopTuple(loops, index.name, index.length) + combined = combined._combine_loops(combined._nested_loops) + for i, loop in enumerate(loops): + intbounds = dict(zip(('_lower', '_upper'), loop._intbounds)) if loop.dtype == int else {} + replacements[loop] = ArrayFromTuple(combined, i, loop.shape, loop.dtype, **intbounds) if replacements: - self = util.shallow_replace(lambda key: replacements.get(key) if isinstance(key, LoopConcatenate) else None)(self) + self = util.shallow_replace(lambda key: replacements.get(key) if isinstance(key, Loop) else None)(self) else: return self @@ -4091,6 +4080,7 @@ class _LoopIndex(Argument): def __init__(self, name: str, length: Array): assert isinstance(name, str), f'name={name!r}' assert _isindex(length), f'length={length!r}' + self.name = name self.length = length super().__init__(name, (), int) @@ -4116,72 +4106,175 @@ def _simplified(self): return Zeros((), int) -class LoopSum(Array): +class Loop(Evaluable): + '''Base class for evaluable loops. - def __init__(self, func: Array, shape: typing.Tuple[Array, ...], index_name: str, length: Array): - assert isinstance(func, Array) and func.dtype != bool, f'func={func!r}' - assert isinstance(shape, tuple) and all(_isindex(n) for n in shape), f'shape={shape!r}' + Subclasses must implement + + * method ``evalf_loop_init(init_arg)`` and + * method ``evalf_loop_body(output, body_arg)``. + ''' + + def __init__(self, index_name: str, length: Array, init_arg: Evaluable, body_arg: Evaluable, *args, **kwargs): assert isinstance(index_name, str), f'index_name={index_name!r}' - assert _isindex(length), f'length={length!r}' - assert func.ndim == len(shape) - self.index = loop_index(index_name, length) - if any(self.index in n.arguments for n in shape): - raise ValueError('the shape of the function must not depend on the index') - self.func = func - self._invariants, self._dependencies = _dependencies_sans_invariants(func, self.index) - super().__init__(args=(Tuple(shape), length, *self._invariants), shape=shape, dtype=func.dtype) + assert isinstance(length, Array), f'length={length!r}' + assert isinstance(init_arg, Evaluable), f'init_arg={init_arg!r}' + assert isinstance(body_arg, Evaluable), f'body_arg={init_arg!r}' + self.index_name = index_name + self.length = length + self.index = _LoopIndex(index_name, length) + self.init_arg = init_arg + self.body_arg = body_arg + if self.index in init_arg.arguments: + raise ValueError('the loop initialization arguments must not depend on the index') + self._invariants, self._dependencies = _dependencies_sans_invariants(body_arg, self.index) + super().__init__(args=(length, init_arg, *self._invariants), *args, **kwargs) @cached_property def _serialized_loop(self): indices = {d: i for i, d in enumerate(itertools.chain([self.index], self._invariants, self._dependencies))} return tuple((dep, tuple(map(indices.__getitem__, dep._Evaluable__args))) for dep in self._dependencies) - # This property is a derivation of `_serialized` where the `Evaluable` - # instances are mapped to the `evalf` methods of the instances. Asserting - # that functions are immutable is difficult and currently - # `types._isimmutable` marks all functions as mutable. Since the - # `types.CacheMeta` machinery asserts immutability of the property, we have - # to resort to a regular `functools.cached_property`. Nevertheless, this - # property should be treated as if it is immutable. @cached_property def _serialized_loop_evalf(self): return tuple((dep.evalf, indices) for dep, indices in self._serialized_loop) - def evalf(self, shape, length, *args): + def evalf(self, length, init_arg, *invariants): serialized_evalf = self._serialized_loop_evalf - result = numpy.zeros(shape, self.dtype) - for index in range(length): - values = [numpy.array(index)] - values.extend(args) - values.extend(op_evalf(*[values[i] for i in indices]) for op_evalf, indices in serialized_evalf) - result += values[-1] - return result - - def evalf_withtimes(self, times, shape, length, *args): + output = self.evalf_loop_init(init_arg) + length = length.__index__() + values = [None] + list(invariants) + [None] * len(serialized_evalf) + with log.context(f'loop {self.index.name}'.replace('{', '{{').replace('}', '}}') + ' {:3.0f}%', 0) as log_ctx: + fork = parallel.fork(length) + if fork: + raw_index = multiprocessing.RawValue('i', 0) + lock = multiprocessing.Lock() + with fork as pid: + with lock: + index = raw_index.value + raw_index.value = index + 1 + while index < length: + if not pid: + log_ctx(100*index/length) + values[0] = numpy.array(index) + for o, (op_evalf, indices) in enumerate(serialized_evalf, len(invariants) + 1): + values[o] = op_evalf(*[values[i] for i in indices]) + with lock: + self.evalf_loop_body(output, values[-1]) + index = raw_index.value + raw_index.value = index + 1 + else: + for index in range(length): + values[0] = numpy.array(index) + for o, (op_evalf, indices) in enumerate(serialized_evalf, len(invariants) + 1): + values[o] = op_evalf(*[values[i] for i in indices]) + self.evalf_loop_body(output, values[-1]) + log_ctx(100*(index+1)/length) + return output + + def evalf_withtimes(self, times, length, init_arg, *invariants): serialized = self._serialized_loop subtimes = times.setdefault(self, collections.defaultdict(_Stats)) - result = numpy.zeros(shape, self.dtype) + output = self.evalf_loop_init(init_arg) + values = [None] + list(invariants) + [None] * len(serialized) for index in range(length): - values = [numpy.array(index)] - values.extend(args) - values.extend(op.evalf_withtimes(subtimes, *[values[i] for i in indices]) for op, indices in serialized) - result += values[-1] - return result + values[0] = numpy.array(index) + for o, (op, indices) in enumerate(serialized, len(invariants) + 1): + values[o] = op.evalf_withtimes(subtimes, *[values[i] for i in indices]) + self.evalf_loop_body_withtimes(subtimes, output, values[-1]) + return output - def _derivative(self, var, seen): - return loop_sum(derivative(self.func, var, seen), self.index) + def evalf_loop_body_withtimes(self, times, output, body_arg): + with times[self]: + self.evalf_loop_body(output, body_arg) def _node(self, cache, subgraph, times): - if self in cache: - return cache[self] - subcache = {} - for arg in self._Evaluable__args: - subcache[arg] = arg._node(cache, subgraph, times) + if (cached := cache.get(self)) is not None: + return cached + for arg in itertools.chain(self._invariants, (self.init_arg,)): + arg._node(cache, subgraph, times) + loopcache = cache.copy() + loopcache.pop(self.index, None) loopgraph = Subgraph('Loop', subgraph) - subtimes = times.get(self, collections.defaultdict(_Stats)) - sum_kwargs = {'shape[{}]'.format(i): n._node(cache, subgraph, times) for i, n in enumerate(self.shape)} - sum_kwargs['func'] = self.func._node(subcache, loopgraph, subtimes) - cache[self] = node = RegularNode('LoopSum', (), sum_kwargs, (type(self).__name__, subtimes['sum']), loopgraph) + looptimes = times.get(self, collections.defaultdict(_Stats)) + cache[self] = node = self._node_loop_body(loopcache, loopgraph, looptimes) + return node + + @property + def _loop_deps(self) -> typing.Tuple['Loop', ...]: + deps = [self] + args = itertools.chain(self._invariants, (self.init_arg,)) + deps.extend(loop for arg in args for loop in arg._loop_deps if loop not in deps) + return tuple(deps) + + @cached_property + def _nested_loops(self) -> typing.Tuple['Loop', ...]: + nested = [] + nested.extend(loop for arg in self._dependencies for loop in arg._loop_deps if loop not in nested) + deps = self._loop_deps + return tuple(loop for loop in nested if loop not in deps) + + +class LoopTuple(Loop): + + def __init__(self, loops: typing.Tuple[Loop], index_name: str, length: Array): + assert isinstance(loops, tuple) and all(isinstance(loop, Loop) and loop.index_name == index_name and loop.length == length for loop in loops), f'loops={loops}' + self.loops = loops + super().__init__( + index_name=index_name, + length=length, + init_arg=Tuple(tuple(loop.init_arg for loop in loops)), + body_arg=Tuple(tuple(loop.body_arg for loop in loops)), + ) + + def evalf_loop_init(self, args): + return tuple(loop.evalf_loop_init(arg) for loop, arg in zip(self.loops, args)) + + def evalf_loop_body(self, outputs, args): + for loop, output, arg in zip(self.loops, outputs, args): + loop.evalf_loop_body(output, arg) + + def evalf_loop_body_withtimes(self, times, outputs, args): + for loop, output, arg in zip(self.loops, outputs, args): + loop.evalf_loop_body_withtimes(times, output, arg) + + def _node_loop_body(self, cache, subgraph, times): + if (cached := cache.get(self)) is not None: + return cached + cache[self] = node = TupleNode(tuple(item._node_loop_body(cache, subgraph, times) for item in self.loops), metadata=(type(self).__name__, times[self]), subgraph=subgraph) + return node + + @property + def _loop_deps(self) -> typing.Tuple['Loop', ...]: + deps = [] + deps.extend(dep for loop in self.loops for dep in loop._loop_deps) + return tuple(deps) + + +class LoopSum(Loop, Array): + + def __init__(self, func: Array, shape: typing.Tuple[Array, ...], index_name: str, length: Array): + assert isinstance(func, Array) and func.dtype != bool, f'func={func!r}' + assert func.ndim == len(shape) + self.func = func + super().__init__(init_arg=Tuple(shape), body_arg=func, index_name=index_name, length=length, shape=shape, dtype=func.dtype) + + def evalf_loop_init(self, shape): + return parallel.shzeros(tuple(n.__index__() for n in shape), dtype=self.dtype) + + @staticmethod + def evalf_loop_body(output, func): + output += func + + def _derivative(self, var, seen): + return loop_sum(derivative(self.func, var, seen), self.index) + + def _node_loop_body(self, cache, subgraph, times): + if (cached := cache.get(self)) is not None: + return cached + kwargs = {'shape[{}]'.format(i): n._node(cache, subgraph, times) for i, n in enumerate(self.shape)} + kwargs['func'] = self.func._node(cache, subgraph, times) + cache[self] = node = RegularNode('LoopSum', (), kwargs, (type(self).__name__, times[self]), subgraph) return node def _simplified(self): @@ -4261,39 +4354,40 @@ def _intbounds_impl(self): return 0, (0 if n == 0 or m == 0 else n * m) -class LoopConcatenate(Array): +class LoopConcatenate(Loop, Array): - def __init__(self, funcdata: typing.Tuple[Array, ...], index_name: str, length: Array): - assert isinstance(funcdata, tuple) and all(isinstance(d, Array) for d in funcdata), f'funcdata={funcdata!r}' - assert isinstance(index_name, str), f'index_name={index_name!r}' - assert _isindex(length), f'length={length!r}' - self.funcdata = funcdata - self.func, self.start, stop, *shape = funcdata - self.index = loop_index(index_name, length) + def __init__(self, func: Array, start: Array, stop: Array, shape: typing.Tuple[Array, ...], index_name: str, length: Array): + assert isinstance(func, Array), f'func={func}' + assert _isindex(start), f'start={start}' + assert _isindex(stop), f'stop={stop}' + assert isinstance(shape, tuple) and all(map(_isindex, shape)), f'shape={shape}' + self.func = func + self.start = start + self.stop = stop if not self.func.ndim: raise ValueError('expected an array with at least one axis') - if any(self.index in n.arguments for n in shape): - raise ValueError('the shape of the function must not depend on the index') - self._lcc = LoopConcatenateCombined((self.funcdata,), index_name, length) - super().__init__(args=(self._lcc,), shape=tuple(shape), dtype=self.func.dtype) + super().__init__(init_arg=Tuple(shape), body_arg=Tuple((func, start, stop)), index_name=index_name, length=length, shape=shape, dtype=func.dtype) - @staticmethod - def evalf(arg): - return arg[0] + def evalf_loop_init(self, shape): + return parallel.shempty(tuple(n.__index__() for n in shape), dtype=self.dtype) - def evalf_withtimes(self, times, arg): - with times[self]: - return arg[0] + @staticmethod + def evalf_loop_body(output, arg): + func, start, stop = arg + output[..., start:stop] = func def _derivative(self, var, seen): return Transpose.from_end(loop_concatenate(Transpose.to_end(derivative(self.func, var, seen), self.ndim-1), self.index), self.ndim-1) - def _node(self, cache, subgraph, times): - if self in cache: - return cache[self] - else: - cache[self] = node = self._lcc._node(cache, subgraph, times)[0] - return node + def _node_loop_body(self, cache, subgraph, times): + if (cached := cache.get(self)) is not None: + return cached + kwargs = {'shape[{}]'.format(i): n._node(cache, subgraph, times) for i, n in enumerate(self.shape)} + kwargs['start'] = self.start._node(cache, subgraph, times) + kwargs['stop'] = self.stop._node(cache, subgraph, times) + kwargs['func'] = self.func._node(cache, subgraph, times) + cache[self] = node = RegularNode('LoopConcatenate', (), kwargs, (type(self).__name__, times[self]), subgraph) + return node def _simplified(self): if iszero(self.func): @@ -4340,93 +4434,10 @@ def _assparse(self): chunks.append(tuple(loop_concatenate(_flat(arr), self.index) for arr in (*indices, last_index, values))) return tuple(chunks) - @property - def _loop_concatenate_deps(self): - return (self,) + super()._loop_concatenate_deps - def _intbounds_impl(self): return self.func._intbounds -class LoopConcatenateCombined(Evaluable): - - def __init__(self, funcdatas: typing.Tuple[typing.Tuple[Array, ...], ...], index_name: str, length: Array): - assert isinstance(funcdatas, tuple) and all(isinstance(funcdata, tuple) and all(isinstance(d, Array) for d in funcdata) for funcdata in funcdatas), f'funcdatas={funcdatas!r}' - assert isinstance(index_name, str), f'index_name={index_name}' - assert _isindex(length), f'length={length!r}' - self._funcdatas = funcdatas - self._funcs = tuple(func for func, start, stop, *shape in funcdatas) - self._index_name = index_name - self._index = loop_index(index_name, length) - if any(not func.ndim for func in self._funcs): - raise ValueError('expected an array with at least one axis') - shapes = tuple(Tuple(tuple(shape)) for func, start, stop, *shape in funcdatas) - if any(self._index in shape.arguments for shape in shapes): - raise ValueError('the shape of the function must not depend on the index') - self._invariants, self._dependencies = _dependencies_sans_invariants( - Tuple(tuple(Tuple((start, stop, func)) for func, start, stop, *shape in funcdatas)), self._index) - super().__init__(args=(Tuple(shapes), length, *self._invariants)) - - @cached_property - def _serialized_loop(self): - indices = {d: i for i, d in enumerate(itertools.chain([self._index], self._invariants, self._dependencies))} - return tuple((dep, tuple(map(indices.__getitem__, dep._Evaluable__args))) for dep in self._dependencies) - - # This property is a derivation of `_serialized` where the `Evaluable` - # instances are mapped to the `evalf` methods of the instances. Asserting - # that functions are immutable is difficult and currently - # `types._isimmutable` marks all functions as mutable. Since the - # `types.CacheMeta` machinery asserts immutability of the property, we have - # to resort to a regular `functools.cached_property`. Nevertheless, this - # property should be treated as if it is immutable. - @cached_property - def _serialized_loop_evalf(self): - return tuple((dep.evalf, indices) for dep, indices in self._serialized_loop) - - def evalf(self, shapes, length, *args): - serialized_evalf = self._serialized_loop_evalf - results = [parallel.shempty(tuple(map(int, shape)), dtype=func.dtype) for func, shape in zip(self._funcs, shapes)] - with parallel.ctxrange('loop {}'.format(self._index_name), int(length)) as indices: - for index in indices: - values = [numpy.array(index)] - values.extend(args) - values.extend(op_evalf(*[values[i] for i in indices]) for op_evalf, indices in serialized_evalf) - for result, (start, stop, block) in zip(results, values[-1]): - result[..., start:stop] = block - return tuple(results) - - def evalf_withtimes(self, times, shapes, length, *args): - serialized = self._serialized_loop - subtimes = times.setdefault(self, collections.defaultdict(_Stats)) - results = [parallel.shempty(tuple(map(int, shape)), dtype=func.dtype) for func, shape in zip(self._funcs, shapes)] - for index in range(length): - values = [numpy.array(index)] - values.extend(args) - values.extend(op.evalf_withtimes(subtimes, *[values[i] for i in indices]) for op, indices in serialized) - for func, result, (start, stop, block) in zip(self._funcs, results, values[-1]): - with subtimes['concat', func]: - result[..., start:stop] = block - return tuple(results) - - def _node(self, cache, subgraph, times): - if (self, 'tuple') in cache: - return cache[self, 'tuple'] - subcache = {} - for arg in self._invariants: - subcache[arg] = arg._node(cache, subgraph, times) - loopgraph = Subgraph('Loop', subgraph) - subtimes = times.get(self, collections.defaultdict(_Stats)) - concats = [] - for func, start, stop, *shape in self._funcdatas: - concat_kwargs = {'shape[{}]'.format(i): n._node(cache, subgraph, times) for i, n in enumerate(shape)} - concat_kwargs['start'] = start._node(subcache, loopgraph, subtimes) - concat_kwargs['stop'] = stop._node(subcache, loopgraph, subtimes) - concat_kwargs['func'] = func._node(subcache, loopgraph, subtimes) - concats.append(RegularNode('LoopConcatenate', (), concat_kwargs, (type(self).__name__, subtimes['concat', func]), loopgraph)) - cache[self, 'tuple'] = node = TupleNode(tuple(concats), (type(self).__name__, times[self]), subgraph) - return node - - class SearchSorted(Array): '''Find index of evaluable array into sorted numpy array.''' @@ -4916,10 +4927,10 @@ def loop_sum(func, index): func = asarray(func) if not isinstance(index, _LoopIndex): raise TypeError(f'expected _LoopIndex, got {index!r}') - return LoopSum(func, func.shape, index._name, index.length) + return LoopSum(func, func.shape, index.name, index.length) -def _loop_concatenate_data(func, index): +def loop_concatenate(func, index): func = asarray(func) if not isinstance(index, _LoopIndex): raise TypeError(f'expected _LoopIndex, got {index!r}') @@ -4931,20 +4942,8 @@ def _loop_concatenate_data(func, index): offsets = _SizesToOffsets(chunk_sizes) start = Take(offsets, index) stop = Take(offsets, index+1) - return (func, start, stop, *func.shape[:-1], Take(offsets, index.length)) - - -def loop_concatenate(func, index): - funcdata = _loop_concatenate_data(func, index) - return LoopConcatenate(funcdata, index._name, index.length) - - -def loop_concatenate_combined(funcs, index): - unique_funcs = [] - unique_funcs.extend(func for func in funcs if func not in unique_funcs) - unique_func_data = tuple(_loop_concatenate_data(func, index) for func in unique_funcs) - loop = LoopConcatenateCombined(unique_func_data, index._name, index.length) - return tuple(ArrayFromTuple(loop, unique_funcs.index(func), tuple(shape), func.dtype) for func, start, stop, *shape in unique_func_data) + shape = *func.shape[:-1], Take(offsets, index.length) + return LoopConcatenate(func, start, stop, shape, index.name, index.length) @util.shallow_replace diff --git a/tests/test_evaluable.py b/tests/test_evaluable.py index 3a5022c7c..c6eef8c20 100644 --- a/tests/test_evaluable.py +++ b/tests/test_evaluable.py @@ -593,7 +593,6 @@ def _check(name, op, n_op, *arg_values, hasgrad=True, zerograd=False, ndim=2): _check('loopsum6', lambda: evaluable.loop_sum(evaluable.Guard(evaluable.constant(1) + evaluable.loop_index('index', 4)), evaluable.loop_index('index', 4)) * evaluable.loop_sum(evaluable.loop_index('index', 4), evaluable.loop_index('index', 4)), lambda: numpy.array(60)) _check('loopconcatenate1', lambda a: evaluable.loop_concatenate(a+evaluable.prependaxes(evaluable.astype(evaluable.loop_index('index', 3), float), a.shape), evaluable.loop_index('index', 3)), lambda a: a+numpy.arange(3)[None], ANY(3, 1)) _check('loopconcatenate2', lambda: evaluable.loop_concatenate(evaluable.Elemwise(tuple(types.arraydata(numpy.arange(48).reshape(4, 4, 3)[:, :, a:b]) for a, b in util.pairwise([0, 2, 3])), evaluable.loop_index('index', 2), int), evaluable.loop_index('index', 2)), lambda: numpy.arange(48).reshape(4, 4, 3)) -_check('loopconcatenatecombined', lambda a: evaluable.loop_concatenate_combined([a+evaluable.prependaxes(evaluable.astype(evaluable.loop_index('index', 3), float), a.shape)], evaluable.loop_index('index', 3))[0], lambda a: a+numpy.arange(3)[None], ANY(3, 1), hasgrad=False) _check('legendre', lambda a: evaluable.Legendre(evaluable.asarray(a), 5), lambda a: numpy.moveaxis(numpy.polynomial.legendre.legval(a, numpy.eye(6)), 0, -1), ANY(3, 4, 3)) _check('polyval_1d_p0', lambda c, x: evaluable.Polyval(c, x), poly.eval_outer, POS(1), ANY(4, 1), ndim=1) @@ -935,35 +934,6 @@ def test_loop_concatenate(self): ' ├ %B2\n' ' └ 1\n') - @unittest.skipIf(sys.version_info < (3, 6), 'test requires dicts maintaining insertion order') - def test_loop_concatenatecombined(self): - i = evaluable.loop_index('i', 2) - f, = evaluable.loop_concatenate_combined([evaluable.InsertAxis(i, evaluable.constant(1))], i) - self.assertEqual(f.asciitree(richoutput=True), - 'SUBGRAPHS\n' - 'A\n' - '└ B = Loop\n' - 'NODES\n' - '%B0 = LoopConcatenate\n' - '├ shape[0] = %A0 = Take; i:; [2,2]\n' - '│ ├ %A1 = _SizesToOffsets; i:3; [0,2]\n' - '│ │ └ %A2 = InsertAxis; i:(2); [1,1]\n' - '│ │ ├ 1\n' - '│ │ └ 2\n' - '│ └ 2\n' - '├ start = %B1 = Take; i:; [0,2]\n' - '│ ├ %A1\n' - '│ └ %B2 = LoopIndex\n' - '│ └ length = 2\n' - '├ stop = %B3 = Take; i:; [0,2]\n' - '│ ├ %A1\n' - '│ └ %B4 = Add; i:; [1,2]\n' - '│ ├ %B2\n' - '│ └ 1\n' - '└ func = %B5 = InsertAxis; i:(1); [0,1]\n' - ' ├ %B2\n' - ' └ 1\n') - class simplify(TestCase): @@ -1105,51 +1075,42 @@ def _simplified(self): t.simplified -class combine_loop_concatenates(TestCase): +class combine_loops(TestCase): def test_same_index(self): i = evaluable.loop_index('i', 3) - A = evaluable.LoopConcatenate((evaluable.InsertAxis(i, evaluable.constant(1)), i, i+1, evaluable.constant(3)), i._name, i.length) - B = evaluable.LoopConcatenate((evaluable.InsertAxis(i, evaluable.constant(2)), i*2, i*2+2, evaluable.constant(6)), i._name, i.length) - actual = evaluable.Tuple((A, B))._combine_loop_concatenates(set()) - L = evaluable.LoopConcatenateCombined(((evaluable.InsertAxis(i, evaluable.constant(1)), i, i+1, evaluable.constant(3)), (evaluable.InsertAxis(i, evaluable.constant(2)), i*2, i*2+2, evaluable.constant(6))), i._name, i.length) - desired = evaluable.Tuple((evaluable.ArrayFromTuple(L, 0, (evaluable.constant(3),), int, **dict(zip(('_lower', '_upper'), A._intbounds))), evaluable.ArrayFromTuple(L, 1, (evaluable.constant(6),), int, **dict(zip(('_lower', '_upper'), B._intbounds))))) + A = evaluable.loop_concatenate(evaluable.InsertAxis(i, evaluable.constant(1)), i) + B = evaluable.loop_concatenate(evaluable.InsertAxis(i, evaluable.constant(2)), i) + together = evaluable.Tuple((A, B)) + actual = together._combine_loops(together._loop_deps) + L = evaluable.LoopTuple((A, B), i.name, i.length) + desired = evaluable.Tuple((evaluable.ArrayFromTuple(L, 0, A.shape, A.dtype, **dict(zip(('_lower', '_upper'), A._intbounds))), evaluable.ArrayFromTuple(L, 1, B.shape, B.dtype, **dict(zip(('_lower', '_upper'), B._intbounds))))) self.assertEqual(actual, desired) def test_different_index(self): i = evaluable.loop_index('i', 3) j = evaluable.loop_index('j', 3) - A = evaluable.LoopConcatenate((evaluable.InsertAxis(i, evaluable.constant(1)), i, i+1, evaluable.constant(3)), i._name, i.length) - B = evaluable.LoopConcatenate((evaluable.InsertAxis(j, evaluable.constant(1)), j, j+1, evaluable.constant(3)), j._name, j.length) - actual = evaluable.Tuple((A, B))._combine_loop_concatenates(set()) - L1 = evaluable.LoopConcatenateCombined(((evaluable.InsertAxis(i, evaluable.constant(1)), i, i+evaluable.constant(1), evaluable.constant(3)),), i._name, i.length) - L2 = evaluable.LoopConcatenateCombined(((evaluable.InsertAxis(j, evaluable.constant(1)), j, j+evaluable.constant(1), evaluable.constant(3)),), j._name, j.length) - desired = evaluable.Tuple((evaluable.ArrayFromTuple(L1, 0, (evaluable.constant(3),), int, **dict(zip(('_lower', '_upper'), A._intbounds))), evaluable.ArrayFromTuple(L2, 0, (evaluable.constant(3),), int, **dict(zip(('_lower', '_upper'), B._intbounds))))) + A = evaluable.loop_concatenate(evaluable.InsertAxis(i, evaluable.constant(1)), i) + B = evaluable.loop_concatenate(evaluable.InsertAxis(j, evaluable.constant(1)), j) + desired = evaluable.Tuple((A, B)) + actual = desired._combine_loops(desired._loop_deps) self.assertEqual(actual, desired) def test_nested_invariant(self): i = evaluable.loop_index('i', 3) - A = evaluable.LoopConcatenate((evaluable.InsertAxis(i, evaluable.constant(1)), i, i+1, evaluable.constant(3)), i._name, i.length) - B = evaluable.LoopConcatenate((A, i*3, i*3+3, evaluable.constant(9)), i._name, i.length) - actual = evaluable.Tuple((A, B))._combine_loop_concatenates(set()) - L1 = evaluable.LoopConcatenateCombined(((evaluable.InsertAxis(i, evaluable.constant(1)), i, i+1, evaluable.constant(3)),), i._name, i.length) - A_ = evaluable.ArrayFromTuple(L1, 0, (evaluable.constant(3),), int, **dict(zip(('_lower', '_upper'), A._intbounds))) - L2 = evaluable.LoopConcatenateCombined(((A_, i*3, i*3+3, evaluable.constant(9)),), i._name, i.length) - self.assertIn(A_, L2._Evaluable__args) - desired = evaluable.Tuple((A_, evaluable.ArrayFromTuple(L2, 0, (evaluable.constant(9),), int, **dict(zip(('_lower', '_upper'), B._intbounds))))) + A = evaluable.loop_concatenate(evaluable.InsertAxis(i, evaluable.constant(1)), i) + B = evaluable.loop_concatenate(A, i) + desired = evaluable.Tuple((A, B)) + actual = desired._combine_loops(desired._loop_deps) self.assertEqual(actual, desired) def test_nested_variant(self): i = evaluable.loop_index('i', 3) j = evaluable.loop_index('j', 3) - A = evaluable.LoopConcatenate((evaluable.InsertAxis(i+j, evaluable.constant(1)), i, i+1, evaluable.constant(3)), i._name, i.length) - B = evaluable.LoopConcatenate((A, j*3, j*3+3, evaluable.constant(9)), j._name, j.length) - actual = evaluable.Tuple((A, B))._combine_loop_concatenates(set()) - L1 = evaluable.LoopConcatenateCombined(((evaluable.InsertAxis(i+j, evaluable.constant(1)), i, i+1, evaluable.constant(3)),), i._name, i.length) - A_ = evaluable.ArrayFromTuple(L1, 0, (evaluable.constant(3),), int, **dict(zip(('_lower', '_upper'), A._intbounds))) - L2 = evaluable.LoopConcatenateCombined(((A_, j*3, j*3+3, evaluable.constant(9)),), j._name, j.length) - self.assertNotIn(A_, L2._Evaluable__args) - desired = evaluable.Tuple((A_, evaluable.ArrayFromTuple(L2, 0, (evaluable.constant(9),), int, **dict(zip(('_lower', '_upper'), B._intbounds))))) + A = evaluable.loop_concatenate(evaluable.InsertAxis(i+j, evaluable.constant(1)), i) + B = evaluable.loop_concatenate(A, j) + desired = evaluable.Tuple((A, B)) + actual = desired._combine_loops(desired._loop_deps) self.assertEqual(actual, desired)