diff --git a/pyop2/base.py b/pyop2/base.py deleted file mode 100644 index ba4929390..000000000 --- a/pyop2/base.py +++ /dev/null @@ -1,3911 +0,0 @@ -# This file is part of PyOP2 -# -# PyOP2 is Copyright (c) 2012, Imperial College London and -# others. Please see the AUTHORS file in the main source directory for -# a full list of copyright holders. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * The name of Imperial College London or that of other -# contributors may not be used to endorse or promote products -# derived from this software without specific prior written -# permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS -# ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -# OF THE POSSIBILITY OF SUCH DAMAGE. - -"""Base classes for OP2 objects, containing metadata and runtime data -information which is backend independent. 
Individual runtime backends should -subclass these as required to implement backend-specific features. -""" -import abc - -from enum import IntEnum -from collections import defaultdict -import itertools -import numpy as np -import ctypes -import numbers -import operator -import types -from hashlib import md5 - -from pyop2.datatypes import IntType, as_cstr, dtype_limits, ScalarType -from pyop2.configuration import configuration -from pyop2.caching import Cached, ObjectCached -from pyop2.exceptions import * -from pyop2.utils import * -from pyop2.mpi import MPI, collective, dup_comm -from pyop2.profiling import timed_region -from pyop2.sparsity import build_sparsity -from pyop2.version import __version__ as version - -from coffee.base import Node -from coffee.visitors import EstimateFlops -from functools import reduce - -import loopy - - -def _make_object(name, *args, **kwargs): - from pyop2 import sequential - return getattr(sequential, name)(*args, **kwargs) - - -# Data API - -class Access(IntEnum): - READ = 1 - WRITE = 2 - RW = 3 - INC = 4 - MIN = 5 - MAX = 6 - - -READ = Access.READ -"""The :class:`Global`, :class:`Dat`, or :class:`Mat` is accessed read-only.""" - -WRITE = Access.WRITE -"""The :class:`Global`, :class:`Dat`, or :class:`Mat` is accessed write-only, -and OP2 is not required to handle write conflicts.""" - -RW = Access.RW -"""The :class:`Global`, :class:`Dat`, or :class:`Mat` is accessed for reading -and writing, and OP2 is not required to handle write conflicts.""" - -INC = Access.INC -"""The kernel computes increments to be summed onto a :class:`Global`, -:class:`Dat`, or :class:`Mat`. OP2 is responsible for managing the write -conflicts caused.""" - -MIN = Access.MIN -"""The kernel contributes to a reduction into a :class:`Global` using a ``min`` -operation. OP2 is responsible for reducing over the different kernel -invocations.""" - -MAX = Access.MAX -"""The kernel contributes to a reduction into a :class:`Global` using a ``max`` -operation. 
OP2 is responsible for reducing over the different kernel -invocations.""" - -# Data API - - -class Arg(object): - - """An argument to a :func:`pyop2.op2.par_loop`. - - .. warning :: - User code should not directly instantiate :class:`Arg`. - Instead, use the call syntax on the :class:`DataCarrier`. - """ - - def __init__(self, data=None, map=None, access=None, lgmaps=None, unroll_map=False): - """ - :param data: A data-carrying object, either :class:`Dat` or class:`Mat` - :param map: A :class:`Map` to access this :class:`Arg` or the default - if the identity map is to be used. - :param access: An access descriptor of type :class:`Access` - :param lgmaps: For :class:`Mat` objects, a tuple of 2-tuples of local to - global maps used during assembly. - - Checks that: - - 1. the maps used are initialized i.e. have mapping data associated, and - 2. the to Set of the map used to access it matches the Set it is - defined on. - - A :class:`MapValueError` is raised if these conditions are not met.""" - self.data = data - self._map = map - if map is None: - self.map_tuple = () - elif isinstance(map, Map): - self.map_tuple = (map, ) - else: - self.map_tuple = tuple(map) - - if data is not None and hasattr(data, "dtype"): - if data.dtype.kind == "c" and (access == MIN or access == MAX): - raise ValueError("MIN and MAX access descriptors are undefined on complex data.") - self._access = access - - self.unroll_map = unroll_map - self.lgmaps = None - if self._is_mat and lgmaps is not None: - self.lgmaps = as_tuple(lgmaps) - assert len(self.lgmaps) == self.data.nblocks - else: - if lgmaps is not None: - raise ValueError("Local to global maps only for matrices") - - # Check arguments for consistency - if configuration["type_check"] and not (self._is_global or map is None): - for j, m in enumerate(map): - if m.iterset.total_size > 0 and len(m.values_with_halo) == 0: - raise MapValueError("%s is not initialized." 
% map) - if self._is_mat and m.toset != data.sparsity.dsets[j].set: - raise MapValueError( - "To set of %s doesn't match the set of %s." % (map, data)) - if self._is_dat and map.toset != data.dataset.set: - raise MapValueError( - "To set of %s doesn't match the set of %s." % (map, data)) - - def recreate(self, data=None, map=None, access=None, lgmaps=None, unroll_map=None): - """Creates a new Dat based on the existing Dat with the changes specified. - - :param data: A data-carrying object, either :class:`Dat` or class:`Mat` - :param map: A :class:`Map` to access this :class:`Arg` or the default - if the identity map is to be used. - :param access: An access descriptor of type :class:`Access` - :param lgmaps: For :class:`Mat` objects, a tuple of 2-tuples of local to - global maps used during assembly.""" - return type(self)(data=data or self.data, - map=map or self.map, - access=access or self.access, - lgmaps=lgmaps or self.lgmaps, - unroll_map=False if unroll_map is None else unroll_map) - - @cached_property - def _kernel_args_(self): - return self.data._kernel_args_ - - @cached_property - def _argtypes_(self): - return self.data._argtypes_ - - @cached_property - def _wrapper_cache_key_(self): - if self.map is not None: - map_ = tuple(None if m is None else m._wrapper_cache_key_ for m in self.map) - else: - map_ = self.map - return (type(self), self.access, self.data._wrapper_cache_key_, map_, self.unroll_map) - - @property - def _key(self): - return (self.data, self._map, self._access) - - def __eq__(self, other): - r""":class:`Arg`\s compare equal of they are defined on the same data, - use the same :class:`Map` with the same index and the same access - descriptor.""" - return self._key == other._key - - def __ne__(self, other): - r""":class:`Arg`\s compare equal of they are defined on the same data, - use the same :class:`Map` with the same index and the same access - descriptor.""" - return not self.__eq__(other) - - def __str__(self): - return "OP2 Arg: dat 
%s, map %s, access %s" % \ - (self.data, self._map, self._access) - - def __repr__(self): - return "Arg(%r, %r, %r)" % \ - (self.data, self._map, self._access) - - def __iter__(self): - for arg in self.split: - yield arg - - @cached_property - def split(self): - """Split a mixed argument into a tuple of constituent arguments.""" - if self._is_mixed_dat: - return tuple(_make_object('Arg', d, m, self._access) - for d, m in zip(self.data, self._map)) - elif self._is_mixed_mat: - rows, cols = self.data.sparsity.shape - mr, mc = self.map - return tuple(_make_object('Arg', self.data[i, j], (mr.split[i], mc.split[j]), - self._access) - for i in range(rows) for j in range(cols)) - else: - return (self,) - - @cached_property - def name(self): - """The generated argument name.""" - return "arg%d" % self.position - - @cached_property - def ctype(self): - """String representing the C type of the data in this ``Arg``.""" - return self.data.ctype - - @cached_property - def dtype(self): - """Numpy datatype of this Arg""" - return self.data.dtype - - @cached_property - def map(self): - """The :class:`Map` via which the data is to be accessed.""" - return self._map - - @cached_property - def access(self): - """Access descriptor. 
One of the constants of type :class:`Access`""" - return self._access - - @cached_property - def _is_dat_view(self): - return isinstance(self.data, DatView) - - @cached_property - def _is_mat(self): - return isinstance(self.data, Mat) - - @cached_property - def _is_mixed_mat(self): - return self._is_mat and self.data.sparsity.shape > (1, 1) - - @cached_property - def _is_global(self): - return isinstance(self.data, Global) - - @cached_property - def _is_global_reduction(self): - return self._is_global and self._access in [INC, MIN, MAX] - - @cached_property - def _is_dat(self): - return isinstance(self.data, Dat) - - @cached_property - def _is_mixed_dat(self): - return isinstance(self.data, MixedDat) - - @cached_property - def _is_mixed(self): - return self._is_mixed_dat or self._is_mixed_mat - - @cached_property - def _is_direct(self): - return isinstance(self.data, Dat) and self.map is None - - @cached_property - def _is_indirect(self): - return isinstance(self.data, Dat) and self.map is not None - - @collective - def global_to_local_begin(self): - """Begin halo exchange for the argument if a halo update is required. - Doing halo exchanges only makes sense for :class:`Dat` objects. - """ - assert self._is_dat, "Doing halo exchanges only makes sense for Dats" - if self._is_direct: - return - if self.access is not WRITE: - self.data.global_to_local_begin(self.access) - - @collective - def global_to_local_end(self): - """Finish halo exchange for the argument if a halo update is required. - Doing halo exchanges only makes sense for :class:`Dat` objects. 
- """ - assert self._is_dat, "Doing halo exchanges only makes sense for Dats" - if self._is_direct: - return - if self.access is not WRITE: - self.data.global_to_local_end(self.access) - - @collective - def local_to_global_begin(self): - assert self._is_dat, "Doing halo exchanges only makes sense for Dats" - if self._is_direct: - return - if self.access in {INC, MIN, MAX}: - self.data.local_to_global_begin(self.access) - - @collective - def local_to_global_end(self): - assert self._is_dat, "Doing halo exchanges only makes sense for Dats" - if self._is_direct: - return - if self.access in {INC, MIN, MAX}: - self.data.local_to_global_end(self.access) - - @collective - def reduction_begin(self, comm): - """Begin reduction for the argument if its access is INC, MIN, or MAX. - Doing a reduction only makes sense for :class:`Global` objects.""" - assert self._is_global, \ - "Doing global reduction only makes sense for Globals" - if self.access is not READ: - if self.access is INC: - op = MPI.SUM - elif self.access is MIN: - op = MPI.MIN - elif self.access is MAX: - op = MPI.MAX - if MPI.VERSION >= 3: - self._reduction_req = comm.Iallreduce(self.data._data, self.data._buf, op=op) - else: - comm.Allreduce(self.data._data, self.data._buf, op=op) - - @collective - def reduction_end(self, comm): - """End reduction for the argument if it is in flight. - Doing a reduction only makes sense for :class:`Global` objects.""" - assert self._is_global, \ - "Doing global reduction only makes sense for Globals" - if self.access is not READ: - if MPI.VERSION >= 3: - self._reduction_req.Wait() - self._reduction_req = None - self.data._data[:] = self.data._buf[:] - - -class Set(object): - - """OP2 set. - - :param size: The size of the set. - :type size: integer or list of four integers. - :param string name: The name of the set (optional). - :param halo: An exisiting halo to use (optional). 
- - When the set is employed as an iteration space in a - :func:`pyop2.op2.par_loop`, the extent of any local iteration space within - each set entry is indicated in brackets. See the example in - :func:`pyop2.op2.par_loop` for more details. - - The size of the set can either be an integer, or a list of four - integers. The latter case is used for running in parallel where - we distinguish between: - - - `CORE` (owned and not touching halo) - - `OWNED` (owned, touching halo) - - `EXECUTE HALO` (not owned, but executed over redundantly) - - `NON EXECUTE HALO` (not owned, read when executing in the execute halo) - - If a single integer is passed, we assume that we're running in - serial and there is no distinction. - - The division of set elements is: :: - - [0, CORE) - [CORE, OWNED) - [OWNED, GHOST) - - Halo send/receive data is stored on sets in a :class:`Halo`. - """ - - _CORE_SIZE = 0 - _OWNED_SIZE = 1 - _GHOST_SIZE = 2 - - _extruded = False - - _kernel_args_ = () - _argtypes_ = () - - @cached_property - def _wrapper_cache_key_(self): - return (type(self), ) - - @validate_type(('size', (numbers.Integral, tuple, list, np.ndarray), SizeTypeError), - ('name', str, NameTypeError)) - def __init__(self, size, name=None, halo=None, comm=None): - self.comm = dup_comm(comm) - if isinstance(size, numbers.Integral): - size = [size] * 3 - size = as_tuple(size, numbers.Integral, 3) - assert size[Set._CORE_SIZE] <= size[Set._OWNED_SIZE] <= \ - size[Set._GHOST_SIZE], "Set received invalid sizes: %s" % size - self._sizes = size - self._name = name or "set_#x%x" % id(self) - self._halo = halo - self._partition_size = 1024 - # A cache of objects built on top of this set - self._cache = {} - - @cached_property - def core_size(self): - """Core set size. 
Owned elements not touching halo elements.""" - return self._sizes[Set._CORE_SIZE] - - @cached_property - def size(self): - """Set size, owned elements.""" - return self._sizes[Set._OWNED_SIZE] - - @cached_property - def total_size(self): - """Set size including ghost elements. - """ - return self._sizes[Set._GHOST_SIZE] - - @cached_property - def sizes(self): - """Set sizes: core, owned, execute halo, total.""" - return self._sizes - - @cached_property - def core_part(self): - return SetPartition(self, 0, self.core_size) - - @cached_property - def owned_part(self): - return SetPartition(self, self.core_size, self.size - self.core_size) - - @cached_property - def name(self): - """User-defined label""" - return self._name - - @cached_property - def halo(self): - """:class:`Halo` associated with this Set""" - return self._halo - - @property - def partition_size(self): - """Default partition size""" - return self._partition_size - - @partition_size.setter - def partition_size(self, partition_value): - """Set the partition size""" - self._partition_size = partition_value - - def __iter__(self): - """Yield self when iterated over.""" - yield self - - def __getitem__(self, idx): - """Allow indexing to return self""" - assert idx == 0 - return self - - def __len__(self): - """This is not a mixed type and therefore of length 1.""" - return 1 - - def __str__(self): - return "OP2 Set: %s with size %s" % (self._name, self.size) - - def __repr__(self): - return "Set(%r, %r)" % (self._sizes, self._name) - - def __call__(self, *indices): - """Build a :class:`Subset` from this :class:`Set` - - :arg indices: The elements of this :class:`Set` from which the - :class:`Subset` should be formed. 
- - """ - if len(indices) == 1: - indices = indices[0] - if np.isscalar(indices): - indices = [indices] - return _make_object('Subset', self, indices) - - def __contains__(self, dset): - """Indicate whether a given DataSet is compatible with this Set.""" - if isinstance(dset, DataSet): - return dset.set is self - else: - return False - - def __pow__(self, e): - """Derive a :class:`DataSet` with dimension ``e``""" - return _make_object('DataSet', self, dim=e) - - @cached_property - def layers(self): - """Return None (not an :class:`ExtrudedSet`).""" - return None - - def _check_operands(self, other): - if type(other) is Set: - if other is not self: - raise ValueError("Uable to perform set operations between two unrelated sets: %s and %s." % (self, other)) - elif type(other) is Subset: - if self is not other._superset: - raise TypeError("Superset mismatch: self (%s) != other._superset (%s)" % (self, other._superset)) - else: - raise TypeError("Unable to perform set operations between `Set` and %s." 
% (type(other), )) - - def intersection(self, other): - self._check_operands(other) - return other - - def union(self, other): - self._check_operands(other) - return self - - def difference(self, other): - self._check_operands(other) - if other is self: - return Subset(self, []) - else: - return type(other)(self, np.setdiff1d(np.asarray(range(self.total_size), dtype=IntType), other._indices)) - - def symmetric_difference(self, other): - self._check_operands(other) - return self.difference(other) - - -class GlobalSet(Set): - - _extruded = False - - """A proxy set allowing a :class:`Global` to be used in place of a - :class:`Dat` where appropriate.""" - - _kernel_args_ = () - _argtypes_ = () - - def __init__(self, comm=None): - self.comm = dup_comm(comm) - self._cache = {} - - @cached_property - def core_size(self): - return 0 - - @cached_property - def size(self): - return 1 if self.comm.rank == 0 else 0 - - @cached_property - def total_size(self): - """Total set size, including halo elements.""" - return 1 if self.comm.rank == 0 else 0 - - @cached_property - def sizes(self): - """Set sizes: core, owned, execute halo, total.""" - return (self.core_size, self.size, self.total_size) - - @cached_property - def name(self): - """User-defined label""" - return "GlobalSet" - - @cached_property - def halo(self): - """:class:`Halo` associated with this Set""" - return None - - @property - def partition_size(self): - """Default partition size""" - return None - - def __iter__(self): - """Yield self when iterated over.""" - yield self - - def __getitem__(self, idx): - """Allow indexing to return self""" - assert idx == 0 - return self - - def __len__(self): - """This is not a mixed type and therefore of length 1.""" - return 1 - - def __str__(self): - return "OP2 GlobalSet" - - def __repr__(self): - return "GlobalSet()" - - def __eq__(self, other): - # Currently all GlobalSets compare equal. 
- return isinstance(other, GlobalSet) - - def __hash__(self): - # Currently all GlobalSets compare equal. - return hash(type(self)) - - -class ExtrudedSet(Set): - - """OP2 ExtrudedSet. - - :param parent: The parent :class:`Set` to build this :class:`ExtrudedSet` on top of - :type parent: a :class:`Set`. - :param layers: The number of layers in this :class:`ExtrudedSet`. - :type layers: an integer, indicating the number of layers for every entity, - or an array of shape (parent.total_size, 2) giving the start - and one past the stop layer for every entity. An entry - ``a, b = layers[e, ...]`` means that the layers for entity - ``e`` run over :math:`[a, b)`. - - The number of layers indicates the number of time the base set is - extruded in the direction of the :class:`ExtrudedSet`. As a - result, there are ``layers-1`` extruded "cells" in an extruded set. - """ - - @validate_type(('parent', Set, TypeError)) - def __init__(self, parent, layers): - self._parent = parent - try: - layers = verify_reshape(layers, IntType, (parent.total_size, 2)) - self.constant_layers = False - if layers.min() < 0: - raise SizeTypeError("Bottom of layers must be >= 0") - if any(layers[:, 1] - layers[:, 0] < 1): - raise SizeTypeError("Number of layers must be >= 0") - except DataValueError: - # Legacy, integer - layers = np.asarray(layers, dtype=IntType) - if layers.shape: - raise SizeTypeError("Specifying layers per entity, but provided %s, needed (%d, 2)", - layers.shape, parent.total_size) - if layers < 2: - raise SizeTypeError("Need at least two layers, not %d", layers) - layers = np.asarray([[0, layers]], dtype=IntType) - self.constant_layers = True - - self._layers = layers - self._extruded = True - - @cached_property - def _kernel_args_(self): - return (self.layers_array.ctypes.data, ) - - @cached_property - def _argtypes_(self): - return (ctypes.c_voidp, ) - - @cached_property - def _wrapper_cache_key_(self): - return self.parent._wrapper_cache_key_ + (self.constant_layers, ) - - 
def __getattr__(self, name): - """Returns a :class:`Set` specific attribute.""" - value = getattr(self._parent, name) - setattr(self, name, value) - return value - - def __contains__(self, set): - return set is self.parent - - def __str__(self): - return "OP2 ExtrudedSet: %s with size %s (%s layers)" % \ - (self._name, self.size, self._layers) - - def __repr__(self): - return "ExtrudedSet(%r, %r)" % (self._parent, self._layers) - - @cached_property - def parent(self): - return self._parent - - @cached_property - def layers(self): - """The layers of this extruded set.""" - if self.constant_layers: - # Backwards compat - return self.layers_array[0, 1] - else: - raise ValueError("No single layer, use layers_array attribute") - - @cached_property - def layers_array(self): - return self._layers - - -class Subset(ExtrudedSet): - - """OP2 subset. - - :param superset: The superset of the subset. - :type superset: a :class:`Set` or a :class:`Subset`. - :param indices: Elements of the superset that form the - subset. Duplicate values are removed when constructing the subset. - :type indices: a list of integers, or a numpy array. 
- """ - @validate_type(('superset', Set, TypeError), - ('indices', (list, tuple, np.ndarray), TypeError)) - def __init__(self, superset, indices): - # sort and remove duplicates - indices = np.unique(indices) - if isinstance(superset, Subset): - # Unroll indices to point to those in the parent - indices = superset.indices[indices] - superset = superset.superset - assert type(superset) is Set or type(superset) is ExtrudedSet, \ - 'Subset construction failed, should not happen' - - self._superset = superset - self._indices = verify_reshape(indices, IntType, (len(indices),)) - - if len(self._indices) > 0 and (self._indices[0] < 0 or self._indices[-1] >= self._superset.total_size): - raise SubsetIndexOutOfBounds( - 'Out of bounds indices in Subset construction: [%d, %d) not [0, %d)' % - (self._indices[0], self._indices[-1], self._superset.total_size)) - - self._sizes = ((self._indices < superset.core_size).sum(), - (self._indices < superset.size).sum(), - len(self._indices)) - self._extruded = superset._extruded - - @cached_property - def _kernel_args_(self): - return self._superset._kernel_args_ + (self._indices.ctypes.data, ) - - @cached_property - def _argtypes_(self): - return self._superset._argtypes_ + (ctypes.c_voidp, ) - - # Look up any unspecified attributes on the _set. - def __getattr__(self, name): - """Returns a :class:`Set` specific attribute.""" - value = getattr(self._superset, name) - setattr(self, name, value) - return value - - def __pow__(self, e): - """Derive a :class:`DataSet` with dimension ``e``""" - raise NotImplementedError("Deriving a DataSet from a Subset is unsupported") - - def __str__(self): - return "OP2 Subset: %s with sizes %s" % \ - (self._name, self._sizes) - - def __repr__(self): - return "Subset(%r, %r)" % (self._superset, self._indices) - - def __call__(self, *indices): - """Build a :class:`Subset` from this :class:`Subset` - - :arg indices: The elements of this :class:`Subset` from which the - :class:`Subset` should be formed. 
- - """ - if len(indices) == 1: - indices = indices[0] - if np.isscalar(indices): - indices = [indices] - return _make_object('Subset', self, indices) - - @cached_property - def superset(self): - """Returns the superset Set""" - return self._superset - - @cached_property - def indices(self): - """Returns the indices pointing in the superset.""" - return self._indices - - @cached_property - def owned_indices(self): - """Return the indices that correspond to the owned entities of the - superset. - """ - return self.indices[self.indices < self.superset.size] - - @cached_property - def layers_array(self): - if self._superset.constant_layers: - return self._superset.layers_array - else: - return self._superset.layers_array[self.indices, ...] - - def _check_operands(self, other): - if type(other) is Set: - if other is not self._superset: - raise TypeError("Superset mismatch: self._superset (%s) != other (%s)" % (self._superset, other)) - elif type(other) is Subset: - if self._superset is not other._superset: - raise TypeError("Unable to perform set operation between subsets of mismatching supersets (%s != %s)" % (self._superset, other._superset)) - else: - raise TypeError("Unable to perform set operations between `Subset` and %s." 
% (type(other), )) - - def intersection(self, other): - self._check_operands(other) - if other is self._superset: - return self - else: - return type(self)(self._superset, np.intersect1d(self._indices, other._indices)) - - def union(self, other): - self._check_operands(other) - if other is self._superset: - return other - else: - return type(self)(self._superset, np.union1d(self._indices, other._indices)) - - def difference(self, other): - self._check_operands(other) - if other is self._superset: - return Subset(other, []) - else: - return type(self)(self._superset, np.setdiff1d(self._indices, other._indices)) - - def symmetric_difference(self, other): - self._check_operands(other) - if other is self._superset: - return other.symmetric_difference(self) - else: - return type(self)(self._superset, np.setxor1d(self._indices, other._indices)) - - -class SetPartition(object): - def __init__(self, set, offset, size): - self.set = set - self.offset = offset - self.size = size - - -class MixedSet(Set, ObjectCached): - r"""A container for a bag of :class:`Set`\s.""" - - def __init__(self, sets): - r""":param iterable sets: Iterable of :class:`Set`\s or :class:`ExtrudedSet`\s""" - if self._initialized: - return - self._sets = sets - assert all(s is None or isinstance(s, GlobalSet) or ((s.layers == self._sets[0].layers).all() if s.layers is not None else True) for s in sets), \ - "All components of a MixedSet must have the same number of layers." - # TODO: do all sets need the same communicator? 
- self.comm = reduce(lambda a, b: a or b, map(lambda s: s if s is None else s.comm, sets)) - self._initialized = True - - @cached_property - def _kernel_args_(self): - raise NotImplementedError - - @cached_property - def _argtypes_(self): - raise NotImplementedError - - @cached_property - def _wrapper_cache_key_(self): - raise NotImplementedError - - @classmethod - def _process_args(cls, sets, **kwargs): - sets = [s for s in sets] - try: - sets = as_tuple(sets, ExtrudedSet) - except TypeError: - sets = as_tuple(sets, (Set, type(None))) - cache = sets[0] - return (cache, ) + (sets, ), kwargs - - @classmethod - def _cache_key(cls, sets, **kwargs): - return sets - - def __getitem__(self, idx): - """Return :class:`Set` with index ``idx`` or a given slice of sets.""" - return self._sets[idx] - - @cached_property - def split(self): - r"""The underlying tuple of :class:`Set`\s.""" - return self._sets - - @cached_property - def core_size(self): - """Core set size. Owned elements not touching halo elements.""" - return sum(s.core_size for s in self._sets) - - @cached_property - def size(self): - """Set size, owned elements.""" - return sum(0 if s is None else s.size for s in self._sets) - - @cached_property - def total_size(self): - """Total set size, including halo elements.""" - return sum(s.total_size for s in self._sets) - - @cached_property - def sizes(self): - """Set sizes: core, owned, execute halo, total.""" - return (self.core_size, self.size, self.total_size) - - @cached_property - def name(self): - """User-defined labels.""" - return tuple(s.name for s in self._sets) - - @cached_property - def halo(self): - r""":class:`Halo`\s associated with these :class:`Set`\s.""" - halos = tuple(s.halo for s in self._sets) - return halos if any(halos) else None - - @cached_property - def _extruded(self): - return isinstance(self._sets[0], ExtrudedSet) - - @cached_property - def layers(self): - """Numbers of layers in the extruded mesh (or None if this MixedSet is not 
extruded).""" - return self._sets[0].layers - - def __iter__(self): - r"""Yield all :class:`Set`\s when iterated over.""" - for s in self._sets: - yield s - - def __len__(self): - """Return number of contained :class:`Set`s.""" - return len(self._sets) - - def __pow__(self, e): - """Derive a :class:`MixedDataSet` with dimensions ``e``""" - return _make_object('MixedDataSet', self._sets, e) - - def __str__(self): - return "OP2 MixedSet composed of Sets: %s" % (self._sets,) - - def __repr__(self): - return "MixedSet(%r)" % (self._sets,) - - def __eq__(self, other): - return type(self) == type(other) and self._sets == other._sets - - -class DataSet(ObjectCached): - """PyOP2 Data Set - - Set used in the op2.Dat structures to specify the dimension of the data. - """ - - @validate_type(('iter_set', Set, SetTypeError), - ('dim', (numbers.Integral, tuple, list), DimTypeError), - ('name', str, NameTypeError)) - def __init__(self, iter_set, dim=1, name=None): - if isinstance(iter_set, ExtrudedSet): - raise NotImplementedError("Not allowed!") - if self._initialized: - return - if isinstance(iter_set, Subset): - raise NotImplementedError("Deriving a DataSet from a Subset is unsupported") - self._set = iter_set - self._dim = as_tuple(dim, numbers.Integral) - self._cdim = np.prod(self._dim).item() - self._name = name or "dset_#x%x" % id(self) - self._initialized = True - - @classmethod - def _process_args(cls, *args, **kwargs): - return (args[0], ) + args, kwargs - - @classmethod - def _cache_key(cls, iter_set, dim=1, name=None): - return (iter_set, as_tuple(dim, numbers.Integral)) - - @cached_property - def _wrapper_cache_key_(self): - return (type(self), self.dim, self._set._wrapper_cache_key_) - - def __getstate__(self): - """Extract state to pickle.""" - return self.__dict__ - - def __setstate__(self, d): - """Restore from pickled state.""" - self.__dict__.update(d) - - # Look up any unspecified attributes on the _set. 
class GlobalDataSet(DataSet):
    """A proxy :class:`DataSet` for use in a :class:`Sparsity` where the
    matrix has :class:`Global` rows or columns.

    Shape (``dim``/``cdim``), ``name`` and ``comm`` are read from the wrapped
    :class:`Global`; the parent ``set`` is a :class:`GlobalSet` created on the
    same communicator.
    """

    def __init__(self, global_):
        """
        :param global_: The :class:`Global` on which this object is based."""
        # ``_global`` has to be assigned first: the ``comm`` property used to
        # build the GlobalSet below reads it.
        self._global = global_
        self._globalset = GlobalSet(comm=self.comm)
        self._name = "gdset_#x%x" % id(self)

    @classmethod
    def _cache_key(cls, *args):
        # NOTE(review): presumably a ``None`` key tells the ObjectCached
        # machinery not to cache these objects -- confirm against
        # pyop2.caching.
        return None

    @cached_property
    def dim(self):
        """The shape tuple of the values for each element of the set."""
        return self._global._dim

    @cached_property
    def cdim(self):
        """The scalar number of values for each member of the set. This is
        the product of the dim tuple."""
        return self._global._cdim

    @cached_property
    def name(self):
        """Returns the name of the data set (the name of the wrapped
        :class:`Global`)."""
        return self._global._name

    @cached_property
    def comm(self):
        """Return the communicator on which the set is defined."""
        return self._global.comm

    @cached_property
    def set(self):
        """Returns the parent set of the data set."""
        return self._globalset

    @cached_property
    def size(self):
        """The number of local entries in the Dataset (1 on rank 0)"""
        # Use this object's own communicator, consistent with ``comm`` and
        # with the GlobalSet built in __init__, rather than a module-level
        # ``MPI.comm`` attribute.
        return 1 if self.comm.rank == 0 else 0

    def __iter__(self):
        """Yield self when iterated over."""
        yield self

    def __len__(self):
        """This is not a mixed type and therefore of length 1."""
        return 1

    def __str__(self):
        return "OP2 GlobalDataSet: %s on Global %s" % \
            (self._name, self._global)

    def __repr__(self):
        # One-element tuple so the argument is never unpacked by ``%``.
        return "GlobalDataSet(%r)" % (self._global,)
- - Initialized either from a :class:`MixedSet` and an iterable or iterator of - ``dims`` of corresponding length :: - - mdset = op2.MixedDataSet(mset, [dim1, ..., dimN]) - - or from a tuple of :class:`Set`\s and an iterable of ``dims`` of - corresponding length :: - - mdset = op2.MixedDataSet([set1, ..., setN], [dim1, ..., dimN]) - - If all ``dims`` are to be the same, they can also be given as an - :class:`int` for either of above invocations :: - - mdset = op2.MixedDataSet(mset, dim) - mdset = op2.MixedDataSet([set1, ..., setN], dim) - - Initialized from a :class:`MixedSet` without explicitly specifying ``dims`` - they default to 1 :: - - mdset = op2.MixedDataSet(mset) - - Initialized from an iterable or iterator of :class:`DataSet`\s and/or - :class:`Set`\s, where :class:`Set`\s are implicitly upcast to - :class:`DataSet`\s of dim 1 :: - - mdset = op2.MixedDataSet([dset1, ..., dsetN]) - """ - - def __init__(self, arg, dims=None): - r""" - :param arg: a :class:`MixedSet` or an iterable or a generator - expression of :class:`Set`\s or :class:`DataSet`\s or a - mixture of both - :param dims: `None` (the default) or an :class:`int` or an iterable or - generator expression of :class:`int`\s, which **must** be - of same length as `arg` - - .. Warning :: - When using generator expressions for ``arg`` or ``dims``, these - **must** terminate or else will cause an infinite loop. 
@classmethod
def _process_args(cls, arg, dims=None):
    r"""Normalise the constructor arguments to a tuple of :class:`DataSet`\s.

    :param arg: a :class:`MixedSet`, or an iterable of :class:`Set`\s
        and/or :class:`DataSet`\s.
    :param dims: ``None``, a single integer applied to every set, or an
        iterable of dims with one entry per set.
    :returns: a pair ``(args, kwargs)`` where ``args`` is
        ``(first_set, dsets)`` and ``kwargs`` is empty.
    :raises ValueError: if ``dims`` is an iterable whose length differs
        from the number of sets.
    """
    if dims is not None:
        # Explicit dims: ``arg`` is a MixedSet or an iterable of Sets.
        sets = arg.split if isinstance(arg, MixedSet) else tuple(arg)
        # Any integral scalar (including NumPy integers) is broadcast to
        # one dim per set.
        if isinstance(dims, numbers.Integral):
            dims = (dims,) * len(sets)
        else:
            dims = tuple(dims)
        if len(sets) != len(dims):
            raise ValueError("Got MixedSet of %d Sets but %d dims" %
                             (len(sets), len(dims)))
        dsets = tuple(s ** d for s, d in zip(sets, dims))
    else:
        # No dims: upcast bare Sets to DataSets of dim 1, keep DataSets.
        arg = [s if isinstance(s, DataSet) else s ** 1 for s in arg]
        dsets = as_tuple(arg, type=DataSet)

    return (dsets[0].set, ) + (dsets, ), {}
class Halo(object, metaclass=abc.ABCMeta):

    """A description of a halo associated with a :class:`Set`.

    The halo object describes which :class:`Set` elements are sent
    where, and which :class:`Set` elements are received from where.

    This is an abstract interface: concrete halos must provide the two
    properties and the four exchange methods declared below.
    """

    # ``abc.abstractproperty`` is deprecated; stacking ``property`` on
    # ``abc.abstractmethod`` is the supported spelling.
    @property
    @abc.abstractmethod
    def comm(self):
        """The MPI communicator for this halo."""

    @property
    @abc.abstractmethod
    def local_to_global_numbering(self):
        """The mapping from process-local to process-global numbers for this halo."""

    @abc.abstractmethod
    def global_to_local_begin(self, dat, insert_mode):
        """Begin an exchange from global (assembled) to local (ghosted) representation.

        :arg dat: The :class:`Dat` to exchange.
        :arg insert_mode: The insertion mode.
        """

    @abc.abstractmethod
    def global_to_local_end(self, dat, insert_mode):
        """Finish an exchange from global (assembled) to local (ghosted) representation.

        :arg dat: The :class:`Dat` to exchange.
        :arg insert_mode: The insertion mode.
        """

    @abc.abstractmethod
    def local_to_global_begin(self, dat, insert_mode):
        """Begin an exchange from local (ghosted) to global (assembled) representation.

        :arg dat: The :class:`Dat` to exchange.
        :arg insert_mode: The insertion mode.
        """

    @abc.abstractmethod
    def local_to_global_end(self, dat, insert_mode):
        """Finish an exchange from local (ghosted) to global (assembled) representation.

        :arg dat: The :class:`Dat` to exchange.
        :arg insert_mode: The insertion mode.
        """
- """ - def __init__(self, data, dtype, shape): - if data is None: - self._dtype = np.dtype(dtype if dtype is not None else ScalarType) - else: - self._numpy_data = verify_reshape(data, dtype, shape, allow_none=True) - self._dtype = self._data.dtype - - @cached_property - def _data(self): - """Return the user-provided data buffer, or a zeroed buffer of - the correct size if none was provided.""" - if not self._is_allocated: - self._numpy_data = np.zeros(self.shape, dtype=self._dtype) - return self._numpy_data - - @property - def _is_allocated(self): - """Return True if the data buffer has been allocated.""" - return hasattr(self, '_numpy_data') - - -class Dat(DataCarrier, _EmptyDataMixin): - """OP2 vector data. A :class:`Dat` holds values on every element of a - :class:`DataSet`. - - If a :class:`Set` is passed as the ``dataset`` argument, rather - than a :class:`DataSet`, the :class:`Dat` is created with a default - :class:`DataSet` dimension of 1. - - If a :class:`Dat` is passed as the ``dataset`` argument, a copy is - returned. - - It is permissible to pass `None` as the `data` argument. In this - case, allocation of the data buffer is postponed until it is - accessed. - - .. note:: - If the data buffer is not passed in, it is implicitly - initialised to be zero. - - When a :class:`Dat` is passed to :func:`pyop2.op2.par_loop`, the map via - which indirection occurs and the access descriptor are passed by - calling the :class:`Dat`. For instance, if a :class:`Dat` named ``D`` is - to be accessed for reading via a :class:`Map` named ``M``, this is - accomplished by :: - - D(pyop2.READ, M) - - The :class:`Map` through which indirection occurs can be indexed - using the index notation described in the documentation for the - :class:`Map`. Direct access to a Dat is accomplished by - omitting the path argument. 
- - :class:`Dat` objects support the pointwise linear algebra operations - ``+=``, ``*=``, ``-=``, ``/=``, where ``*=`` and ``/=`` also support - multiplication / division by a scalar. - """ - - _zero_kernels = {} - """Class-level cache for zero kernels.""" - - _modes = [READ, WRITE, RW, INC, MIN, MAX] - - @cached_property - def pack(self): - from pyop2.codegen.builder import DatPack - return DatPack - - @validate_type(('dataset', (DataCarrier, DataSet, Set), DataSetTypeError), - ('name', str, NameTypeError)) - @validate_dtype(('dtype', None, DataTypeError)) - def __init__(self, dataset, data=None, dtype=None, name=None): - - if isinstance(dataset, Dat): - self.__init__(dataset.dataset, None, dtype=dataset.dtype, - name="copy_of_%s" % dataset.name) - dataset.copy(self) - return - if type(dataset) is Set or type(dataset) is ExtrudedSet: - # If a Set, rather than a dataset is passed in, default to - # a dataset dimension of 1. - dataset = dataset ** 1 - self._shape = (dataset.total_size,) + (() if dataset.cdim == 1 else dataset.dim) - _EmptyDataMixin.__init__(self, data, dtype, self._shape) - - self._dataset = dataset - self.comm = dataset.comm - self.halo_valid = True - self._name = name or "dat_#x%x" % id(self) - - @cached_property - def _kernel_args_(self): - return (self._data.ctypes.data, ) - - @cached_property - def _argtypes_(self): - return (ctypes.c_voidp, ) - - @cached_property - def _wrapper_cache_key_(self): - return (type(self), self.dtype, self._dataset._wrapper_cache_key_) - - @validate_in(('access', _modes, ModeValueError)) - def __call__(self, access, path=None): - if configuration["type_check"] and path and path.toset != self.dataset.set: - raise MapValueError("To Set of Map does not match Set of Dat.") - return _make_object('Arg', data=self, map=path, access=access) - - def __getitem__(self, idx): - """Return self if ``idx`` is 0, raise an error otherwise.""" - if idx != 0: - raise IndexValueError("Can only extract component 0 from %r" % self) - 
return self - - @cached_property - def split(self): - """Tuple containing only this :class:`Dat`.""" - return (self,) - - @cached_property - def dataset(self): - """:class:`DataSet` on which the Dat is defined.""" - return self._dataset - - @cached_property - def dim(self): - """The shape of the values for each element of the object.""" - return self.dataset.dim - - @cached_property - def cdim(self): - """The scalar number of values for each member of the object. This is - the product of the dim tuple.""" - return self.dataset.cdim - - @property - @collective - def data(self): - """Numpy array containing the data values. - - With this accessor you are claiming that you will modify - the values you get back. If you only need to look at the - values, use :meth:`data_ro` instead. - - This only shows local values, to see the halo values too use - :meth:`data_with_halos`. - - """ - if self.dataset.total_size > 0 and self._data.size == 0 and self.cdim > 0: - raise RuntimeError("Illegal access: no data associated with this Dat!") - self.halo_valid = False - v = self._data[:self.dataset.size].view() - v.setflags(write=True) - return v - - @property - @collective - def data_with_halos(self): - r"""A view of this :class:`Dat`\s data. - - This accessor marks the :class:`Dat` as dirty, see - :meth:`data` for more details on the semantics. - - With this accessor, you get to see up to date halo values, but - you should not try and modify them, because they will be - overwritten by the next halo exchange.""" - self.global_to_local_begin(RW) - self.global_to_local_end(RW) - self.halo_valid = False - v = self._data.view() - v.setflags(write=True) - return v - - @property - @collective - def data_ro(self): - """Numpy array containing the data values. Read-only. - - With this accessor you are not allowed to modify the values - you get back. If you need to do so, use :meth:`data` instead. - - This only shows local values, to see the halo values too use - :meth:`data_ro_with_halos`. 
def load(self, filename):
    """Read the data stored in file ``filename`` and copy the values into
    this object's data.

    :arg filename: path of the file to read, as a string or any
        :class:`os.PathLike` object. A ``.npy`` suffix is appended when
        missing, mirroring what ``np.save`` does on writing.
    """
    import os

    # The np.save method appends a .npy extension to the file name
    # if the user has not supplied it. However, np.load does not,
    # so we need to handle this ourselves here.
    filename = os.fspath(filename)
    if not filename.endswith(".npy"):
        filename = filename + ".npy"

    # Read the file before touching ``self.data`` so that a failed load
    # leaves this object untouched.
    loaded = np.load(filename)
    target = self.data
    if isinstance(target, tuple):
        # MixedDat case: one stored array per component.
        for d, d_from_file in zip(target, loaded):
            d[:] = d_from_file[:]
    else:
        target[:] = loaded
def _check_shape(self, other):
    """Check that ``other`` has the same per-element shape as ``self``.

    :arg other: an object exposing ``dataset.dim``.
    :raises ValueError: if ``other.dataset.dim`` differs from
        ``self.dataset.dim``.
    """
    if other.dataset.dim != self.dataset.dim:
        # Interpolate the shapes into the message; passing them as extra
        # arguments to ValueError leaves the ``%s`` placeholders unfilled.
        raise ValueError('Mismatched shapes in operands %s and %s' %
                         (self.dataset.dim, other.dataset.dim))
import pymbolic.primitives as p - name = "binop_%s" % op.__name__ - inames = isl.make_zero_and_vars(["i"]) - domain = (inames[0].le_set(inames["i"])) & (inames["i"].lt_set(inames[0] + self.cdim)) - _other = p.Variable("other") - _self = p.Variable("self") - _ret = p.Variable("ret") - i = p.Variable("i") - lhs = _ret.index(i) - if globalp: - rhs = _other.index(0) - rshape = (1, ) - else: - rhs = _other.index(i) - rshape = (self.cdim, ) - insn = loopy.Assignment(lhs, op(_self.index(i), rhs), within_inames=frozenset(["i"])) - data = [loopy.GlobalArg("self", dtype=self.dtype, shape=(self.cdim,)), - loopy.GlobalArg("other", dtype=dtype, shape=rshape), - loopy.GlobalArg("ret", dtype=self.dtype, shape=(self.cdim,))] - knl = loopy.make_function([domain], [insn], data, name=name, target=loopy.CTarget(), lang_version=(2018, 2)) - return self._op_kernel_cache.setdefault(key, _make_object('Kernel', knl, name)) - - def _op(self, other, op): - ret = _make_object('Dat', self.dataset, None, self.dtype) - if np.isscalar(other): - other = _make_object('Global', 1, data=other) - globalp = True - else: - self._check_shape(other) - globalp = False - par_loop(self._op_kernel(op, globalp, other.dtype), - self.dataset.set, self(READ), other(READ), ret(WRITE)) - return ret - - def _iop_kernel(self, op, globalp, other_is_self, dtype): - key = (op, globalp, other_is_self, dtype) - try: - if not hasattr(self, "_iop_kernel_cache"): - self._iop_kernel_cache = {} - return self._iop_kernel_cache[key] - except KeyError: - pass - import islpy as isl - import pymbolic.primitives as p - name = "iop_%s" % op.__name__ - inames = isl.make_zero_and_vars(["i"]) - domain = (inames[0].le_set(inames["i"])) & (inames["i"].lt_set(inames[0] + self.cdim)) - _other = p.Variable("other") - _self = p.Variable("self") - i = p.Variable("i") - lhs = _self.index(i) - rshape = (self.cdim, ) - if globalp: - rhs = _other.index(0) - rshape = (1, ) - elif other_is_self: - rhs = _self.index(i) - else: - rhs = _other.index(i) 
- insn = loopy.Assignment(lhs, op(lhs, rhs), within_inames=frozenset(["i"])) - data = [loopy.GlobalArg("self", dtype=self.dtype, shape=(self.cdim,))] - if not other_is_self: - data.append(loopy.GlobalArg("other", dtype=dtype, shape=rshape)) - knl = loopy.make_function([domain], [insn], data, name=name, target=loopy.CTarget(), lang_version=(2018, 2)) - return self._iop_kernel_cache.setdefault(key, _make_object('Kernel', knl, name)) - - def _iop(self, other, op): - globalp = False - if np.isscalar(other): - other = _make_object('Global', 1, data=other) - globalp = True - elif other is not self: - self._check_shape(other) - args = [self(INC)] - if other is not self: - args.append(other(READ)) - par_loop(self._iop_kernel(op, globalp, other is self, other.dtype), self.dataset.set, *args) - return self - - def _inner_kernel(self, dtype): - try: - if not hasattr(self, "_inner_kernel_cache"): - self._inner_kernel_cache = {} - return self._inner_kernel_cache[dtype] - except KeyError: - pass - import islpy as isl - import pymbolic.primitives as p - inames = isl.make_zero_and_vars(["i"]) - domain = (inames[0].le_set(inames["i"])) & (inames["i"].lt_set(inames[0] + self.cdim)) - _self = p.Variable("self") - _other = p.Variable("other") - _ret = p.Variable("ret") - _conj = p.Variable("conj") if dtype.kind == "c" else lambda x: x - i = p.Variable("i") - insn = loopy.Assignment(_ret[0], _ret[0] + _self[i]*_conj(_other[i]), - within_inames=frozenset(["i"])) - data = [loopy.GlobalArg("self", dtype=self.dtype, shape=(self.cdim,)), - loopy.GlobalArg("other", dtype=dtype, shape=(self.cdim,)), - loopy.GlobalArg("ret", dtype=self.dtype, shape=(1,))] - knl = loopy.make_function([domain], [insn], data, name="inner", target=loopy.CTarget(), lang_version=(2018, 2)) - k = _make_object('Kernel', knl, "inner") - return self._inner_kernel_cache.setdefault(dtype, k) - - def inner(self, other): - """Compute the l2 inner product of the flattened :class:`Dat` - - :arg other: the other :class:`Dat` 
to compute the inner - product against. The complex conjugate of this is taken. - - """ - self._check_shape(other) - ret = _make_object('Global', 1, data=0, dtype=self.dtype) - par_loop(self._inner_kernel(other.dtype), self.dataset.set, - self(READ), other(READ), ret(INC)) - return ret.data_ro[0] - - @property - def norm(self): - """Compute the l2 norm of this :class:`Dat` - - .. note:: - - This acts on the flattened data (see also :meth:`inner`).""" - from math import sqrt - return sqrt(self.inner(self).real) - - def __pos__(self): - pos = _make_object('Dat', self) - return pos - - def __add__(self, other): - """Pointwise addition of fields.""" - return self._op(other, operator.add) - - def __radd__(self, other): - """Pointwise addition of fields. - - self.__radd__(other) <==> other + self.""" - return self + other - - @cached_property - def _neg_kernel(self): - # Copy and negate in one go. - import islpy as isl - import pymbolic.primitives as p - name = "neg" - inames = isl.make_zero_and_vars(["i"]) - domain = (inames[0].le_set(inames["i"])) & (inames["i"].lt_set(inames[0] + self.cdim)) - lvalue = p.Variable("other") - rvalue = p.Variable("self") - i = p.Variable("i") - insn = loopy.Assignment(lvalue.index(i), -rvalue.index(i), within_inames=frozenset(["i"])) - data = [loopy.GlobalArg("other", dtype=self.dtype, shape=(self.cdim,)), - loopy.GlobalArg("self", dtype=self.dtype, shape=(self.cdim,))] - knl = loopy.make_function([domain], [insn], data, name=name, target=loopy.CTarget(), lang_version=(2018, 2)) - return _make_object('Kernel', knl, name) - - def __neg__(self): - neg = _make_object('Dat', self.dataset, dtype=self.dtype) - par_loop(self._neg_kernel, self.dataset.set, neg(WRITE), self(READ)) - return neg - - def __sub__(self, other): - """Pointwise subtraction of fields.""" - return self._op(other, operator.sub) - - def __rsub__(self, other): - """Pointwise subtraction of fields. 
- - self.__rsub__(other) <==> other - self.""" - ret = -self - ret += other - return ret - - def __mul__(self, other): - """Pointwise multiplication or scaling of fields.""" - return self._op(other, operator.mul) - - def __rmul__(self, other): - """Pointwise multiplication or scaling of fields. - - self.__rmul__(other) <==> other * self.""" - return self.__mul__(other) - - def __truediv__(self, other): - """Pointwise division or scaling of fields.""" - return self._op(other, operator.truediv) - - __div__ = __truediv__ # Python 2 compatibility - - def __iadd__(self, other): - """Pointwise addition of fields.""" - return self._iop(other, operator.iadd) - - def __isub__(self, other): - """Pointwise subtraction of fields.""" - return self._iop(other, operator.isub) - - def __imul__(self, other): - """Pointwise multiplication or scaling of fields.""" - return self._iop(other, operator.imul) - - def __itruediv__(self, other): - """Pointwise division or scaling of fields.""" - return self._iop(other, operator.itruediv) - - @collective - def global_to_local_begin(self, access_mode): - """Begin a halo exchange from global to ghosted representation. - - :kwarg access_mode: Mode with which the data will subsequently - be accessed.""" - halo = self.dataset.halo - if halo is None: - return - if not self.halo_valid and access_mode in {READ, RW}: - halo.global_to_local_begin(self, WRITE) - elif access_mode in {INC, MIN, MAX}: - min_, max_ = dtype_limits(self.dtype) - val = {MAX: min_, MIN: max_, INC: 0}[access_mode] - self._data[self.dataset.size:] = val - else: - # WRITE - pass - - @collective - def global_to_local_end(self, access_mode): - """End a halo exchange from global to ghosted representation. 
class DatView(Dat):
    """An indexed view into a :class:`Dat`.

    This object can be used like a :class:`Dat` but the kernel will
    only see the requested index, rather than the full data.

    :arg dat: The :class:`Dat` to create a view into.
    :arg index: The component to select a view of.
    :raises IndexValueError: if ``index`` does not have one entry per
        entry of ``dat.dim``, or if any entry is out of range.
    """
    def __init__(self, dat, index):
        index = as_tuple(index)
        # Explicit check rather than ``assert``: asserts vanish under -O and
        # the ``zip`` below would then silently ignore surplus entries.
        if len(index) != len(dat.dim):
            raise IndexValueError("Can't create DatView with index %s for Dat with shape %s" % (index, dat.dim))
        for i, d in zip(index, dat.dim):
            if not (0 <= i < d):
                raise IndexValueError("Can't create DatView with index %s for Dat with shape %s" % (index, dat.dim))
        self.index = index
        # Point at underlying data
        super(DatView, self).__init__(dat.dataset,
                                      dat._data,
                                      dtype=dat.dtype,
                                      name="view[%s](%s)" % (index, dat.name))
        self._parent = dat

    def _select(self, full):
        """Return the component ``self.index`` of every row of ``full``."""
        return full[(slice(None), *self.index)]

    # Kernel arguments and their ctypes come from the parent Dat.
    @cached_property
    def _kernel_args_(self):
        return self._parent._kernel_args_

    @cached_property
    def _argtypes_(self):
        return self._parent._argtypes_

    @cached_property
    def _wrapper_cache_key_(self):
        return (type(self), self.index, self._parent._wrapper_cache_key_)

    @cached_property
    def cdim(self):
        """A view always exposes a single value per set element."""
        return 1

    @cached_property
    def dim(self):
        """The per-element shape of the view, always ``(1, )``."""
        return (1, )

    @cached_property
    def shape(self):
        """One entry per element of the dataset (``total_size``)."""
        return (self.dataset.total_size, )

    @property
    def data(self):
        """The selected component of the parent's ``data``."""
        return self._select(self._parent.data)

    @property
    def data_ro(self):
        """The selected component of the parent's ``data_ro``."""
        return self._select(self._parent.data_ro)

    @property
    def data_with_halos(self):
        """The selected component of the parent's ``data_with_halos``."""
        return self._select(self._parent.data_with_halos)

    @property
    def data_ro_with_halos(self):
        """The selected component of the parent's ``data_ro_with_halos``."""
        return self._select(self._parent.data_ro_with_halos)
- - Initialized either from a :class:`MixedDataSet`, a :class:`MixedSet`, or - an iterable of :class:`DataSet`\s and/or :class:`Set`\s, where all the - :class:`Set`\s are implcitly upcast to :class:`DataSet`\s :: - - mdat = op2.MixedDat(mdset) - mdat = op2.MixedDat([dset1, ..., dsetN]) - - or from an iterable of :class:`Dat`\s :: - - mdat = op2.MixedDat([dat1, ..., datN]) - """ - - def __init__(self, mdset_or_dats): - def what(x): - if isinstance(x, (Global, GlobalDataSet, GlobalSet)): - return "Global" - elif isinstance(x, (Dat, DataSet, Set)): - return "Dat" - else: - raise DataSetTypeError("Huh?!") - if isinstance(mdset_or_dats, MixedDat): - self._dats = tuple(_make_object(what(d), d) for d in mdset_or_dats) - else: - self._dats = tuple(d if isinstance(d, (Dat, Global)) else _make_object(what(d), d) for d in mdset_or_dats) - if not all(d.dtype == self._dats[0].dtype for d in self._dats): - raise DataValueError('MixedDat with different dtypes is not supported') - # TODO: Think about different communicators on dats (c.f. 
MixedSet) - self.comm = self._dats[0].comm - - @cached_property - def _kernel_args_(self): - return tuple(itertools.chain(*(d._kernel_args_ for d in self))) - - @cached_property - def _argtypes_(self): - return tuple(itertools.chain(*(d._argtypes_ for d in self))) - - @cached_property - def _wrapper_cache_key_(self): - return (type(self),) + tuple(d._wrapper_cache_key_ for d in self) - - def __getitem__(self, idx): - """Return :class:`Dat` with index ``idx`` or a given slice of Dats.""" - return self._dats[idx] - - @cached_property - def dtype(self): - """The NumPy dtype of the data.""" - return self._dats[0].dtype - - @cached_property - def split(self): - r"""The underlying tuple of :class:`Dat`\s.""" - return self._dats - - @cached_property - def dataset(self): - r""":class:`MixedDataSet`\s this :class:`MixedDat` is defined on.""" - return _make_object('MixedDataSet', tuple(s.dataset for s in self._dats)) - - @cached_property - def _data(self): - """Return the user-provided data buffer, or a zeroed buffer of - the correct size if none was provided.""" - return tuple(d._data for d in self) - - @property - @collective - def data(self): - """Numpy arrays containing the data excluding halos.""" - return tuple(s.data for s in self._dats) - - @property - @collective - def data_with_halos(self): - """Numpy arrays containing the data including halos.""" - return tuple(s.data_with_halos for s in self._dats) - - @property - @collective - def data_ro(self): - """Numpy arrays with read-only data excluding halos.""" - return tuple(s.data_ro for s in self._dats) - - @property - @collective - def data_ro_with_halos(self): - """Numpy arrays with read-only data including halos.""" - return tuple(s.data_ro_with_halos for s in self._dats) - - @property - def halo_valid(self): - """Does this Dat have up to date halos?""" - return all(s.halo_valid for s in self) - - @halo_valid.setter - def halo_valid(self, val): - """Indictate whether this Dat requires a halo update""" - for d in 
self: - d.halo_valid = val - - @collective - def global_to_local_begin(self, access_mode): - for s in self: - s.global_to_local_begin(access_mode) - - @collective - def global_to_local_end(self, access_mode): - for s in self: - s.global_to_local_end(access_mode) - - @collective - def local_to_global_begin(self, insert_mode): - for s in self: - s.local_to_global_begin(insert_mode) - - @collective - def local_to_global_end(self, insert_mode): - for s in self: - s.local_to_global_end(insert_mode) - - @collective - def zero(self, subset=None): - """Zero the data associated with this :class:`MixedDat`. - - :arg subset: optional subset of entries to zero (not implemented).""" - if subset is not None: - raise NotImplementedError("Subsets of mixed sets not implemented") - for d in self._dats: - d.zero() - - @cached_property - def nbytes(self): - """Return an estimate of the size of the data associated with this - :class:`MixedDat` in bytes. This will be the correct size of the data - payload, but does not take into account the (presumably small) - overhead of the object and its metadata. - - Note that this is the process local memory usage, not the sum - over all MPI processes. - """ - - return np.sum([d.nbytes for d in self._dats]) - - @collective - def copy(self, other, subset=None): - """Copy the data in this :class:`MixedDat` into another. 
def __ne__(self, other):
    r""":class:`MixedDat`\s are unequal exactly when :meth:`__eq__`
    does not report them as equal."""
    is_equal = self.__eq__(other)
    return not is_equal
def __rsub__(self, other):
    """Pointwise subtraction of fields.

    self.__rsub__(other) <==> other - self.

    :arg other: the left-hand operand; either a scalar or something
        that ``_op`` can pair component-wise with ``self``.
    :returns: whatever ``_op`` builds from the component-wise results.
    """
    # Subtraction is not commutative. ``_op`` always calls
    # ``op(component_of_self, other_operand)``, so the operands have to
    # be swapped here to obtain ``other - self`` rather than
    # ``self - other``.
    return self._op(other, lambda mine, theirs: theirs - mine)
def __truediv__(self, other):
    """Pointwise division or scaling of fields.

    :arg other: the divisor; either a scalar or something that ``_op``
        can pair component-wise with ``self``.
    :returns: whatever ``_op`` builds from the component-wise quotients.
    """
    # ``operator.div`` only existed in Python 2; under Python 3 the ``/``
    # operator dispatches to ``__truediv__`` and the matching function in
    # the ``operator`` module is ``operator.truediv``.
    return self._op(other, operator.truediv)


# Keep the historical name available for any caller that invokes it
# explicitly.
__div__ = __truediv__
@property
def data_ro(self):
    """Read-only view of the data array.

    A new view of ``self.data`` is created on each access and marked
    non-writeable; ``self.data`` itself is not modified.
    """
    readonly = self.data.view()
    readonly.flags.writeable = False
    return readonly
@collective
def duplicate(self):
    """Return a deep copy of self.

    The copy is constructed through ``type(self)`` with the same
    ``dim``, ``dtype``, ``name`` and communicator, and with a fresh
    copy of the data array.
    """
    # Forward the communicator explicitly: the constructor's ``comm``
    # argument defaults to ``None``, so leaving it out would produce a
    # duplicate that has lost the communicator of the original.
    return type(self)(self.dim, data=np.copy(self.data_ro),
                      dtype=self.dtype, name=self.name, comm=self.comm)
def __neg__(self):
    """Return a new object of the same type holding the negated data.

    The result shares ``dim``, ``dtype``, ``name`` and communicator with
    ``self``; the data of ``self`` is copied before negation and is not
    modified.
    """
    # Forward the communicator explicitly: the constructor's ``comm``
    # argument defaults to ``None``, so leaving it out would produce a
    # result that has lost the communicator of the original.
    return type(self)(self.dim, data=-np.copy(self.data_ro),
                      dtype=self.dtype, name=self.name, comm=self.comm)
@validate_type(('iterset', Set, SetTypeError), ('toset', Set, SetTypeError),
               ('arity', numbers.Integral, ArityTypeError), ('name', str, NameTypeError))
def __init__(self, iterset, toset, arity, values=None, name=None, offset=None):
    """Set up a map from ``iterset`` to ``toset``.

    :arg iterset: the :class:`Set` mapped from.
    :arg toset: the :class:`Set` mapped to; its ``comm`` becomes the
        communicator of the map.
    :arg arity: number of ``toset`` entries per ``iterset`` entry.
    :arg values: map data, passed through ``verify_reshape`` with
        ``IntType`` and shape ``(iterset.total_size, arity)``; ``None``
        is allowed.
    :arg name: optional label; when falsy a name derived from
        ``id(self)`` is generated.
    :arg offset: optional offsets, passed through ``verify_reshape``
        with ``IntType`` and shape ``(arity,)``; ``None`` or an empty
        sequence is stored as ``None``.
    """
    self._iterset, self._toset = iterset, toset
    self.comm = toset.comm
    self._arity = arity
    self._values = verify_reshape(values, IntType, (iterset.total_size, arity),
                                  allow_none=True)
    self.shape = (iterset.total_size, arity)
    self._name = name if name else "map_#x%x" % id(self)
    has_offset = offset is not None and len(offset) != 0
    self._offset = verify_reshape(offset, IntType, (arity, )) if has_offset else None
    # A cache for objects built on top of this map
    self._cache = {}
@cached_property
def values(self):
    """Mapping array restricted to the local points.

    Only the first ``self.iterset.size`` rows of the stored array are
    returned; use :meth:`values_with_halo` to see the halo points as
    well."""
    all_values = self._values
    return all_values[:self.iterset.size]
class PermutedMap(Map):
    """Composition of a standard :class:`Map` with a constant permutation.

    :arg map_: The map to permute.
    :arg permutation: The permutation of the map indices.

    Where normally staging to element data is performed as

    .. code-block::

       local[i] = global[map[i]]

    With a :class:`PermutedMap` we instead get

    .. code-block::

       local[i] = global[map[permutation[i]]]

    This might be useful if your local kernel wants data in a
    different order to the one that the map provides, and you don't
    want two global-sized data structures.
    """
    def __init__(self, map_, permutation):
        # ``Map.__init__`` is deliberately not called: only ``map_`` and
        # ``permutation`` live on this object, every other attribute is
        # resolved on the wrapped map through ``__getattr__``.
        self.map_ = map_
        self.permutation = np.asarray(permutation, dtype=Map.dtype)
        # ``np.unique`` sorts and de-duplicates, so this checks that the
        # set of indices used is exactly ``0 .. arity - 1``.
        # ``np.array_equal`` is used instead of ``==`` followed by
        # ``.all()`` so that a permutation of the wrong length fails this
        # assertion rather than raising an unrelated broadcasting or
        # attribute error.
        expected = np.arange(map_.arity, dtype=Map.dtype)
        assert np.array_equal(np.unique(permutation), expected), \
            "permutation must use exactly the indices 0..arity-1"

    @cached_property
    def _wrapper_cache_key_(self):
        """Cache key of the wrapped map extended by the permutation."""
        return super()._wrapper_cache_key_ + (tuple(self.permutation),)

    def __getattr__(self, name):
        """Delegate lookups of attributes not found on ``self`` to the
        wrapped map.

        :raises AttributeError: if ``map_`` itself has not been set yet.
        """
        # ``__getattr__`` is only consulted after normal lookup fails. If
        # ``map_`` is missing (e.g. on an instance created without running
        # ``__init__``, as copy/pickle do), ``self.map_`` would re-enter
        # this method forever; fail cleanly instead.
        if name == "map_":
            raise AttributeError(name)
        return getattr(self.map_, name)
@cached_property
def arange(self):
    """Tuple of arity offsets for each constituent :class:`Map`.

    The tuple starts with ``0`` and continues with the running totals
    of ``self.arities``."""
    boundaries = [0]
    boundaries.extend(np.cumsum(self.arities))
    return tuple(boundaries)
@cached_property
def offset(self):
    """Vertical offsets, one entry per constituent map.

    A ``None`` entry in ``self._maps`` contributes ``0``; every other
    entry contributes its own ``offset``."""
    return tuple(component.offset if component is not None else 0
                 for component in self._maps)
def __init__(self, dsets, maps, *, iteration_regions=None, name=None, nest=None, block_sparse=None):
    r"""Initialise the sparsity (a no-op if it is already initialised).

    :param dsets: :class:`DataSet`\s for the left and right function
        spaces this :class:`Sparsity` maps between
    :param maps: :class:`Map`\s to build the :class:`Sparsity` from
    :type maps: a pair of :class:`Map`\s specifying a row map and a column
        map, or an iterable of pairs of :class:`Map`\s specifying multiple
        row and column maps - if a single :class:`Map` is passed, it is
        used as both a row map and a column map
    :param iteration_regions: regions that select subsets of extruded maps to iterate over.
    :param string name: user-defined label (optional)
    :param nest: Should the sparsity over mixed set be built as nested blocks?
    :param block_sparse: Should the sparsity for datasets with
                         cdim > 1 be built as a block sparsity?
    :raises ValueError: if the left and right communicators differ.
    :raises SparsityFormatError: if a monolithic (non-nested) sparsity is
        requested on a :class:`MixedDataSet` containing a
        :class:`GlobalDataSet`.

    NOTE(review): inside this method ``maps`` is unpacked as an iterable
    of ``(map_pair, iteration_region)`` items (see ``zip(*maps)`` below),
    presumably the canonical form produced by ``_process_args`` - confirm
    against ``ObjectCached``.
    """
    # Protect against re-initialization when retrieved from cache
    if self._initialized:
        return

    self._block_sparse = block_sparse
    # Separate the (row map, column map) pairs from their iteration
    # regions; note that this rebinds the ``iteration_regions`` argument.
    maps, iteration_regions = zip(*maps)
    # Split the pairs into a tuple of row maps and a tuple of column maps
    self._rmaps, self._cmaps = zip(*maps)
    self._dsets = dsets

    if isinstance(dsets[0], GlobalDataSet) or isinstance(dsets[1], GlobalDataSet):
        # At least one side is a Global: a single (1, 1) block, no
        # non-zero counts, and no size on the Global side.
        self._dims = (((1, 1),),)
        self._d_nnz = None
        self._o_nnz = None
        self._nrows = None if isinstance(dsets[0], GlobalDataSet) else self._rmaps[0].toset.size
        self._ncols = None if isinstance(dsets[1], GlobalDataSet) else self._cmaps[0].toset.size
        # The communicator comes from the Global dataset on a Global
        # side, and from the first map otherwise.
        self.lcomm = dsets[0].comm if isinstance(dsets[0], GlobalDataSet) else self._rmaps[0].comm
        self.rcomm = dsets[1].comm if isinstance(dsets[1], GlobalDataSet) else self._cmaps[0].comm
    else:
        self.lcomm = self._rmaps[0].comm
        self.rcomm = self._cmaps[0].comm

        rset, cset = self.dsets
        # Row and column counts are taken from the row and column
        # datasets themselves.
        self._nrows = rset.size
        self._ncols = cset.size

        # Only set on this (non-Global) branch.
        self._has_diagonal = (rset == cset)

        # Cartesian product of the row cdims with the column cdims,
        # consumed below in row-major order.
        tmp = itertools.product([x.cdim for x in self._dsets[0]],
                                [x.cdim for x in self._dsets[1]])

        # Fill a shape[0] x shape[1] table with (row cdim, col cdim) pairs
        dims = [[None for _ in range(self.shape[1])] for _ in range(self.shape[0])]
        for r in range(self.shape[0]):
            for c in range(self.shape[1]):
                dims[r][c] = next(tmp)

        self._dims = tuple(tuple(d) for d in dims)

    if self.lcomm != self.rcomm:
        raise ValueError("Haven't thought hard enough about different left and right communicators")
    self.comm = self.lcomm

    self._name = name or "sparsity_#x%x" % id(self)

    self.iteration_regions = iteration_regions
    # If the Sparsity is defined on MixedDataSets, we need to build each
    # block separately
    if (isinstance(dsets[0], MixedDataSet) or isinstance(dsets[1], MixedDataSet)) \
       and nest:
        self._nested = True
        self._blocks = []
        for i, rds in enumerate(dsets[0]):
            row = []
            for j, cds in enumerate(dsets[1]):
                # Block (i, j) is a Sparsity of its own, built from the
                # i-th / j-th component of every row / column map.
                row.append(Sparsity((rds, cds), [(rm.split[i], cm.split[j]) for
                                                 rm, cm in maps],
                                    iteration_regions=iteration_regions,
                                    block_sparse=block_sparse))
            self._blocks.append(row)
        # Iterating over ``self`` walks ``self._blocks`` row by row, so
        # these are flat tuples of the per-block counts.
        self._d_nnz = tuple(s._d_nnz for s in self)
        self._o_nnz = tuple(s._o_nnz for s in self)
    elif isinstance(dsets[0], GlobalDataSet) or isinstance(dsets[1], GlobalDataSet):
        # Where the sparsity maps either from or to a Global, we
        # don't really have any sparsity structure.
        self._blocks = [[self]]
        self._nested = False
    else:
        # Monolithic case: a single block covering the whole sparsity.
        for dset in dsets:
            if isinstance(dset, MixedDataSet) and any([isinstance(d, GlobalDataSet) for d in dset]):
                raise SparsityFormatError("Mixed monolithic matrices with Global rows or columns are not supported.")
        self._nested = False
        with timed_region("CreateSparsity"):
            # ``build_sparsity`` receives ``self``, so everything it
            # reads must already be set by this point.
            nnz, onnz = build_sparsity(self)
            self._d_nnz = nnz
            self._o_nnz = onnz
        self._blocks = [[self]]
    self._initialized = True
- continue - for m in pair: - if not isinstance(m, Map): - raise MapTypeError( - "All maps must be of type map, not type %r" % type(m)) - if len(m.values_with_halo) == 0 and m.iterset.total_size > 0: - raise MapValueError( - "Unpopulated map values when trying to build sparsity.") - # Make sure that the "to" Set of each map in a pair is the set of - # the corresponding DataSet set - if not (pair[0].toset == dsets[0].set - and pair[1].toset == dsets[1].set): - raise RuntimeError("Map to set must be the same as corresponding DataSet set") - - # Each pair of maps must have the same from-set (iteration set) - if not pair[0].iterset == pair[1].iterset: - raise RuntimeError("Iterset of both maps in a pair must be the same") - - rmaps, cmaps = zip(*maps) - if iteration_regions is None: - iteration_regions = tuple((ALL, ) for _ in maps) - else: - iteration_regions = tuple(tuple(sorted(region)) for region in iteration_regions) - if not len(rmaps) == len(cmaps): - raise RuntimeError("Must pass equal number of row and column maps") - - if rmaps[0] is not None and cmaps[0] is not None: - # Each row map must have the same to-set (data set) - if not all(m.toset == rmaps[0].toset for m in rmaps): - raise RuntimeError("To set of all row maps must be the same") - - # Each column map must have the same to-set (data set) - if not all(m.toset == cmaps[0].toset for m in cmaps): - raise RuntimeError("To set of all column maps must be the same") - - # Need to return the caching object, a tuple of the processed - # arguments and a dict of kwargs (empty in this case) - if isinstance(dsets[0], GlobalDataSet): - cache = None - elif isinstance(dsets[0].set, MixedSet): - cache = dsets[0].set[0] - else: - cache = dsets[0].set - if nest is None: - nest = configuration["matnest"] - if block_sparse is None: - block_sparse = configuration["block_sparsity"] - - maps = frozenset(zip(maps, iteration_regions)) - kwargs = {"name": name, - "nest": nest, - "block_sparse": block_sparse} - return (cache,) + 
def __getitem__(self, idx):
    """Return :class:`Sparsity` block with row and column given by ``idx``
    or a given row of blocks.

    :arg idx: either something that unpacks into exactly two items
        ``(i, j)``, selecting block ``self._blocks[i][j]``, or any other
        index, which is applied to the list of block rows directly.
    :raises TypeError: if the resulting index is not valid for a list.
    """
    # Only the unpacking decides which form of index was given. Keeping
    # the block lookup outside the ``try`` means a ``TypeError`` raised by
    # a bad ``i`` or ``j`` is reported as such, instead of being caught
    # and replaced by a misleading error about indexing with a tuple.
    try:
        i, j = idx
    except TypeError:
        return self._blocks[idx]
    return self._blocks[i][j]
def __iter__(self):
    r"""Yield every :class:`Sparsity` block, one row of blocks after the
    other and left to right within each row."""
    for block_row in self._blocks:
        yield from block_row
def __contains__(self, other):
    """Return true if ``tuple(other)`` compares ``<=`` to any of the
    pairs in ``self.maps``, false otherwise.

    The comparison is the ordinary tuple ``<=``, which relies on the
    ``==`` and ``<=`` defined by the elements."""
    return any(tuple(other) <= pair for pair in self.maps)
@validate_in(('access', _modes, ModeValueError))
def __call__(self, access, path, lgmaps=None, unroll_map=False):
    """Build the ``Arg`` object describing an access to this matrix.

    :arg access: the access descriptor; validated against ``_modes``.
    :arg path: the row and column maps, normalised with
        ``as_tuple(path, Map, 2)``.
    :arg lgmaps: forwarded unchanged to the ``Arg``.
    :arg unroll_map: forwarded unchanged to the ``Arg``.
    :raises MapValueError: if ``configuration["type_check"]`` is truthy
        and the map pair is not contained in the sparsity.
    """
    path_maps = as_tuple(path, Map, 2)
    if configuration["type_check"]:
        if tuple(path_maps) not in self.sparsity:
            raise MapValueError("Path maps not in sparsity maps")
    arg_kwargs = dict(data=self, map=path_maps, access=access,
                      lgmaps=lgmaps, unroll_map=unroll_map)
    return _make_object('Arg', **arg_kwargs)
@cached_property
def nrows(self):
    """The number of rows in the matrix (local to this process).

    Computed as the sum of ``size * cdim`` over the row datasets of the
    sparsity."""
    row_dsets = self.sparsity.dsets[0]
    return sum(dset.size * dset.cdim for dset in row_dsets)
- """ - assert len(self.sparsity.dsets[1]) == 1, "Block cols don't make sense for mixed Mats" - return self.sparsity.dsets[1].size - - @cached_property - def ncols(self): - "The number of columns in the matrix (local to this process)" - return sum(d.size * d.cdim for d in self.sparsity.dsets[1]) - - @cached_property - def sparsity(self): - """:class:`Sparsity` on which the ``Mat`` is defined.""" - return self._sparsity - - @cached_property - def _is_scalar_field(self): - # Sparsity from Dat to MixedDat has a shape like (1, (1, 1)) - # (which you can't take the product of) - return all(np.prod(d) == 1 for d in self.dims) - - @cached_property - def _is_vector_field(self): - return not self._is_scalar_field - - def change_assembly_state(self, new_state): - """Switch the matrix assembly state.""" - if new_state == Mat.ASSEMBLED or self.assembly_state == Mat.ASSEMBLED: - self.assembly_state = new_state - elif new_state != self.assembly_state: - self._flush_assembly() - self.assembly_state = new_state - else: - pass - - def _flush_assembly(self): - """Flush the in flight assembly operations (used when - switching between inserting and adding values).""" - pass - - @property - def values(self): - """A numpy array of matrix values. - - .. warning :: - This is a dense array, so will need a lot of memory. It's - probably not a good idea to access this property if your - matrix has more than around 10000 degrees of freedom. - """ - raise NotImplementedError("Abstract base Mat does not implement values()") - - @cached_property - def dtype(self): - """The Python type of the data.""" - return self._datatype - - @cached_property - def nbytes(self): - """Return an estimate of the size of the data associated with this - :class:`Mat` in bytes. This will be the correct size of the - data payload, but does not take into account the (presumably - small) overhead of the object and its metadata. The memory - associated with the sparsity pattern is also not recorded. 
- - Note that this is the process local memory usage, not the sum - over all MPI processes. - """ - if self._sparsity._block_sparse: - mult = np.sum(np.prod(self._sparsity.dims)) - else: - mult = 1 - return (self._sparsity.nz + self._sparsity.onz) \ - * self.dtype.itemsize * mult - - def __iter__(self): - """Yield self when iterated over.""" - yield self - - def __mul__(self, other): - """Multiply this :class:`Mat` with the vector ``other``.""" - raise NotImplementedError("Abstract base Mat does not implement multiplication") - - def __str__(self): - return "OP2 Mat: %s, sparsity (%s), datatype %s" \ - % (self._name, self._sparsity, self._datatype.name) - - def __repr__(self): - return "Mat(%r, %r, %r)" \ - % (self._sparsity, self._datatype, self._name) - -# Kernel API - - -class Kernel(Cached): - - """OP2 kernel type. - - :param code: kernel function definition, including signature; either a - string or an AST :class:`.Node` - :param name: kernel function name; must match the name of the kernel - function given in `code` - :param opts: options dictionary for :doc:`PyOP2 IR optimisations ` - (optional, ignored if `code` is a string) - :param include_dirs: list of additional include directories to be searched - when compiling the kernel (optional, defaults to empty) - :param headers: list of system headers to include when compiling the kernel - in the form ``#include `` (optional, defaults to empty) - :param user_code: code snippet to be executed once at the very start of - the generated kernel wrapper code (optional, defaults to - empty) - :param ldargs: A list of arguments to pass to the linker when - compiling this Kernel. - :param requires_zeroed_output_arguments: Does this kernel require the - output arguments to be zeroed on entry when called? (default no) - :param cpp: Is the kernel actually C++ rather than C? If yes, - then compile with the C++ compiler (kernel is wrapped in - extern C for linkage reasons). 
- - Consider the case of initialising a :class:`~pyop2.Dat` with seeded random - values in the interval 0 to 1. The corresponding :class:`~pyop2.Kernel` is - constructed as follows: :: - - op2.Kernel("void setrand(double *x) { x[0] = (double)random()/RAND_MAX); }", - name="setrand", - headers=["#include "], user_code="srandom(10001);") - - .. note:: - When running in parallel with MPI the generated code must be the same - on all ranks. - """ - - _cache = {} - - @classmethod - @validate_type(('name', str, NameTypeError)) - def _cache_key(cls, code, name, opts={}, include_dirs=[], headers=[], - user_code="", ldargs=None, cpp=False, requires_zeroed_output_arguments=False, - flop_count=None): - # Both code and name are relevant since there might be multiple kernels - # extracting different functions from the same code - # Also include the PyOP2 version, since the Kernel class might change - - if isinstance(code, Node): - code = code.gencode() - if isinstance(code, loopy.TranslationUnit): - from loopy.tools import LoopyKeyBuilder - from hashlib import sha256 - key_hash = sha256() - code.update_persistent_hash(key_hash, LoopyKeyBuilder()) - code = key_hash.hexdigest() - hashee = (str(code) + name + str(sorted(opts.items())) + str(include_dirs) - + str(headers) + version + str(ldargs) + str(cpp) + str(requires_zeroed_output_arguments)) - return md5(hashee.encode()).hexdigest() - - @cached_property - def _wrapper_cache_key_(self): - return (self._key, ) - - def __init__(self, code, name, opts={}, include_dirs=[], headers=[], - user_code="", ldargs=None, cpp=False, requires_zeroed_output_arguments=False, - flop_count=None): - # Protect against re-initialization when retrieved from cache - if self._initialized: - return - self._name = name - self._cpp = cpp - # Record used optimisations - self._opts = opts - self._include_dirs = include_dirs - self._ldargs = ldargs if ldargs is not None else [] - self._headers = headers - self._user_code = user_code - assert isinstance(code, 
(str, Node, loopy.Program, loopy.LoopKernel, loopy.TranslationUnit)) - self._code = code - self._initialized = True - self.requires_zeroed_output_arguments = requires_zeroed_output_arguments - self.flop_count = flop_count - - @property - def name(self): - """Kernel name, must match the kernel function name in the code.""" - return self._name - - @property - def code(self): - return self._code - - @cached_property - def num_flops(self): - if self.flop_count is not None: - return self.flop_count - if not configuration["compute_kernel_flops"]: - return 0 - if isinstance(self.code, Node): - v = EstimateFlops() - return v.visit(self.code) - elif isinstance(self.code, loopy.TranslationUnit): - op_map = loopy.get_op_map( - self.code.copy(options=loopy.Options(ignore_boostable_into=True), - silenced_warnings=['insn_count_subgroups_upper_bound', - 'get_x_map_guessing_subgroup_size', - 'summing_if_branches_ops']), - subgroup_size='guess') - return op_map.filter_by(name=['add', 'sub', 'mul', 'div'], dtype=[ScalarType]).eval_and_sum({}) - else: - return 0 - - def __str__(self): - return "OP2 Kernel: %s" % self._name - - def __repr__(self): - return 'Kernel("""%s""", %r)' % (self._code, self._name) - - def __eq__(self, other): - return self.cache_key == other.cache_key - - -class JITModule(Cached): - - """Cached module encapsulating the generated :class:`ParLoop` stub. - - .. warning:: - - Note to implementors. 
This object is *cached* and therefore - should not hold any references to objects you might want to be - collected (such PyOP2 data objects).""" - - _cache = {} - - @classmethod - def _cache_key(cls, kernel, iterset, *args, **kwargs): - counter = itertools.count() - seen = defaultdict(lambda: next(counter)) - key = ((id(dup_comm(iterset.comm)), ) + kernel._wrapper_cache_key_ + iterset._wrapper_cache_key_ - + (iterset._extruded, (iterset._extruded and iterset.constant_layers), isinstance(iterset, Subset))) - - for arg in args: - key += arg._wrapper_cache_key_ - for map_ in arg.map_tuple: - key += (seen[map_],) - - key += (kwargs.get("iterate", None), cls, configuration["simd_width"]) - - return key - - -class IterationRegion(IntEnum): - BOTTOM = 1 - TOP = 2 - INTERIOR_FACETS = 3 - ALL = 4 - - -ON_BOTTOM = IterationRegion.BOTTOM -"""Iterate over the cells at the bottom of the column in an extruded mesh.""" - -ON_TOP = IterationRegion.TOP -"""Iterate over the top cells in an extruded mesh.""" - -ON_INTERIOR_FACETS = IterationRegion.INTERIOR_FACETS -"""Iterate over the interior facets of an extruded mesh.""" - -ALL = IterationRegion.ALL -"""Iterate over all cells of an extruded mesh.""" - - -class ParLoop(object): - """Represents the kernel, iteration space and arguments of a parallel loop - invocation. - - .. note :: - - Users should not directly construct :class:`ParLoop` objects, but - use :func:`pyop2.op2.par_loop` instead. - - An optional keyword argument, ``iterate``, can be used to specify - which region of an :class:`ExtrudedSet` the parallel loop should - iterate over. - """ - - @validate_type(('kernel', Kernel, KernelTypeError), - ('iterset', Set, SetTypeError)) - def __init__(self, kernel, iterset, *args, **kwargs): - # INCs into globals need to start with zero and then sum back - # into the input global at the end. 
This has the same number - # of reductions but means that successive par_loops - # incrementing into a global get the "right" value in - # parallel. - # Don't care about MIN and MAX because they commute with the reduction - self._reduced_globals = {} - for i, arg in enumerate(args): - if arg._is_global_reduction and arg.access == INC: - glob = arg.data - tmp = _make_object('Global', glob.dim, data=np.zeros_like(glob.data_ro), dtype=glob.dtype) - self._reduced_globals[tmp] = glob - args[i].data = tmp - - # Always use the current arguments, also when we hit cache - self._actual_args = args - self._kernel = kernel - self._is_layered = iterset._extruded - self._iteration_region = kwargs.get("iterate", None) - self._pass_layer_arg = kwargs.get("pass_layer_arg", False) - - check_iterset(self.args, iterset) - - if self._pass_layer_arg: - if not self._is_layered: - raise ValueError("Can't request layer arg for non-extruded iteration") - - self.iterset = iterset - self.comm = iterset.comm - - for i, arg in enumerate(self._actual_args): - arg.position = i - arg.indirect_position = i - for i, arg1 in enumerate(self._actual_args): - if arg1._is_dat and arg1._is_indirect: - for arg2 in self._actual_args[i:]: - # We have to check for identity here (we really - # want these to be the same thing, not just look - # the same) - if arg2.data is arg1.data and arg2.map is arg1.map: - arg2.indirect_position = arg1.indirect_position - - self.arglist = self.prepare_arglist(iterset, *self.args) - - def prepare_arglist(self, iterset, *args): - """Prepare the argument list for calling generated code. - - :arg iterset: The :class:`Set` iterated over. - :arg args: A list of :class:`Args`, the argument to the :fn:`par_loop`. 
- """ - return () - - @cached_property - def num_flops(self): - iterset = self.iterset - size = 1 - if iterset._extruded: - region = self.iteration_region - layers = np.mean(iterset.layers_array[:, 1] - iterset.layers_array[:, 0]) - if region is ON_INTERIOR_FACETS: - size = layers - 2 - elif region not in [ON_TOP, ON_BOTTOM]: - size = layers - 1 - return size * self._kernel.num_flops - - def log_flops(self, flops): - pass - - @property - @collective - def _jitmodule(self): - """Return the :class:`JITModule` that encapsulates the compiled par_loop code. - - Return None if the child class should deal with this in another way.""" - return None - - @cached_property - def _parloop_event(self): - return timed_region("ParLoopExecute") - - @collective - def compute(self): - """Executes the kernel over all members of the iteration space.""" - with self._parloop_event: - orig_lgmaps = [] - for arg in self.args: - if arg._is_mat: - new_state = {INC: Mat.ADD_VALUES, - WRITE: Mat.INSERT_VALUES}[arg.access] - for m in arg.data: - m.change_assembly_state(new_state) - arg.data.change_assembly_state(new_state) - # Boundary conditions applied to the matrix appear - # as modified lgmaps on the Arg. We set them onto - # the matrix so things are correctly dropped in - # insertion, and then restore the original lgmaps - # afterwards. - if arg.lgmaps is not None: - olgmaps = [] - for m, lgmaps in zip(arg.data, arg.lgmaps): - olgmaps.append(m.handle.getLGMap()) - m.handle.setLGMap(*lgmaps) - orig_lgmaps.append(olgmaps) - self.global_to_local_begin() - iterset = self.iterset - arglist = self.arglist - fun = self._jitmodule - # Need to ensure INC globals are zero on entry to the loop - # in case it's reused. - for g in self._reduced_globals.keys(): - g._data[...] 
= 0 - self._compute(iterset.core_part, fun, *arglist) - self.global_to_local_end() - self._compute(iterset.owned_part, fun, *arglist) - self.reduction_begin() - self.local_to_global_begin() - self.update_arg_data_state() - for arg in reversed(self.args): - if arg._is_mat and arg.lgmaps is not None: - for m, lgmaps in zip(arg.data, orig_lgmaps.pop()): - m.handle.setLGMap(*lgmaps) - self.reduction_end() - self.local_to_global_end() - - @collective - def _compute(self, part, fun, *arglist): - """Executes the kernel over all members of a MPI-part of the iteration space. - - :arg part: The :class:`SetPartition` to compute over - :arg fun: The :class:`JITModule` encapsulating the compiled - code (may be ignored by the backend). - :arg arglist: The arguments to pass to the compiled code (may - be ignored by the backend, depending on the exact implementation)""" - raise RuntimeError("Must select a backend") - - @collective - def global_to_local_begin(self): - """Start halo exchanges.""" - for arg in self.unique_dat_args: - arg.global_to_local_begin() - - @collective - def global_to_local_end(self): - """Finish halo exchanges""" - for arg in self.unique_dat_args: - arg.global_to_local_end() - - @collective - def local_to_global_begin(self): - """Start halo exchanges.""" - for arg in self.unique_dat_args: - arg.local_to_global_begin() - - @collective - def local_to_global_end(self): - """Finish halo exchanges (wait on irecvs)""" - for arg in self.unique_dat_args: - arg.local_to_global_end() - - @cached_property - def _reduction_event_begin(self): - return timed_region("ParLoopRednBegin") - - @cached_property - def _reduction_event_end(self): - return timed_region("ParLoopRednEnd") - - @cached_property - def _has_reduction(self): - return len(self.global_reduction_args) > 0 - - @collective - def reduction_begin(self): - """Start reductions""" - if not self._has_reduction: - return - with self._reduction_event_begin: - for arg in self.global_reduction_args: - 
arg.reduction_begin(self.comm) - - @collective - def reduction_end(self): - """End reductions""" - if not self._has_reduction: - return - with self._reduction_event_end: - for arg in self.global_reduction_args: - arg.reduction_end(self.comm) - # Finalise global increments - for tmp, glob in self._reduced_globals.items(): - glob._data += tmp._data - - @collective - def update_arg_data_state(self): - r"""Update the state of the :class:`DataCarrier`\s in the arguments to the `par_loop`. - - This marks :class:`Mat`\s that need assembly.""" - for arg in self.args: - access = arg.access - if access is READ: - continue - if arg._is_dat: - arg.data.halo_valid = False - if arg._is_mat: - state = {WRITE: Mat.INSERT_VALUES, - INC: Mat.ADD_VALUES}[access] - arg.data.assembly_state = state - - @cached_property - def dat_args(self): - return tuple(arg for arg in self.args if arg._is_dat) - - @cached_property - def unique_dat_args(self): - seen = {} - unique = [] - for arg in self.dat_args: - if arg.data not in seen: - unique.append(arg) - seen[arg.data] = arg - elif arg.access != seen[arg.data].access: - raise ValueError("Same Dat appears multiple times with different " - "access descriptors") - return tuple(unique) - - @cached_property - def global_reduction_args(self): - return tuple(arg for arg in self.args if arg._is_global_reduction) - - @cached_property - def kernel(self): - """Kernel executed by this parallel loop.""" - return self._kernel - - @cached_property - def args(self): - """Arguments to this parallel loop.""" - return self._actual_args - - @cached_property - def is_layered(self): - """Flag which triggers extrusion""" - return self._is_layered - - @cached_property - def iteration_region(self): - """Specifies the part of the mesh the parallel loop will - be iterating over. 
The effect is the loop only iterates over - a certain part of an extruded mesh, for example on top cells, bottom cells or - interior facets.""" - return self._iteration_region - - -def check_iterset(args, iterset): - """Checks that the iteration set of the :class:`ParLoop` matches the - iteration set of all its arguments. A :class:`MapValueError` is raised - if this condition is not met.""" - - if isinstance(iterset, Subset): - _iterset = iterset.superset - else: - _iterset = iterset - if configuration["type_check"]: - if isinstance(_iterset, MixedSet): - raise SetTypeError("Cannot iterate over MixedSets") - for i, arg in enumerate(args): - if arg._is_global: - continue - if arg._is_direct: - if isinstance(_iterset, ExtrudedSet): - if arg.data.dataset.set != _iterset.parent: - raise MapValueError( - "Iterset of direct arg %s doesn't match ParLoop iterset." % i) - elif arg.data.dataset.set != _iterset: - raise MapValueError( - "Iterset of direct arg %s doesn't match ParLoop iterset." % i) - continue - for j, m in enumerate(arg._map): - if isinstance(_iterset, ExtrudedSet): - if m.iterset != _iterset and m.iterset not in _iterset: - raise MapValueError( - "Iterset of arg %s map %s doesn't match ParLoop iterset." % (i, j)) - elif m.iterset != _iterset and m.iterset not in _iterset: - raise MapValueError( - "Iterset of arg %s map %s doesn't match ParLoop iterset." % (i, j)) - - -@collective -def par_loop(kernel, iterset, *args, **kwargs): - r"""Invocation of an OP2 kernel - - :arg kernel: The :class:`Kernel` to be executed. - :arg iterset: The iteration :class:`Set` over which the kernel should be - executed. 
- :arg \*args: One or more :class:`base.Arg`\s constructed from a - :class:`Global`, :class:`Dat` or :class:`Mat` using the call - syntax and passing in an optionally indexed :class:`Map` - through which this :class:`base.Arg` is accessed and the - :class:`base.Access` descriptor indicating how the - :class:`Kernel` is going to access this data (see the example - below). These are the global data structures from and to - which the kernel will read and write. - :kwarg iterate: Optionally specify which region of an - :class:`ExtrudedSet` to iterate over. - Valid values are: - - - ``ON_BOTTOM``: iterate over the bottom layer of cells. - - ``ON_TOP`` iterate over the top layer of cells. - - ``ALL`` iterate over all cells (the default if unspecified) - - ``ON_INTERIOR_FACETS`` iterate over all the layers - except the top layer, accessing data two adjacent (in - the extruded direction) cells at a time. - - :kwarg pass_layer_arg: Should the wrapper pass the current layer - into the kernel (as an ``int``). Only makes sense for - indirect extruded iteration. - - .. warning :: - It is the caller's responsibility that the number and type of all - :class:`base.Arg`\s passed to the :func:`par_loop` match those expected - by the :class:`Kernel`. No runtime check is performed to ensure this! - - :func:`par_loop` invocation is illustrated by the following example :: - - pyop2.par_loop(mass, elements, - mat(pyop2.INC, (elem_node[pyop2.i[0]]), elem_node[pyop2.i[1]]), - coords(pyop2.READ, elem_node)) - - This example will execute the :class:`Kernel` ``mass`` over the - :class:`Set` ``elements`` executing 3x3 times for each - :class:`Set` member, assuming the :class:`Map` ``elem_node`` is of arity 3. - The :class:`Kernel` takes four arguments, the first is a :class:`Mat` named - ``mat``, the second is a field named ``coords``. The remaining two arguments - indicate which local iteration space point the kernel is to execute. 
- - A :class:`Mat` requires a pair of :class:`Map` objects, one each - for the row and column spaces. In this case both are the same - ``elem_node`` map. The row :class:`Map` is indexed by the first - index in the local iteration space, indicated by the ``0`` index - to :data:`pyop2.i`, while the column space is indexed by - the second local index. The matrix is accessed to increment - values using the ``pyop2.INC`` access descriptor. - - The ``coords`` :class:`Dat` is also accessed via the ``elem_node`` - :class:`Map`, however no indices are passed so all entries of - ``elem_node`` for the relevant member of ``elements`` will be - passed to the kernel as a vector. - """ - if isinstance(kernel, types.FunctionType): - from pyop2 import pyparloop - return pyparloop.ParLoop(kernel, iterset, *args, **kwargs).compute() - return _make_object('ParLoop', kernel, iterset, *args, **kwargs).compute() diff --git a/pyop2/codegen/rep2loopy.py b/pyop2/codegen/rep2loopy.py index 2dd21310e..ba8f17fb4 100644 --- a/pyop2/codegen/rep2loopy.py +++ b/pyop2/codegen/rep2loopy.py @@ -19,7 +19,7 @@ from pyop2.codegen.node import traversal, Node, Memoizer, reuse_if_untouched -from pyop2.base import READ, WRITE +from pyop2.types.access import READ, WRITE from pyop2.datatypes import as_ctypes from pyop2.codegen.optimise import index_merger, rename_nodes diff --git a/pyop2/compilation.py b/pyop2/compilation.py index 97e0b4c0f..aabdaa9c1 100644 --- a/pyop2/compilation.py +++ b/pyop2/compilation.py @@ -48,7 +48,6 @@ from pyop2.configuration import configuration from pyop2.logger import debug, progress, INFO from pyop2.exceptions import CompilationError -from pyop2.base import JITModule def _check_hashes(x, y, datatype): @@ -466,6 +465,7 @@ def load(jitmodule, extension, fn_name, cppargs=[], ldargs=[], :kwarg comm: Optional communicator to compile the code on (only rank 0 compiles code) (defaults to COMM_WORLD). 
""" + from pyop2.parloop import JITModule if isinstance(jitmodule, str): class StrCode(object): def __init__(self, code, argtypes): diff --git a/pyop2/kernel.py b/pyop2/kernel.py new file mode 100644 index 000000000..9a6c15387 --- /dev/null +++ b/pyop2/kernel.py @@ -0,0 +1,150 @@ +import hashlib + +import coffee +import loopy as lp + +from . import caching, configuration as conf, datatypes, exceptions as ex, utils, version + + +class Kernel(caching.Cached): + + """OP2 kernel type. + + :param code: kernel function definition, including signature; either a + string or an AST :class:`.Node` + :param name: kernel function name; must match the name of the kernel + function given in `code` + :param opts: options dictionary for :doc:`PyOP2 IR optimisations ` + (optional, ignored if `code` is a string) + :param include_dirs: list of additional include directories to be searched + when compiling the kernel (optional, defaults to empty) + :param headers: list of system headers to include when compiling the kernel + in the form ``#include `` (optional, defaults to empty) + :param user_code: code snippet to be executed once at the very start of + the generated kernel wrapper code (optional, defaults to + empty) + :param ldargs: A list of arguments to pass to the linker when + compiling this Kernel. + :param requires_zeroed_output_arguments: Does this kernel require the + output arguments to be zeroed on entry when called? (default no) + :param cpp: Is the kernel actually C++ rather than C? If yes, + then compile with the C++ compiler (kernel is wrapped in + extern C for linkage reasons). + + Consider the case of initialising a :class:`~pyop2.Dat` with seeded random + values in the interval 0 to 1. The corresponding :class:`~pyop2.Kernel` is + constructed as follows: :: + + op2.Kernel("void setrand(double *x) { x[0] = (double)random()/RAND_MAX); }", + name="setrand", + headers=["#include "], user_code="srandom(10001);") + + .. 
note:: + When running in parallel with MPI the generated code must be the same + on all ranks. + """ + + _cache = {} + + @classmethod + @utils.validate_type(('name', str, ex.NameTypeError)) + def _cache_key(cls, code, name, opts={}, include_dirs=[], headers=[], + user_code="", ldargs=None, cpp=False, requires_zeroed_output_arguments=False, + flop_count=None): + # Both code and name are relevant since there might be multiple kernels + # extracting different functions from the same code + # Also include the PyOP2 version, since the Kernel class might change + + if isinstance(code, coffee.base.Node): + code = code.gencode() + if isinstance(code, lp.TranslationUnit): + from loopy.tools import LoopyKeyBuilder + from hashlib import sha256 + key_hash = sha256() + code.update_persistent_hash(key_hash, LoopyKeyBuilder()) + code = key_hash.hexdigest() + hashee = (str(code) + name + str(sorted(opts.items())) + str(include_dirs) + + str(headers) + version.__version__ + str(ldargs) + str(cpp) + str(requires_zeroed_output_arguments)) + return hashlib.md5(hashee.encode()).hexdigest() + + @utils.cached_property + def _wrapper_cache_key_(self): + return (self._key, ) + + def __init__(self, code, name, opts={}, include_dirs=[], headers=[], + user_code="", ldargs=None, cpp=False, requires_zeroed_output_arguments=False, + flop_count=None): + # Protect against re-initialization when retrieved from cache + if self._initialized: + return + self._name = name + self._cpp = cpp + # Record used optimisations + self._opts = opts + self._include_dirs = include_dirs + self._ldargs = ldargs if ldargs is not None else [] + self._headers = headers + self._user_code = user_code + assert isinstance(code, (str, coffee.base.Node, lp.Program, lp.LoopKernel, lp.TranslationUnit)) + self._code = code + self._initialized = True + self.requires_zeroed_output_arguments = requires_zeroed_output_arguments + self.flop_count = flop_count + + @property + def name(self): + """Kernel name, must match the kernel 
function name in the code.""" + return self._name + + @property + def code(self): + return self._code + + @utils.cached_property + def num_flops(self): + if self.flop_count is not None: + return self.flop_count + if not conf.configuration["compute_kernel_flops"]: + return 0 + if isinstance(self.code, coffee.base.Node): + v = coffee.visitors.EstimateFlops() + return v.visit(self.code) + elif isinstance(self.code, lp.TranslationUnit): + op_map = lp.get_op_map( + self.code.copy(options=lp.Options(ignore_boostable_into=True), + silenced_warnings=['insn_count_subgroups_upper_bound', + 'get_x_map_guessing_subgroup_size', + 'summing_if_branches_ops']), + subgroup_size='guess') + return op_map.filter_by(name=['add', 'sub', 'mul', 'div'], dtype=[datatypes.ScalarType]).eval_and_sum({}) + else: + return 0 + + def __str__(self): + return "OP2 Kernel: %s" % self._name + + def __repr__(self): + return 'Kernel("""%s""", %r)' % (self._code, self._name) + + def __eq__(self, other): + return self.cache_key == other.cache_key + + +class PyKernel(Kernel): + @classmethod + def _cache_key(cls, *args, **kwargs): + return None + + def __init__(self, code, name=None, **kwargs): + self._func = code + self._name = name + + def __getattr__(self, attr): + """Return None on unrecognised attributes""" + return None + + def __call__(self, *args): + return self._func(*args) + + def __repr__(self): + return 'Kernel("""%s""", %r)' % (self._func, self._name) diff --git a/pyop2/op2.py b/pyop2/op2.py index 84ac26056..9611afb34 100644 --- a/pyop2/op2.py +++ b/pyop2/op2.py @@ -39,15 +39,18 @@ from pyop2.logger import debug, info, warning, error, critical, set_log_level from pyop2.mpi import MPI, COMM_WORLD, collective -from pyop2.sequential import par_loop, Kernel # noqa: F401 -from pyop2.sequential import READ, WRITE, RW, INC, MIN, MAX # noqa: F401 -from pyop2.base import ON_BOTTOM, ON_TOP, ON_INTERIOR_FACETS, ALL # noqa: F401 -from pyop2.sequential import Set, ExtrudedSet, MixedSet, Subset, DataSet, 
MixedDataSet # noqa: F401 -from pyop2.sequential import Map, MixedMap, PermutedMap, Sparsity, Halo # noqa: F401 -from pyop2.sequential import Global, GlobalDataSet # noqa: F401 -from pyop2.sequential import Dat, MixedDat, DatView, Mat # noqa: F401 -from pyop2.sequential import ParLoop as SeqParLoop -from pyop2.pyparloop import ParLoop as PyParLoop +from .types import ( + Set, ExtrudedSet, MixedSet, Subset, DataSet, MixedDataSet, + Map, MixedMap, PermutedMap, Sparsity, Halo, + Global, GlobalDataSet, + Dat, MixedDat, DatView, Mat +) +from .types.access import READ, WRITE, RW, INC, MIN, MAX + +from pyop2.parloop import par_loop, ON_BOTTOM, ON_TOP, ON_INTERIOR_FACETS, ALL +from pyop2.kernel import Kernel + +from pyop2.parloop import ParLoop as SeqParLoop, PyParLoop import types import loopy diff --git a/pyop2/parloop.py b/pyop2/parloop.py new file mode 100644 index 000000000..081fb33cc --- /dev/null +++ b/pyop2/parloop.py @@ -0,0 +1,1003 @@ +import abc +import collections +import copy +import ctypes +import enum +import itertools +import operator +import os +import types + +import loopy as lp +import numpy as np +from petsc4py import PETSc + +from . import ( + caching, + compilation, + configuration as conf, + datatypes as dtypes, + exceptions as ex, + mpi, + profiling, + utils +) +from .kernel import Kernel, PyKernel +from .types import ( + Access, + Global, Dat, DatView, Mat, Map, MixedDat, AbstractDat, AbstractMat, + Set, MixedSet, ExtrudedSet, Subset +) + + +class Arg: + + """An argument to a :func:`pyop2.op2.par_loop`. + + .. warning :: + User code should not directly instantiate :class:`Arg`. + Instead, use the call syntax on the :class:`DataCarrier`. + """ + + def __init__(self, data=None, map=None, access=None, lgmaps=None, unroll_map=False): + """ + :param data: A data-carrying object, either :class:`Dat` or class:`Mat` + :param map: A :class:`Map` to access this :class:`Arg` or the default + if the identity map is to be used. 
+ :param access: An access descriptor of type :class:`Access` + :param lgmaps: For :class:`Mat` objects, a tuple of 2-tuples of local to + global maps used during assembly. + + Checks that: + + 1. the maps used are initialized i.e. have mapping data associated, and + 2. the to Set of the map used to access it matches the Set it is + defined on. + + A :class:`MapValueError` is raised if these conditions are not met.""" + self.data = data + self._map = map + if map is None: + self.map_tuple = () + elif isinstance(map, Map): + self.map_tuple = (map, ) + else: + self.map_tuple = tuple(map) + + if data is not None and hasattr(data, "dtype"): + if data.dtype.kind == "c" and (access == Access.MIN or access == Access.MAX): + raise ValueError("MIN and MAX access descriptors are undefined on complex data.") + self._access = access + + self.unroll_map = unroll_map + self.lgmaps = None + if self._is_mat and lgmaps is not None: + self.lgmaps = utils.as_tuple(lgmaps) + assert len(self.lgmaps) == self.data.nblocks + else: + if lgmaps is not None: + raise ValueError("Local to global maps only for matrices") + + # Check arguments for consistency + if conf.configuration["type_check"] and not (self._is_global or map is None): + for j, m in enumerate(map): + if m.iterset.total_size > 0 and len(m.values_with_halo) == 0: + raise ex.MapValueError("%s is not initialized." % map) + if self._is_mat and m.toset != data.sparsity.dsets[j].set: + raise ex.MapValueError( + "To set of %s doesn't match the set of %s." % (map, data)) + if self._is_dat and map.toset != data.dataset.set: + raise ex.MapValueError( + "To set of %s doesn't match the set of %s." % (map, data)) + + def recreate(self, data=None, map=None, access=None, lgmaps=None, unroll_map=None): + """Creates a new Dat based on the existing Dat with the changes specified. 
+ + :param data: A data-carrying object, either :class:`Dat` or class:`Mat` + :param map: A :class:`Map` to access this :class:`Arg` or the default + if the identity map is to be used. + :param access: An access descriptor of type :class:`Access` + :param lgmaps: For :class:`Mat` objects, a tuple of 2-tuples of local to + global maps used during assembly.""" + return type(self)(data=data or self.data, + map=map or self.map, + access=access or self.access, + lgmaps=lgmaps or self.lgmaps, + unroll_map=False if unroll_map is None else unroll_map) + + @utils.cached_property + def _kernel_args_(self): + return self.data._kernel_args_ + + @utils.cached_property + def _argtypes_(self): + return self.data._argtypes_ + + @utils.cached_property + def _wrapper_cache_key_(self): + if self.map is not None: + map_ = tuple(None if m is None else m._wrapper_cache_key_ for m in self.map) + else: + map_ = self.map + return (type(self), self.access, self.data._wrapper_cache_key_, map_, self.unroll_map) + + @property + def _key(self): + return (self.data, self._map, self._access) + + def __eq__(self, other): + r""":class:`Arg`\s compare equal of they are defined on the same data, + use the same :class:`Map` with the same index and the same access + descriptor.""" + return self._key == other._key + + def __ne__(self, other): + r""":class:`Arg`\s compare equal of they are defined on the same data, + use the same :class:`Map` with the same index and the same access + descriptor.""" + return not self.__eq__(other) + + def __str__(self): + return "OP2 Arg: dat %s, map %s, access %s" % \ + (self.data, self._map, self._access) + + def __repr__(self): + return "Arg(%r, %r, %r)" % \ + (self.data, self._map, self._access) + + def __iter__(self): + for arg in self.split: + yield arg + + @utils.cached_property + def split(self): + """Split a mixed argument into a tuple of constituent arguments.""" + if self._is_mixed_dat: + return tuple(Arg(d, m, self._access) + for d, m in zip(self.data, 
self._map)) + elif self._is_mixed_mat: + rows, cols = self.data.sparsity.shape + mr, mc = self.map + return tuple(Arg(self.data[i, j], (mr.split[i], mc.split[j]), self._access) + for i in range(rows) for j in range(cols)) + else: + return (self,) + + @utils.cached_property + def name(self): + """The generated argument name.""" + return "arg%d" % self.position + + @utils.cached_property + def ctype(self): + """String representing the C type of the data in this ``Arg``.""" + return self.data.ctype + + @utils.cached_property + def dtype(self): + """Numpy datatype of this Arg""" + return self.data.dtype + + @utils.cached_property + def map(self): + """The :class:`Map` via which the data is to be accessed.""" + return self._map + + @utils.cached_property + def access(self): + """Access descriptor. One of the constants of type :class:`Access`""" + return self._access + + @utils.cached_property + def _is_dat_view(self): + return isinstance(self.data, DatView) + + @utils.cached_property + def _is_mat(self): + return isinstance(self.data, AbstractMat) + + @utils.cached_property + def _is_mixed_mat(self): + return self._is_mat and self.data.sparsity.shape > (1, 1) + + @utils.cached_property + def _is_global(self): + return isinstance(self.data, Global) + + @utils.cached_property + def _is_global_reduction(self): + return self._is_global and self._access in {Access.INC, Access.MIN, Access.MAX} + + @utils.cached_property + def _is_dat(self): + return isinstance(self.data, AbstractDat) + + @utils.cached_property + def _is_mixed_dat(self): + return isinstance(self.data, MixedDat) + + @utils.cached_property + def _is_mixed(self): + return self._is_mixed_dat or self._is_mixed_mat + + @utils.cached_property + def _is_direct(self): + return isinstance(self.data, Dat) and self.map is None + + @utils.cached_property + def _is_indirect(self): + return isinstance(self.data, Dat) and self.map is not None + + @mpi.collective + def global_to_local_begin(self): + """Begin halo exchange for 
the argument if a halo update is required. + Doing halo exchanges only makes sense for :class:`Dat` objects. + """ + assert self._is_dat, "Doing halo exchanges only makes sense for Dats" + if self._is_direct: + return + if self.access is not Access.WRITE: + self.data.global_to_local_begin(self.access) + + @mpi.collective + def global_to_local_end(self): + """Finish halo exchange for the argument if a halo update is required. + Doing halo exchanges only makes sense for :class:`Dat` objects. + """ + assert self._is_dat, "Doing halo exchanges only makes sense for Dats" + if self._is_direct: + return + if self.access is not Access.WRITE: + self.data.global_to_local_end(self.access) + + @mpi.collective + def local_to_global_begin(self): + assert self._is_dat, "Doing halo exchanges only makes sense for Dats" + if self._is_direct: + return + if self.access in {Access.INC, Access.MIN, Access.MAX}: + self.data.local_to_global_begin(self.access) + + @mpi.collective + def local_to_global_end(self): + assert self._is_dat, "Doing halo exchanges only makes sense for Dats" + if self._is_direct: + return + if self.access in {Access.INC, Access.MIN, Access.MAX}: + self.data.local_to_global_end(self.access) + + @mpi.collective + def reduction_begin(self, comm): + """Begin reduction for the argument if its access is INC, MIN, or MAX. + Doing a reduction only makes sense for :class:`Global` objects.""" + assert self._is_global, \ + "Doing global reduction only makes sense for Globals" + if self.access is not Access.READ: + if self.access is Access.INC: + op = mpi.MPI.SUM + elif self.access is Access.MIN: + op = mpi.MPI.MIN + elif self.access is Access.MAX: + op = mpi.MPI.MAX + if mpi.MPI.VERSION >= 3: + self._reduction_req = comm.Iallreduce(self.data._data, self.data._buf, op=op) + else: + comm.Allreduce(self.data._data, self.data._buf, op=op) + + @mpi.collective + def reduction_end(self, comm): + """End reduction for the argument if it is in flight. 
+ Doing a reduction only makes sense for :class:`Global` objects.""" + assert self._is_global, \ + "Doing global reduction only makes sense for Globals" + if self.access is not Access.READ: + if mpi.MPI.VERSION >= 3: + self._reduction_req.Wait() + self._reduction_req = None + self.data._data[:] = self.data._buf[:] + + +class JITModule(caching.Cached): + + """Cached module encapsulating the generated :class:`ParLoop` stub. + + .. warning:: + + Note to implementors. This object is *cached* and therefore + should not hold any references to objects you might want to be + collected (such PyOP2 data objects).""" + + _cppargs = [] + _libraries = [] + _system_headers = [] + + _cache = {} + + @classmethod + def _cache_key(cls, kernel, iterset, *args, **kwargs): + counter = itertools.count() + seen = collections.defaultdict(lambda: next(counter)) + key = ((id(mpi.dup_comm(iterset.comm)), ) + kernel._wrapper_cache_key_ + iterset._wrapper_cache_key_ + + (iterset._extruded, (iterset._extruded and iterset.constant_layers), isinstance(iterset, Subset))) + + for arg in args: + key += arg._wrapper_cache_key_ + for map_ in arg.map_tuple: + key += (seen[map_],) + + key += (kwargs.get("iterate", None), cls, conf.configuration["simd_width"]) + + return key + + def __init__(self, kernel, iterset, *args, **kwargs): + r""" + A cached compiled function to execute for a specified par_loop. + + See :func:`~.par_loop` for the description of arguments. + + .. warning :: + + Note to implementors. This object is *cached*, and therefore + should not hold any long term references to objects that + you want to be collected. In particular, after the + ``args`` have been inspected to produce the compiled code, + they **must not** remain part of the object's slots, + otherwise they (and the :class:`~.Dat`\s, :class:`~.Map`\s + and :class:`~.Mat`\s they reference) will never be collected. + """ + # Return early if we were in the cache. 
+ if self._initialized: + return + self.comm = iterset.comm + self._kernel = kernel + self._fun = None + self._iterset = iterset + self._args = args + self._iteration_region = kwargs.get('iterate', ALL) + self._pass_layer_arg = kwargs.get('pass_layer_arg', False) + # Copy the class variables, so we don't overwrite them + self._cppargs = copy.deepcopy(type(self)._cppargs) + self._libraries = copy.deepcopy(type(self)._libraries) + self._system_headers = copy.deepcopy(type(self)._system_headers) + if not kwargs.get('delay', False): + self.compile() + self._initialized = True + + @mpi.collective + def __call__(self, *args): + return self._fun(*args) + + @utils.cached_property + def _wrapper_name(self): + return 'wrap_%s' % self._kernel.name + + @utils.cached_property + def code_to_compile(self): + from pyop2.codegen.builder import WrapperBuilder + from pyop2.codegen.rep2loopy import generate + + builder = WrapperBuilder(kernel=self._kernel, + iterset=self._iterset, + iteration_region=self._iteration_region, + pass_layer_to_kernel=self._pass_layer_arg) + for arg in self._args: + builder.add_argument(arg) + + wrapper = generate(builder) + code = lp.generate_code_v2(wrapper) + + if self._kernel._cpp: + from loopy.codegen.result import process_preambles + preamble = "".join(process_preambles(getattr(code, "device_preambles", []))) + device_code = "\n\n".join(str(dp.ast) for dp in code.device_programs) + return preamble + "\nextern \"C\" {\n" + device_code + "\n}\n" + return code.device_code() + + @PETSc.Log.EventDecorator() + @mpi.collective + def compile(self): + # If we weren't in the cache we /must/ have arguments + if not hasattr(self, '_args'): + raise RuntimeError("JITModule has no args associated with it, should never happen") + + compiler = conf.configuration["compiler"] + extension = "cpp" if self._kernel._cpp else "c" + cppargs = self._cppargs + cppargs += ["-I%s/include" % d for d in utils.get_petsc_dir()] + \ + ["-I%s" % d for d in self._kernel._include_dirs] + 
\ + ["-I%s" % os.path.abspath(os.path.dirname(__file__))] + ldargs = ["-L%s/lib" % d for d in utils.get_petsc_dir()] + \ + ["-Wl,-rpath,%s/lib" % d for d in utils.get_petsc_dir()] + \ + ["-lpetsc", "-lm"] + self._libraries + ldargs += self._kernel._ldargs + + self._fun = compilation.load(self, + extension, + self._wrapper_name, + cppargs=cppargs, + ldargs=ldargs, + restype=ctypes.c_int, + compiler=compiler, + comm=self.comm) + # Blow away everything we don't need any more + del self._args + del self._kernel + del self._iterset + + @utils.cached_property + def argtypes(self): + index_type = dtypes.as_ctypes(dtypes.IntType) + argtypes = (index_type, index_type) + argtypes += self._iterset._argtypes_ + for arg in self._args: + argtypes += arg._argtypes_ + seen = set() + for arg in self._args: + maps = arg.map_tuple + for map_ in maps: + for k, t in zip(map_._kernel_args_, map_._argtypes_): + if k in seen: + continue + argtypes += (t,) + seen.add(k) + return argtypes + + +class IterationRegion(enum.IntEnum): + BOTTOM = 1 + TOP = 2 + INTERIOR_FACETS = 3 + ALL = 4 + + +ON_BOTTOM = IterationRegion.BOTTOM +"""Iterate over the cells at the bottom of the column in an extruded mesh.""" + +ON_TOP = IterationRegion.TOP +"""Iterate over the top cells in an extruded mesh.""" + +ON_INTERIOR_FACETS = IterationRegion.INTERIOR_FACETS +"""Iterate over the interior facets of an extruded mesh.""" + +ALL = IterationRegion.ALL +"""Iterate over all cells of an extruded mesh.""" + + +class AbstractParLoop(abc.ABC): + """Represents the kernel, iteration space and arguments of a parallel loop + invocation. + .. note :: + Users should not directly construct :class:`ParLoop` objects, but + use :func:`pyop2.op2.par_loop` instead. + An optional keyword argument, ``iterate``, can be used to specify + which region of an :class:`ExtrudedSet` the parallel loop should + iterate over. 
+ """ + + @utils.validate_type(('kernel', Kernel, ex.KernelTypeError), + ('iterset', Set, ex.SetTypeError)) + def __init__(self, kernel, iterset, *args, **kwargs): + # INCs into globals need to start with zero and then sum back + # into the input global at the end. This has the same number + # of reductions but means that successive par_loops + # incrementing into a global get the "right" value in + # parallel. + # Don't care about MIN and MAX because they commute with the reduction + self._reduced_globals = {} + for i, arg in enumerate(args): + if arg._is_global_reduction and arg.access == Access.INC: + glob = arg.data + tmp = Global(glob.dim, data=np.zeros_like(glob.data_ro), dtype=glob.dtype) + self._reduced_globals[tmp] = glob + args[i].data = tmp + + # Always use the current arguments, also when we hit cache + self._actual_args = args + self._kernel = kernel + self._is_layered = iterset._extruded + self._iteration_region = kwargs.get("iterate", None) + self._pass_layer_arg = kwargs.get("pass_layer_arg", False) + + check_iterset(self.args, iterset) + + if self._pass_layer_arg: + if not self._is_layered: + raise ValueError("Can't request layer arg for non-extruded iteration") + + self.iterset = iterset + self.comm = iterset.comm + + for i, arg in enumerate(self._actual_args): + arg.position = i + arg.indirect_position = i + for i, arg1 in enumerate(self._actual_args): + if arg1._is_dat and arg1._is_indirect: + for arg2 in self._actual_args[i:]: + # We have to check for identity here (we really + # want these to be the same thing, not just look + # the same) + if arg2.data is arg1.data and arg2.map is arg1.map: + arg2.indirect_position = arg1.indirect_position + + self.arglist = self.prepare_arglist(iterset, *self.args) + + def prepare_arglist(self, iterset, *args): + """Prepare the argument list for calling generated code. + :arg iterset: The :class:`Set` iterated over. + :arg args: A list of :class:`Args`, the argument to the :fn:`par_loop`. 
+ """ + return () + + @utils.cached_property + def num_flops(self): + iterset = self.iterset + size = 1 + if iterset._extruded: + region = self.iteration_region + layers = np.mean(iterset.layers_array[:, 1] - iterset.layers_array[:, 0]) + if region is ON_INTERIOR_FACETS: + size = layers - 2 + elif region not in [ON_TOP, ON_BOTTOM]: + size = layers - 1 + return size * self._kernel.num_flops + + def log_flops(self, flops): + pass + + @property + @mpi.collective + def _jitmodule(self): + """Return the :class:`JITModule` that encapsulates the compiled par_loop code. + Return None if the child class should deal with this in another way.""" + return None + + @utils.cached_property + def _parloop_event(self): + return profiling.timed_region("ParLoopExecute") + + @mpi.collective + def compute(self): + """Executes the kernel over all members of the iteration space.""" + with self._parloop_event: + orig_lgmaps = [] + for arg in self.args: + if arg._is_mat: + new_state = {Access.INC: Mat.ADD_VALUES, + Access.WRITE: Mat.INSERT_VALUES}[arg.access] + for m in arg.data: + m.change_assembly_state(new_state) + arg.data.change_assembly_state(new_state) + # Boundary conditions applied to the matrix appear + # as modified lgmaps on the Arg. We set them onto + # the matrix so things are correctly dropped in + # insertion, and then restore the original lgmaps + # afterwards. + if arg.lgmaps is not None: + olgmaps = [] + for m, lgmaps in zip(arg.data, arg.lgmaps): + olgmaps.append(m.handle.getLGMap()) + m.handle.setLGMap(*lgmaps) + orig_lgmaps.append(olgmaps) + self.global_to_local_begin() + iterset = self.iterset + arglist = self.arglist + fun = self._jitmodule + # Need to ensure INC globals are zero on entry to the loop + # in case it's reused. + for g in self._reduced_globals.keys(): + g._data[...] 
= 0 + self._compute(iterset.core_part, fun, *arglist) + self.global_to_local_end() + self._compute(iterset.owned_part, fun, *arglist) + self.reduction_begin() + self.local_to_global_begin() + self.update_arg_data_state() + for arg in reversed(self.args): + if arg._is_mat and arg.lgmaps is not None: + for m, lgmaps in zip(arg.data, orig_lgmaps.pop()): + m.handle.setLGMap(*lgmaps) + self.reduction_end() + self.local_to_global_end() + + @mpi.collective + def _compute(self, part, fun, *arglist): + """Executes the kernel over all members of a MPI-part of the iteration space. + :arg part: The :class:`SetPartition` to compute over + :arg fun: The :class:`JITModule` encapsulating the compiled + code (may be ignored by the backend). + :arg arglist: The arguments to pass to the compiled code (may + be ignored by the backend, depending on the exact implementation)""" + raise RuntimeError("Must select a backend") + + @mpi.collective + def global_to_local_begin(self): + """Start halo exchanges.""" + for arg in self.unique_dat_args: + arg.global_to_local_begin() + + @mpi.collective + def global_to_local_end(self): + """Finish halo exchanges""" + for arg in self.unique_dat_args: + arg.global_to_local_end() + + @mpi.collective + def local_to_global_begin(self): + """Start halo exchanges.""" + for arg in self.unique_dat_args: + arg.local_to_global_begin() + + @mpi.collective + def local_to_global_end(self): + """Finish halo exchanges (wait on irecvs)""" + for arg in self.unique_dat_args: + arg.local_to_global_end() + + @utils.cached_property + def _reduction_event_begin(self): + return profiling.timed_region("ParLoopRednBegin") + + @utils.cached_property + def _reduction_event_end(self): + return profiling.timed_region("ParLoopRednEnd") + + @utils.cached_property + def _has_reduction(self): + return len(self.global_reduction_args) > 0 + + @mpi.collective + def reduction_begin(self): + """Start reductions""" + if not self._has_reduction: + return + with self._reduction_event_begin: 
+ for arg in self.global_reduction_args: + arg.reduction_begin(self.comm) + + @mpi.collective + def reduction_end(self): + """End reductions""" + if not self._has_reduction: + return + with self._reduction_event_end: + for arg in self.global_reduction_args: + arg.reduction_end(self.comm) + # Finalise global increments + for tmp, glob in self._reduced_globals.items(): + glob._data += tmp._data + + @mpi.collective + def update_arg_data_state(self): + r"""Update the state of the :class:`DataCarrier`\s in the arguments to the `par_loop`. + This marks :class:`Mat`\s that need assembly.""" + for arg in self.args: + access = arg.access + if access is Access.READ: + continue + if arg._is_dat: + arg.data.halo_valid = False + if arg._is_mat: + state = {Access.WRITE: Mat.INSERT_VALUES, + Access.INC: Mat.ADD_VALUES}[access] + arg.data.assembly_state = state + + @utils.cached_property + def dat_args(self): + return tuple(arg for arg in self.args if arg._is_dat) + + @utils.cached_property + def unique_dat_args(self): + seen = {} + unique = [] + for arg in self.dat_args: + if arg.data not in seen: + unique.append(arg) + seen[arg.data] = arg + elif arg.access != seen[arg.data].access: + raise ValueError("Same Dat appears multiple times with different " + "access descriptors") + return tuple(unique) + + @utils.cached_property + def global_reduction_args(self): + return tuple(arg for arg in self.args if arg._is_global_reduction) + + @utils.cached_property + def kernel(self): + """Kernel executed by this parallel loop.""" + return self._kernel + + @utils.cached_property + def args(self): + """Arguments to this parallel loop.""" + return self._actual_args + + @utils.cached_property + def is_layered(self): + """Flag which triggers extrusion""" + return self._is_layered + + @utils.cached_property + def iteration_region(self): + """Specifies the part of the mesh the parallel loop will + be iterating over. 
The effect is the loop only iterates over + a certain part of an extruded mesh, for example on top cells, bottom cells or + interior facets.""" + return self._iteration_region + + +class ParLoop(AbstractParLoop): + + def log_flops(self, flops): + PETSc.Log.logFlops(flops) + + def prepare_arglist(self, iterset, *args): + arglist = iterset._kernel_args_ + for arg in args: + arglist += arg._kernel_args_ + seen = set() + for arg in args: + maps = arg.map_tuple + for map_ in maps: + if map_ is None: + continue + for k in map_._kernel_args_: + if k in seen: + continue + arglist += (k,) + seen.add(k) + return arglist + + @utils.cached_property + def _jitmodule(self): + return JITModule(self.kernel, self.iterset, *self.args, + iterate=self.iteration_region, + pass_layer_arg=self._pass_layer_arg) + + @utils.cached_property + def _compute_event(self): + return profiling.timed_region("ParLoop_{0}_{1}".format(self.iterset.name, self._jitmodule._wrapper_name)) + + @mpi.collective + def _compute(self, part, fun, *arglist): + with self._compute_event: + self.log_flops(part.size * self.num_flops) + fun(part.offset, part.offset + part.size, *arglist) + + +class PyParLoop(AbstractParLoop): + """A stub implementation of "Python" parallel loops. + + This basically executes a python function over the iteration set, + feeding it the appropriate data for each set entity. + + Example usage:: + + .. code-block:: python + + s = op2.Set(10) + d = op2.Dat(s) + d2 = op2.Dat(s**2) + + m = op2.Map(s, s, 2, np.dstack(np.arange(4), + np.roll(np.arange(4), -1))) + + def fn(x, y): + x[0] = y[0] + x[1] = y[1] + + d.data[:] = np.arange(4) + + op2.par_loop(fn, s, d2(op2.WRITE), d(op2.READ, m)) + + print d2.data + # [[ 0. 1.] + # [ 1. 2.] + # [ 2. 3.] + # [ 3. 0.]] + + def fn2(x, y): + x[0] += y[0] + x[1] += y[0] + + op2.par_loop(fn, s, d2(op2.INC), d(op2.READ, m[1])) + + print d2.data + # [[ 1. 2.] + # [ 3. 4.] + # [ 5. 6.] + # [ 3. 
0.]] + """ + def __init__(self, kernel, *args, **kwargs): + if not isinstance(kernel, types.FunctionType): + raise ValueError("Expecting a python function, not a %r" % type(kernel)) + super().__init__(PyKernel(kernel), *args, **kwargs) + + def _compute(self, part, *arglist): + if part.set._extruded: + raise NotImplementedError + subset = isinstance(self.iterset, Subset) + + def arrayview(array, access): + array = array.view() + array.setflags(write=(access is not Access.READ)) + return array + + # Just walk over the iteration set + for e in range(part.offset, part.offset + part.size): + args = [] + if subset: + idx = self.iterset._indices[e] + else: + idx = e + for arg in self.args: + if arg._is_global: + args.append(arrayview(arg.data._data, arg.access)) + elif arg._is_direct: + args.append(arrayview(arg.data._data[idx, ...], arg.access)) + elif arg._is_indirect: + args.append(arrayview(arg.data._data[arg.map.values_with_halo[idx], ...], arg.access)) + elif arg._is_mat: + if arg.access not in {Access.INC, Access.WRITE}: + raise NotImplementedError + if arg._is_mixed_mat: + raise ValueError("Mixed Mats must be split before assembly") + shape = tuple(map(operator.attrgetter("arity"), arg.map_tuple)) + args.append(np.zeros(shape, dtype=arg.data.dtype)) + if args[-1].shape == (): + args[-1] = args[-1].reshape(1) + self._kernel(*args) + for arg, tmp in zip(self.args, args): + if arg.access is Access.READ: + continue + if arg._is_global: + arg.data._data[:] = tmp[:] + elif arg._is_direct: + arg.data._data[idx, ...] = tmp[:] + elif arg._is_indirect: + arg.data._data[arg.map.values_with_halo[idx], ...] 
= tmp[:] + elif arg._is_mat: + if arg.access is Access.INC: + arg.data.addto_values(arg.map[0].values_with_halo[idx], + arg.map[1].values_with_halo[idx], + tmp) + elif arg.access is Access.WRITE: + arg.data.set_values(arg.map[0].values_with_halo[idx], + arg.map[1].values_with_halo[idx], + tmp) + + for arg in self.args: + if arg._is_mat and arg.access is not Access.READ: + # Queue up assembly of matrix + arg.data.assemble() + + +def check_iterset(args, iterset): + """Checks that the iteration set of the :class:`ParLoop` matches the + iteration set of all its arguments. A :class:`MapValueError` is raised + if this condition is not met.""" + + if isinstance(iterset, Subset): + _iterset = iterset.superset + else: + _iterset = iterset + if conf.configuration["type_check"]: + if isinstance(_iterset, MixedSet): + raise ex.SetTypeError("Cannot iterate over MixedSets") + for i, arg in enumerate(args): + if arg._is_global: + continue + if arg._is_direct: + if isinstance(_iterset, ExtrudedSet): + if arg.data.dataset.set != _iterset.parent: + raise ex.MapValueError( + "Iterset of direct arg %s doesn't match ParLoop iterset." % i) + elif arg.data.dataset.set != _iterset: + raise ex.MapValueError( + "Iterset of direct arg %s doesn't match ParLoop iterset." % i) + continue + for j, m in enumerate(arg._map): + if isinstance(_iterset, ExtrudedSet): + if m.iterset != _iterset and m.iterset not in _iterset: + raise ex.MapValueError( + "Iterset of arg %s map %s doesn't match ParLoop iterset." % (i, j)) + elif m.iterset != _iterset and m.iterset not in _iterset: + raise ex.MapValueError( + "Iterset of arg %s map %s doesn't match ParLoop iterset." % (i, j)) + + +@mpi.collective +def par_loop(kernel, iterset, *args, **kwargs): + r"""Invocation of an OP2 kernel + + :arg kernel: The :class:`Kernel` to be executed. + :arg iterset: The iteration :class:`Set` over which the kernel should be + executed. 
+ :arg \*args: One or more :class:`base.Arg`\s constructed from a + :class:`Global`, :class:`Dat` or :class:`Mat` using the call + syntax and passing in an optionally indexed :class:`Map` + through which this :class:`base.Arg` is accessed and the + :class:`base.Access` descriptor indicating how the + :class:`Kernel` is going to access this data (see the example + below). These are the global data structures from and to + which the kernel will read and write. + :kwarg iterate: Optionally specify which region of an + :class:`ExtrudedSet` to iterate over. + Valid values are: + + - ``ON_BOTTOM``: iterate over the bottom layer of cells. + - ``ON_TOP`` iterate over the top layer of cells. + - ``ALL`` iterate over all cells (the default if unspecified) + - ``ON_INTERIOR_FACETS`` iterate over all the layers + except the top layer, accessing data two adjacent (in + the extruded direction) cells at a time. + + :kwarg pass_layer_arg: Should the wrapper pass the current layer + into the kernel (as an ``int``). Only makes sense for + indirect extruded iteration. + + .. warning :: + It is the caller's responsibility that the number and type of all + :class:`base.Arg`\s passed to the :func:`par_loop` match those expected + by the :class:`Kernel`. No runtime check is performed to ensure this! + + :func:`par_loop` invocation is illustrated by the following example :: + + pyop2.par_loop(mass, elements, + mat(pyop2.INC, (elem_node[pyop2.i[0]]), elem_node[pyop2.i[1]]), + coords(pyop2.READ, elem_node)) + + This example will execute the :class:`Kernel` ``mass`` over the + :class:`Set` ``elements`` executing 3x3 times for each + :class:`Set` member, assuming the :class:`Map` ``elem_node`` is of arity 3. + The :class:`Kernel` takes four arguments, the first is a :class:`Mat` named + ``mat``, the second is a field named ``coords``. The remaining two arguments + indicate which local iteration space point the kernel is to execute. 
+ + A :class:`Mat` requires a pair of :class:`Map` objects, one each + for the row and column spaces. In this case both are the same + ``elem_node`` map. The row :class:`Map` is indexed by the first + index in the local iteration space, indicated by the ``0`` index + to :data:`pyop2.i`, while the column space is indexed by + the second local index. The matrix is accessed to increment + values using the ``pyop2.INC`` access descriptor. + + The ``coords`` :class:`Dat` is also accessed via the ``elem_node`` + :class:`Map`, however no indices are passed so all entries of + ``elem_node`` for the relevant member of ``elements`` will be + passed to the kernel as a vector. + """ + if isinstance(kernel, types.FunctionType): + return PyParLoop(kernel, iterset, *args, **kwargs).compute() + return ParLoop(kernel, iterset, *args, **kwargs).compute() + + +def generate_single_cell_wrapper(iterset, args, forward_args=(), kernel_name=None, wrapper_name=None): + """Generates wrapper for a single cell. No iteration loop, but cellwise data is extracted. + Cell is expected as an argument to the wrapper. For extruded, the numbering of the cells + is columnwise continuous, bottom to top. + + :param iterset: The iteration set + :param args: :class:`Arg`s + :param forward_args: To forward unprocessed arguments to the kernel via the wrapper, + give an iterable of strings describing their C types. 
+ :param kernel_name: Kernel function name + :param wrapper_name: Wrapper function name + + :return: string containing the C code for the single-cell wrapper + """ + from pyop2.codegen.builder import WrapperBuilder + from pyop2.codegen.rep2loopy import generate + from loopy.types import OpaqueType + + forward_arg_types = [OpaqueType(fa) for fa in forward_args] + empty_kernel = Kernel("", kernel_name) + builder = WrapperBuilder(kernel=empty_kernel, + iterset=iterset, single_cell=True, + forward_arg_types=forward_arg_types) + for arg in args: + builder.add_argument(arg) + wrapper = generate(builder, wrapper_name) + code = lp.generate_code_v2(wrapper) + + return code.device_code() diff --git a/pyop2/pyparloop.py b/pyop2/pyparloop.py deleted file mode 100644 index 8d1381f60..000000000 --- a/pyop2/pyparloop.py +++ /dev/null @@ -1,168 +0,0 @@ -# This file is part of PyOP2 -# -# PyOP2 is Copyright (c) 2012-2014, Imperial College London and -# others. Please see the AUTHORS file in the main source directory for -# a full list of copyright holders. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * The name of Imperial College London or that of other -# contributors may not be used to endorse or promote products -# derived from this software without specific prior written -# permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS -# ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -# OF THE POSSIBILITY OF SUCH DAMAGE. - -"""A stub implementation of "Python" parallel loops. - -This basically executes a python function over the iteration set, -feeding it the appropriate data for each set entity. - -Example usage:: - -.. code-block:: python - - s = op2.Set(10) - d = op2.Dat(s) - d2 = op2.Dat(s**2) - - m = op2.Map(s, s, 2, np.dstack(np.arange(4), - np.roll(np.arange(4), -1))) - - def fn(x, y): - x[0] = y[0] - x[1] = y[1] - - d.data[:] = np.arange(4) - - op2.par_loop(fn, s, d2(op2.WRITE), d(op2.READ, m)) - - print d2.data - # [[ 0. 1.] - # [ 1. 2.] - # [ 2. 3.] - # [ 3. 0.]] - - def fn2(x, y): - x[0] += y[0] - x[1] += y[0] - - op2.par_loop(fn, s, d2(op2.INC), d(op2.READ, m[1])) - - print d2.data - # [[ 1. 2.] - # [ 3. 4.] - # [ 5. 6.] - # [ 3. 
0.]] -""" - -from operator import attrgetter -import numpy as np -import types -from pyop2 import base - - -# Fake kernel for type checking -class Kernel(base.Kernel): - @classmethod - def _cache_key(cls, *args, **kwargs): - return None - - def __init__(self, code, name=None, **kwargs): - self._func = code - self._name = name - - def __getattr__(self, attr): - """Return None on unrecognised attributes""" - return None - - def __call__(self, *args): - return self._func(*args) - - def __repr__(self): - return 'Kernel("""%s""", %r)' % (self._func, self._name) - - -# Inherit from parloop for type checking and init -class ParLoop(base.ParLoop): - - def __init__(self, kernel, *args, **kwargs): - if not isinstance(kernel, types.FunctionType): - raise ValueError("Expecting a python function, not a %r" % type(kernel)) - super().__init__(Kernel(kernel), *args, **kwargs) - - def _compute(self, part, *arglist): - if part.set._extruded: - raise NotImplementedError - subset = isinstance(self.iterset, base.Subset) - - def arrayview(array, access): - array = array.view() - array.setflags(write=(access is not base.READ)) - return array - - # Just walk over the iteration set - for e in range(part.offset, part.offset + part.size): - args = [] - if subset: - idx = self.iterset._indices[e] - else: - idx = e - for arg in self.args: - if arg._is_global: - args.append(arrayview(arg.data._data, arg.access)) - elif arg._is_direct: - args.append(arrayview(arg.data._data[idx, ...], arg.access)) - elif arg._is_indirect: - args.append(arrayview(arg.data._data[arg.map.values_with_halo[idx], ...], arg.access)) - elif arg._is_mat: - if arg.access not in [base.INC, base.WRITE]: - raise NotImplementedError - if arg._is_mixed_mat: - raise ValueError("Mixed Mats must be split before assembly") - shape = tuple(map(attrgetter("arity"), arg.map_tuple)) - args.append(np.zeros(shape, dtype=arg.data.dtype)) - if args[-1].shape == (): - args[-1] = args[-1].reshape(1) - self._kernel(*args) - for arg, tmp in 
zip(self.args, args): - if arg.access is base.READ: - continue - if arg._is_global: - arg.data._data[:] = tmp[:] - elif arg._is_direct: - arg.data._data[idx, ...] = tmp[:] - elif arg._is_indirect: - arg.data._data[arg.map.values_with_halo[idx], ...] = tmp[:] - elif arg._is_mat: - if arg.access is base.INC: - arg.data.addto_values(arg.map[0].values_with_halo[idx], - arg.map[1].values_with_halo[idx], - tmp) - elif arg.access is base.WRITE: - arg.data.set_values(arg.map[0].values_with_halo[idx], - arg.map[1].values_with_halo[idx], - tmp) - - for arg in self.args: - if arg._is_mat and arg.access is not base.READ: - # Queue up assembly of matrix - arg.data.assemble() diff --git a/pyop2/sequential.py b/pyop2/sequential.py deleted file mode 100644 index ff8189be0..000000000 --- a/pyop2/sequential.py +++ /dev/null @@ -1,251 +0,0 @@ -# This file is part of PyOP2 -# -# PyOP2 is Copyright (c) 2012, Imperial College London and -# others. Please see the AUTHORS file in the main source directory for -# a full list of copyright holders. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * The name of Imperial College London or that of other -# contributors may not be used to endorse or promote products -# derived from this software without specific prior written -# permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS -# ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -# OF THE POSSIBILITY OF SUCH DAMAGE. - -"""OP2 sequential backend.""" - -import os -from copy import deepcopy as dcopy - -import ctypes - -from pyop2.datatypes import IntType, as_ctypes -from pyop2 import base -from pyop2 import compilation -from pyop2 import petsc_base -from pyop2.base import par_loop # noqa: F401 -from pyop2.base import READ, WRITE, RW, INC, MIN, MAX # noqa: F401 -from pyop2.base import ALL -from pyop2.base import Map, MixedMap, PermutedMap, Sparsity, Halo # noqa: F401 -from pyop2.base import Set, ExtrudedSet, MixedSet, Subset # noqa: F401 -from pyop2.base import DatView # noqa: F401 -from pyop2.base import Kernel # noqa: F401 -from pyop2.base import Arg # noqa: F401 -from pyop2.petsc_base import DataSet, MixedDataSet # noqa: F401 -from pyop2.petsc_base import Global, GlobalDataSet # noqa: F401 -from pyop2.petsc_base import Dat, MixedDat, Mat # noqa: F401 -from pyop2.exceptions import * # noqa: F401 -from pyop2.mpi import collective -from pyop2.profiling import timed_region -from pyop2.utils import cached_property, get_petsc_dir - -from petsc4py import PETSc -import loopy - - -class JITModule(base.JITModule): - - _cppargs = [] - _libraries = [] - _system_headers = [] - - def __init__(self, kernel, 
iterset, *args, **kwargs): - r""" - A cached compiled function to execute for a specified par_loop. - - See :func:`~.par_loop` for the description of arguments. - - .. warning :: - - Note to implementors. This object is *cached*, and therefore - should not hold any long term references to objects that - you want to be collected. In particular, after the - ``args`` have been inspected to produce the compiled code, - they **must not** remain part of the object's slots, - otherwise they (and the :class:`~.Dat`\s, :class:`~.Map`\s - and :class:`~.Mat`\s they reference) will never be collected. - """ - # Return early if we were in the cache. - if self._initialized: - return - self.comm = iterset.comm - self._kernel = kernel - self._fun = None - self._iterset = iterset - self._args = args - self._iteration_region = kwargs.get('iterate', ALL) - self._pass_layer_arg = kwargs.get('pass_layer_arg', False) - # Copy the class variables, so we don't overwrite them - self._cppargs = dcopy(type(self)._cppargs) - self._libraries = dcopy(type(self)._libraries) - self._system_headers = dcopy(type(self)._system_headers) - if not kwargs.get('delay', False): - self.compile() - self._initialized = True - - @collective - def __call__(self, *args): - return self._fun(*args) - - @cached_property - def _wrapper_name(self): - return 'wrap_%s' % self._kernel.name - - @cached_property - def code_to_compile(self): - from pyop2.codegen.builder import WrapperBuilder - from pyop2.codegen.rep2loopy import generate - - builder = WrapperBuilder(kernel=self._kernel, - iterset=self._iterset, - iteration_region=self._iteration_region, - pass_layer_to_kernel=self._pass_layer_arg) - for arg in self._args: - builder.add_argument(arg) - - wrapper = generate(builder) - code = loopy.generate_code_v2(wrapper) - - if self._kernel._cpp: - from loopy.codegen.result import process_preambles - preamble = "".join(process_preambles(getattr(code, "device_preambles", []))) - device_code = "\n\n".join(str(dp.ast) for dp 
in code.device_programs) - return preamble + "\nextern \"C\" {\n" + device_code + "\n}\n" - return code.device_code() - - @PETSc.Log.EventDecorator() - @collective - def compile(self): - # If we weren't in the cache we /must/ have arguments - if not hasattr(self, '_args'): - raise RuntimeError("JITModule has no args associated with it, should never happen") - - from pyop2.configuration import configuration - - compiler = configuration["compiler"] - extension = "cpp" if self._kernel._cpp else "c" - cppargs = self._cppargs - cppargs += ["-I%s/include" % d for d in get_petsc_dir()] + \ - ["-I%s" % d for d in self._kernel._include_dirs] + \ - ["-I%s" % os.path.abspath(os.path.dirname(__file__))] - ldargs = ["-L%s/lib" % d for d in get_petsc_dir()] + \ - ["-Wl,-rpath,%s/lib" % d for d in get_petsc_dir()] + \ - ["-lpetsc", "-lm"] + self._libraries - ldargs += self._kernel._ldargs - - self._fun = compilation.load(self, - extension, - self._wrapper_name, - cppargs=cppargs, - ldargs=ldargs, - restype=ctypes.c_int, - compiler=compiler, - comm=self.comm) - # Blow away everything we don't need any more - del self._args - del self._kernel - del self._iterset - - @cached_property - def argtypes(self): - index_type = as_ctypes(IntType) - argtypes = (index_type, index_type) - argtypes += self._iterset._argtypes_ - for arg in self._args: - argtypes += arg._argtypes_ - seen = set() - for arg in self._args: - maps = arg.map_tuple - for map_ in maps: - for k, t in zip(map_._kernel_args_, map_._argtypes_): - if k in seen: - continue - argtypes += (t,) - seen.add(k) - return argtypes - - -class ParLoop(petsc_base.ParLoop): - - def prepare_arglist(self, iterset, *args): - arglist = iterset._kernel_args_ - for arg in args: - arglist += arg._kernel_args_ - seen = set() - for arg in args: - maps = arg.map_tuple - for map_ in maps: - if map_ is None: - continue - for k in map_._kernel_args_: - if k in seen: - continue - arglist += (k,) - seen.add(k) - return arglist - - @cached_property - 
def _jitmodule(self): - return JITModule(self.kernel, self.iterset, *self.args, - iterate=self.iteration_region, - pass_layer_arg=self._pass_layer_arg) - - @cached_property - def _compute_event(self): - return timed_region("ParLoop_{0}_{1}".format(self.iterset.name, self._jitmodule._wrapper_name)) - - @collective - def _compute(self, part, fun, *arglist): - with self._compute_event: - self.log_flops(part.size * self.num_flops) - fun(part.offset, part.offset + part.size, *arglist) - - -def generate_single_cell_wrapper(iterset, args, forward_args=(), kernel_name=None, wrapper_name=None): - """Generates wrapper for a single cell. No iteration loop, but cellwise data is extracted. - Cell is expected as an argument to the wrapper. For extruded, the numbering of the cells - is columnwise continuous, bottom to top. - - :param iterset: The iteration set - :param args: :class:`Arg`s - :param forward_args: To forward unprocessed arguments to the kernel via the wrapper, - give an iterable of strings describing their C types. 
- :param kernel_name: Kernel function name - :param wrapper_name: Wrapper function name - - :return: string containing the C code for the single-cell wrapper - """ - from pyop2.codegen.builder import WrapperBuilder - from pyop2.codegen.rep2loopy import generate - from loopy.types import OpaqueType - - forward_arg_types = [OpaqueType(fa) for fa in forward_args] - empty_kernel = Kernel("", kernel_name) - builder = WrapperBuilder(kernel=empty_kernel, - iterset=iterset, single_cell=True, - forward_arg_types=forward_arg_types) - for arg in args: - builder.add_argument(arg) - wrapper = generate(builder, wrapper_name) - code = loopy.generate_code_v2(wrapper) - - return code.device_code() diff --git a/pyop2/types/__init__.py b/pyop2/types/__init__.py new file mode 100644 index 000000000..e6aefdfe8 --- /dev/null +++ b/pyop2/types/__init__.py @@ -0,0 +1,9 @@ +from .access import * # noqa: F401 +from .data_carrier import * # noqa: F401 +from .dataset import * # noqa: F401 +from .dat import * # noqa: F401 +from .glob import * # noqa: F401 +from .halo import * # noqa: F401 +from .map import * # noqa: F401 +from .mat import * # noqa: F401 +from .set import * # noqa: F401 diff --git a/pyop2/types/access.py b/pyop2/types/access.py new file mode 100644 index 000000000..c3e2fe003 --- /dev/null +++ b/pyop2/types/access.py @@ -0,0 +1,37 @@ +import enum + + +class Access(enum.IntEnum): + READ = 1 + WRITE = 2 + RW = 3 + INC = 4 + MIN = 5 + MAX = 6 + + +READ = Access.READ +"""The :class:`Global`, :class:`Dat`, or :class:`Mat` is accessed read-only.""" + +WRITE = Access.WRITE +"""The :class:`Global`, :class:`Dat`, or :class:`Mat` is accessed write-only, +and OP2 is not required to handle write conflicts.""" + +RW = Access.RW +"""The :class:`Global`, :class:`Dat`, or :class:`Mat` is accessed for reading +and writing, and OP2 is not required to handle write conflicts.""" + +INC = Access.INC +"""The kernel computes increments to be summed onto a :class:`Global`, +:class:`Dat`, or 
:class:`Mat`. OP2 is responsible for managing the write +conflicts caused.""" + +MIN = Access.MIN +"""The kernel contributes to a reduction into a :class:`Global` using a ``min`` +operation. OP2 is responsible for reducing over the different kernel +invocations.""" + +MAX = Access.MAX +"""The kernel contributes to a reduction into a :class:`Global` using a ``max`` +operation. OP2 is responsible for reducing over the different kernel +invocations.""" diff --git a/pyop2/types/dat.py b/pyop2/types/dat.py new file mode 100644 index 000000000..9abfa6d9c --- /dev/null +++ b/pyop2/types/dat.py @@ -0,0 +1,1023 @@ +import abc +import contextlib +import ctypes +import itertools +import operator + +import loopy as lp +import numpy as np +from petsc4py import PETSc + +from pyop2 import ( + configuration as conf, + datatypes as dtypes, + exceptions as ex, + mpi, + utils +) +from pyop2.types.access import Access +from pyop2.types.dataset import DataSet, GlobalDataSet, MixedDataSet +from pyop2.types.data_carrier import DataCarrier, EmptyDataMixin, VecAccessMixin +from pyop2.types.set import ExtrudedSet, GlobalSet, Set + + +class AbstractDat(DataCarrier, EmptyDataMixin, abc.ABC): + """OP2 vector data. A :class:`Dat` holds values on every element of a + :class:`DataSet`. + + If a :class:`Set` is passed as the ``dataset`` argument, rather + than a :class:`DataSet`, the :class:`Dat` is created with a default + :class:`DataSet` dimension of 1. + + If a :class:`Dat` is passed as the ``dataset`` argument, a copy is + returned. + + It is permissible to pass `None` as the `data` argument. In this + case, allocation of the data buffer is postponed until it is + accessed. + + .. note:: + If the data buffer is not passed in, it is implicitly + initialised to be zero. + + When a :class:`Dat` is passed to :func:`pyop2.op2.par_loop`, the map via + which indirection occurs and the access descriptor are passed by + calling the :class:`Dat`. 
For instance, if a :class:`Dat` named ``D`` is + to be accessed for reading via a :class:`Map` named ``M``, this is + accomplished by :: + + D(pyop2.READ, M) + + The :class:`Map` through which indirection occurs can be indexed + using the index notation described in the documentation for the + :class:`Map`. Direct access to a Dat is accomplished by + omitting the path argument. + + :class:`Dat` objects support the pointwise linear algebra operations + ``+=``, ``*=``, ``-=``, ``/=``, where ``*=`` and ``/=`` also support + multiplication / division by a scalar. + """ + + _zero_kernels = {} + """Class-level cache for zero kernels.""" + + _modes = [Access.READ, Access.WRITE, Access.RW, Access.INC, Access.MIN, Access.MAX] + + @utils.cached_property + def pack(self): + from pyop2.codegen.builder import DatPack + return DatPack + + @utils.validate_type(('dataset', (DataCarrier, DataSet, Set), ex.DataSetTypeError), + ('name', str, ex.NameTypeError)) + @utils.validate_dtype(('dtype', None, ex.DataTypeError)) + def __init__(self, dataset, data=None, dtype=None, name=None): + + if isinstance(dataset, Dat): + self.__init__(dataset.dataset, None, dtype=dataset.dtype, + name="copy_of_%s" % dataset.name) + dataset.copy(self) + return + if type(dataset) is Set or type(dataset) is ExtrudedSet: + # If a Set, rather than a dataset is passed in, default to + # a dataset dimension of 1. 
+ dataset = dataset ** 1 + self._shape = (dataset.total_size,) + (() if dataset.cdim == 1 else dataset.dim) + EmptyDataMixin.__init__(self, data, dtype, self._shape) + + self._dataset = dataset + self.comm = dataset.comm + self.halo_valid = True + self._name = name or "dat_#x%x" % id(self) + + @utils.cached_property + def _kernel_args_(self): + return (self._data.ctypes.data, ) + + @utils.cached_property + def _argtypes_(self): + return (ctypes.c_voidp, ) + + @utils.cached_property + def _wrapper_cache_key_(self): + return (type(self), self.dtype, self._dataset._wrapper_cache_key_) + + @utils.validate_in(('access', _modes, ex.ModeValueError)) + def __call__(self, access, path=None): + from pyop2.parloop import Arg + if conf.configuration["type_check"] and path and path.toset != self.dataset.set: + raise ex.MapValueError("To Set of Map does not match Set of Dat.") + return Arg(data=self, map=path, access=access) + + def __getitem__(self, idx): + """Return self if ``idx`` is 0, raise an error otherwise.""" + if idx != 0: + raise ex.IndexValueError("Can only extract component 0 from %r" % self) + return self + + @utils.cached_property + def split(self): + """Tuple containing only this :class:`Dat`.""" + return (self,) + + @utils.cached_property + def dataset(self): + """:class:`DataSet` on which the Dat is defined.""" + return self._dataset + + @utils.cached_property + def dim(self): + """The shape of the values for each element of the object.""" + return self.dataset.dim + + @utils.cached_property + def cdim(self): + """The scalar number of values for each member of the object. This is + the product of the dim tuple.""" + return self.dataset.cdim + + @property + @mpi.collective + def data(self): + """Numpy array containing the data values. + + With this accessor you are claiming that you will modify + the values you get back. If you only need to look at the + values, use :meth:`data_ro` instead. 
+ + This only shows local values, to see the halo values too use + :meth:`data_with_halos`. + + """ + if self.dataset.total_size > 0 and self._data.size == 0 and self.cdim > 0: + raise RuntimeError("Illegal access: no data associated with this Dat!") + self.halo_valid = False + v = self._data[:self.dataset.size].view() + v.setflags(write=True) + return v + + @property + @mpi.collective + def data_with_halos(self): + r"""A view of this :class:`Dat`\s data. + + This accessor marks the :class:`Dat` as dirty, see + :meth:`data` for more details on the semantics. + + With this accessor, you get to see up to date halo values, but + you should not try and modify them, because they will be + overwritten by the next halo exchange.""" + self.global_to_local_begin(Access.RW) + self.global_to_local_end(Access.RW) + self.halo_valid = False + v = self._data.view() + v.setflags(write=True) + return v + + @property + @mpi.collective + def data_ro(self): + """Numpy array containing the data values. Read-only. + + With this accessor you are not allowed to modify the values + you get back. If you need to do so, use :meth:`data` instead. + + This only shows local values, to see the halo values too use + :meth:`data_ro_with_halos`. + + """ + if self.dataset.total_size > 0 and self._data.size == 0 and self.cdim > 0: + raise RuntimeError("Illegal access: no data associated with this Dat!") + v = self._data[:self.dataset.size].view() + v.setflags(write=False) + return v + + @property + @mpi.collective + def data_ro_with_halos(self): + r"""A view of this :class:`Dat`\s data. + + This accessor does not mark the :class:`Dat` as dirty, and is + a read only view, see :meth:`data_ro` for more details on the + semantics. + + With this accessor, you get to see up to date halo values, but + you should not try and modify them, because they will be + overwritten by the next halo exchange. 
+ + """ + self.global_to_local_begin(Access.READ) + self.global_to_local_end(Access.READ) + v = self._data.view() + v.setflags(write=False) + return v + + def save(self, filename): + """Write the data array to file ``filename`` in NumPy format.""" + np.save(filename, self.data_ro) + + def load(self, filename): + """Read the data stored in file ``filename`` into a NumPy array + and store the values in :meth:`_data`. + """ + # The np.save method appends a .npy extension to the file name + # if the user has not supplied it. However, np.load does not, + # so we need to handle this ourselves here. + if(filename[-4:] != ".npy"): + filename = filename + ".npy" + + if isinstance(self.data, tuple): + # MixedDat case + for d, d_from_file in zip(self.data, np.load(filename)): + d[:] = d_from_file[:] + else: + self.data[:] = np.load(filename) + + @utils.cached_property + def shape(self): + return self._shape + + @utils.cached_property + def dtype(self): + return self._dtype + + @utils.cached_property + def nbytes(self): + """Return an estimate of the size of the data associated with this + :class:`Dat` in bytes. This will be the correct size of the data + payload, but does not take into account the (presumably small) + overhead of the object and its metadata. + + Note that this is the process local memory usage, not the sum + over all MPI processes. + """ + + return self.dtype.itemsize * self.dataset.total_size * self.dataset.cdim + + @mpi.collective + def zero(self, subset=None): + """Zero the data associated with this :class:`Dat` + + :arg subset: A :class:`Subset` of entries to zero (optional).""" + # If there is no subset we can safely zero the halo values. 
+ if subset is None: + self._data[:] = 0 + self.halo_valid = True + elif subset.superset != self.dataset.set: + raise ex.MapValueError("The subset and dataset are incompatible") + else: + self.data[subset.owned_indices] = 0 + + @mpi.collective + def copy(self, other, subset=None): + """Copy the data in this :class:`Dat` into another. + + :arg other: The destination :class:`Dat` + :arg subset: A :class:`Subset` of elements to copy (optional)""" + if other is self: + return + if subset is None: + # If the current halo is valid we can also copy these values across. + if self.halo_valid: + other._data[:] = self._data + other.halo_valid = True + else: + other.data[:] = self.data_ro + elif subset.superset != self.dataset.set: + raise ex.MapValueError("The subset and dataset are incompatible") + else: + other.data[subset.owned_indices] = self.data_ro[subset.owned_indices] + + def __iter__(self): + """Yield self when iterated over.""" + yield self + + def __len__(self): + """This is not a mixed type and therefore of length 1.""" + return 1 + + def __str__(self): + return "OP2 Dat: %s on (%s) with datatype %s" \ + % (self._name, self._dataset, self.dtype.name) + + def __repr__(self): + return "Dat(%r, None, %r, %r)" \ + % (self._dataset, self.dtype, self._name) + + def _check_shape(self, other): + if other.dataset.dim != self.dataset.dim: + raise ValueError('Mismatched shapes in operands %s and %s', + self.dataset.dim, other.dataset.dim) + + def _op_kernel(self, op, globalp, dtype): + from pyop2.kernel import Kernel + key = (op, globalp, dtype) + try: + if not hasattr(self, "_op_kernel_cache"): + self._op_kernel_cache = {} + return self._op_kernel_cache[key] + except KeyError: + pass + import islpy as isl + import pymbolic.primitives as p + name = "binop_%s" % op.__name__ + inames = isl.make_zero_and_vars(["i"]) + domain = (inames[0].le_set(inames["i"])) & (inames["i"].lt_set(inames[0] + self.cdim)) + _other = p.Variable("other") + _self = p.Variable("self") + _ret = 
p.Variable("ret") + i = p.Variable("i") + lhs = _ret.index(i) + if globalp: + rhs = _other.index(0) + rshape = (1, ) + else: + rhs = _other.index(i) + rshape = (self.cdim, ) + insn = lp.Assignment(lhs, op(_self.index(i), rhs), within_inames=frozenset(["i"])) + data = [lp.GlobalArg("self", dtype=self.dtype, shape=(self.cdim,)), + lp.GlobalArg("other", dtype=dtype, shape=rshape), + lp.GlobalArg("ret", dtype=self.dtype, shape=(self.cdim,))] + knl = lp.make_function([domain], [insn], data, name=name, target=lp.CTarget(), lang_version=(2018, 2)) + return self._op_kernel_cache.setdefault(key, Kernel(knl, name)) + + def _op(self, other, op): + from pyop2.parloop import par_loop + from pyop2.types.glob import Global + ret = Dat(self.dataset, None, self.dtype) + if np.isscalar(other): + other = Global(1, data=other) + globalp = True + else: + self._check_shape(other) + globalp = False + par_loop(self._op_kernel(op, globalp, other.dtype), + self.dataset.set, self(Access.READ), other(Access.READ), ret(Access.WRITE)) + return ret + + def _iop_kernel(self, op, globalp, other_is_self, dtype): + key = (op, globalp, other_is_self, dtype) + try: + if not hasattr(self, "_iop_kernel_cache"): + self._iop_kernel_cache = {} + return self._iop_kernel_cache[key] + except KeyError: + pass + import islpy as isl + import pymbolic.primitives as p + from pyop2.parloop import Kernel + name = "iop_%s" % op.__name__ + inames = isl.make_zero_and_vars(["i"]) + domain = (inames[0].le_set(inames["i"])) & (inames["i"].lt_set(inames[0] + self.cdim)) + _other = p.Variable("other") + _self = p.Variable("self") + i = p.Variable("i") + lhs = _self.index(i) + rshape = (self.cdim, ) + if globalp: + rhs = _other.index(0) + rshape = (1, ) + elif other_is_self: + rhs = _self.index(i) + else: + rhs = _other.index(i) + insn = lp.Assignment(lhs, op(lhs, rhs), within_inames=frozenset(["i"])) + data = [lp.GlobalArg("self", dtype=self.dtype, shape=(self.cdim,))] + if not other_is_self: + 
data.append(lp.GlobalArg("other", dtype=dtype, shape=rshape)) + knl = lp.make_function([domain], [insn], data, name=name, target=lp.CTarget(), lang_version=(2018, 2)) + return self._iop_kernel_cache.setdefault(key, Kernel(knl, name)) + + def _iop(self, other, op): + from pyop2.parloop import par_loop + from pyop2.types.glob import Global + globalp = False + if np.isscalar(other): + other = Global(1, data=other) + globalp = True + elif other is not self: + self._check_shape(other) + args = [self(Access.INC)] + if other is not self: + args.append(other(Access.READ)) + par_loop(self._iop_kernel(op, globalp, other is self, other.dtype), self.dataset.set, *args) + return self + + def _inner_kernel(self, dtype): + try: + if not hasattr(self, "_inner_kernel_cache"): + self._inner_kernel_cache = {} + return self._inner_kernel_cache[dtype] + except KeyError: + pass + import islpy as isl + import pymbolic.primitives as p + from pyop2.kernel import Kernel + inames = isl.make_zero_and_vars(["i"]) + domain = (inames[0].le_set(inames["i"])) & (inames["i"].lt_set(inames[0] + self.cdim)) + _self = p.Variable("self") + _other = p.Variable("other") + _ret = p.Variable("ret") + _conj = p.Variable("conj") if dtype.kind == "c" else lambda x: x + i = p.Variable("i") + insn = lp.Assignment(_ret[0], _ret[0] + _self[i]*_conj(_other[i]), + within_inames=frozenset(["i"])) + data = [lp.GlobalArg("self", dtype=self.dtype, shape=(self.cdim,)), + lp.GlobalArg("other", dtype=dtype, shape=(self.cdim,)), + lp.GlobalArg("ret", dtype=self.dtype, shape=(1,))] + knl = lp.make_function([domain], [insn], data, name="inner", target=lp.CTarget(), lang_version=(2018, 2)) + k = Kernel(knl, "inner") + return self._inner_kernel_cache.setdefault(dtype, k) + + def inner(self, other): + """Compute the l2 inner product of the flattened :class:`Dat` + + :arg other: the other :class:`Dat` to compute the inner + product against. The complex conjugate of this is taken. 
+ + """ + from pyop2.parloop import par_loop + from pyop2.types.glob import Global + self._check_shape(other) + ret = Global(1, data=0, dtype=self.dtype) + par_loop(self._inner_kernel(other.dtype), self.dataset.set, + self(Access.READ), other(Access.READ), ret(Access.INC)) + return ret.data_ro[0] + + @property + def norm(self): + """Compute the l2 norm of this :class:`Dat` + + .. note:: + + This acts on the flattened data (see also :meth:`inner`).""" + from math import sqrt + return sqrt(self.inner(self).real) + + def __pos__(self): + pos = Dat(self) + return pos + + def __add__(self, other): + """Pointwise addition of fields.""" + return self._op(other, operator.add) + + def __radd__(self, other): + """Pointwise addition of fields. + + self.__radd__(other) <==> other + self.""" + return self + other + + @utils.cached_property + def _neg_kernel(self): + # Copy and negate in one go. + import islpy as isl + import pymbolic.primitives as p + from pyop2.kernel import Kernel + name = "neg" + inames = isl.make_zero_and_vars(["i"]) + domain = (inames[0].le_set(inames["i"])) & (inames["i"].lt_set(inames[0] + self.cdim)) + lvalue = p.Variable("other") + rvalue = p.Variable("self") + i = p.Variable("i") + insn = lp.Assignment(lvalue.index(i), -rvalue.index(i), within_inames=frozenset(["i"])) + data = [lp.GlobalArg("other", dtype=self.dtype, shape=(self.cdim,)), + lp.GlobalArg("self", dtype=self.dtype, shape=(self.cdim,))] + knl = lp.make_function([domain], [insn], data, name=name, target=lp.CTarget(), lang_version=(2018, 2)) + return Kernel(knl, name) + + def __neg__(self): + from pyop2.parloop import par_loop + neg = Dat(self.dataset, dtype=self.dtype) + par_loop(self._neg_kernel, self.dataset.set, neg(Access.WRITE), self(Access.READ)) + return neg + + def __sub__(self, other): + """Pointwise subtraction of fields.""" + return self._op(other, operator.sub) + + def __rsub__(self, other): + """Pointwise subtraction of fields. 
+ + self.__rsub__(other) <==> other - self.""" + ret = -self + ret += other + return ret + + def __mul__(self, other): + """Pointwise multiplication or scaling of fields.""" + return self._op(other, operator.mul) + + def __rmul__(self, other): + """Pointwise multiplication or scaling of fields. + + self.__rmul__(other) <==> other * self.""" + return self.__mul__(other) + + def __truediv__(self, other): + """Pointwise division or scaling of fields.""" + return self._op(other, operator.truediv) + + __div__ = __truediv__ # Python 2 compatibility + + def __iadd__(self, other): + """Pointwise addition of fields.""" + return self._iop(other, operator.iadd) + + def __isub__(self, other): + """Pointwise subtraction of fields.""" + return self._iop(other, operator.isub) + + def __imul__(self, other): + """Pointwise multiplication or scaling of fields.""" + return self._iop(other, operator.imul) + + def __itruediv__(self, other): + """Pointwise division or scaling of fields.""" + return self._iop(other, operator.itruediv) + + @mpi.collective + def global_to_local_begin(self, access_mode): + """Begin a halo exchange from global to ghosted representation. + + :kwarg access_mode: Mode with which the data will subsequently + be accessed.""" + halo = self.dataset.halo + if halo is None: + return + if not self.halo_valid and access_mode in {Access.READ, Access.RW}: + halo.global_to_local_begin(self, Access.WRITE) + elif access_mode in {Access.INC, Access.MIN, Access.MAX}: + min_, max_ = dtypes.dtype_limits(self.dtype) + val = {Access.MAX: min_, Access.MIN: max_, Access.INC: 0}[access_mode] + self._data[self.dataset.size:] = val + else: + # WRITE + pass + + @mpi.collective + def global_to_local_end(self, access_mode): + """End a halo exchange from global to ghosted representation. 
+ + :kwarg access_mode: Mode with which the data will subsequently + be accessed.""" + halo = self.dataset.halo + if halo is None: + return + if not self.halo_valid and access_mode in {Access.READ, Access.RW}: + halo.global_to_local_end(self, Access.WRITE) + self.halo_valid = True + elif access_mode in {Access.INC, Access.MIN, Access.MAX}: + self.halo_valid = False + else: + # WRITE + pass + + @mpi.collective + def local_to_global_begin(self, insert_mode): + """Begin a halo exchange from ghosted to global representation. + + :kwarg insert_mode: insertion mode (an access descriptor)""" + halo = self.dataset.halo + if halo is None: + return + halo.local_to_global_begin(self, insert_mode) + + @mpi.collective + def local_to_global_end(self, insert_mode): + """End a halo exchange from ghosted to global representation. + + :kwarg insert_mode: insertion mode (an access descriptor)""" + halo = self.dataset.halo + if halo is None: + return + halo.local_to_global_end(self, insert_mode) + self.halo_valid = False + + +class DatView(AbstractDat): + """An indexed view into a :class:`Dat`. + + This object can be used like a :class:`Dat` but the kernel will + only see the requested index, rather than the full data. + + :arg dat: The :class:`Dat` to create a view into. + :arg index: The component to select a view of. 
+ """ + def __init__(self, dat, index): + index = utils.as_tuple(index) + assert len(index) == len(dat.dim) + for i, d in zip(index, dat.dim): + if not (0 <= i < d): + raise ex.IndexValueError("Can't create DatView with index %s for Dat with shape %s" % (index, dat.dim)) + self.index = index + # Point at underlying data + super(DatView, self).__init__(dat.dataset, + dat._data, + dtype=dat.dtype, + name="view[%s](%s)" % (index, dat.name)) + self._parent = dat + + @utils.cached_property + def _kernel_args_(self): + return self._parent._kernel_args_ + + @utils.cached_property + def _argtypes_(self): + return self._parent._argtypes_ + + @utils.cached_property + def _wrapper_cache_key_(self): + return (type(self), self.index, self._parent._wrapper_cache_key_) + + @utils.cached_property + def cdim(self): + return 1 + + @utils.cached_property + def dim(self): + return (1, ) + + @utils.cached_property + def shape(self): + return (self.dataset.total_size, ) + + @property + def data(self): + full = self._parent.data + idx = (slice(None), *self.index) + return full[idx] + + @property + def data_ro(self): + full = self._parent.data_ro + idx = (slice(None), *self.index) + return full[idx] + + @property + def data_with_halos(self): + full = self._parent.data_with_halos + idx = (slice(None), *self.index) + return full[idx] + + @property + def data_ro_with_halos(self): + full = self._parent.data_ro_with_halos + idx = (slice(None), *self.index) + return full[idx] + + +class Dat(AbstractDat, VecAccessMixin): + @utils.cached_property + def _vec(self): + assert self.dtype == PETSc.ScalarType, \ + "Can't create Vec with type %s, must be %s" % (self.dtype, PETSc.ScalarType) + # Can't duplicate layout_vec of dataset, because we then + # carry around extra unnecessary data. + # But use getSizes to save an Allreduce in computing the + # global size. 
+ size = self.dataset.layout_vec.getSizes() + data = self._data[:size[0]] + return PETSc.Vec().createWithArray(data, size=size, bsize=self.cdim, comm=self.comm) + + @contextlib.contextmanager + def vec_context(self, access): + r"""A context manager for a :class:`PETSc.Vec` from a :class:`Dat`. + + :param access: Access descriptor: READ, WRITE, or RW.""" + # PETSc Vecs have a state counter and cache norm computations + # to return immediately if the state counter is unchanged. + # Since we've updated the data behind their back, we need to + # change that state counter. + self._vec.stateIncrease() + yield self._vec + if access is not Access.READ: + self.halo_valid = False + + +class MixedDat(AbstractDat, VecAccessMixin): + r"""A container for a bag of :class:`Dat`\s. + + Initialized either from a :class:`MixedDataSet`, a :class:`MixedSet`, or + an iterable of :class:`DataSet`\s and/or :class:`Set`\s, where all the + :class:`Set`\s are implcitly upcast to :class:`DataSet`\s :: + + mdat = op2.MixedDat(mdset) + mdat = op2.MixedDat([dset1, ..., dsetN]) + + or from an iterable of :class:`Dat`\s :: + + mdat = op2.MixedDat([dat1, ..., datN]) + """ + + def __init__(self, mdset_or_dats): + from pyop2.types.glob import Global + + def what(x): + if isinstance(x, (Global, GlobalDataSet, GlobalSet)): + return Global + elif isinstance(x, (Dat, DataSet, Set)): + return Dat + else: + raise ex.DataSetTypeError("Huh?!") + + if isinstance(mdset_or_dats, MixedDat): + self._dats = tuple(what(d)(d) for d in mdset_or_dats) + else: + self._dats = tuple(d if isinstance(d, (Dat, Global)) else what(d)(d) for d in mdset_or_dats) + if not all(d.dtype == self._dats[0].dtype for d in self._dats): + raise ex.DataValueError('MixedDat with different dtypes is not supported') + # TODO: Think about different communicators on dats (c.f. 
@utils.cached_property
def _data(self):
    """Return a tuple with the ``_data`` attribute of every component of
    this :class:`MixedDat`, in iteration order."""
    return tuple(component._data for component in self)
@mpi.collective
def zero(self, subset=None):
    """Zero the data of every component of this :class:`MixedDat`.

    :arg subset: must be ``None``; zeroing a subset of a mixed set is
        not implemented.
    :raises NotImplementedError: if ``subset`` is not ``None``."""
    if subset is None:
        for component in self._dats:
            component.zero()
        return
    raise NotImplementedError("Subsets of mixed sets not implemented")
def inner(self, other):
    """Compute the l2 inner product.

    The result is the sum of the component-wise ``inner`` products,
    pairing the components of ``self`` and ``other`` in order.

    :arg other: the other :class:`MixedDat` to compute the inner product against"""
    total = 0
    pairs = zip(self, other)
    for mine, theirs in pairs:
        total += mine.inner(theirs)
    return total
def __rsub__(self, other):
    """Pointwise subtraction of fields.

    self.__rsub__(other) <==> other - self."""
    # ``_op`` always hands the component of ``self`` to the operator as the
    # first operand, so the operands have to be swapped here; using
    # ``operator.sub`` directly would compute ``self - other``.
    return self._op(other, lambda mine, theirs: theirs - mine)

def __mul__(self, other):
    """Pointwise multiplication or scaling of fields."""
    return self._op(other, operator.mul)

def __rmul__(self, other):
    """Pointwise multiplication or scaling of fields.

    self.__rmul__(other) <==> other * self."""
    return self._op(other, operator.mul)

def __truediv__(self, other):
    """Pointwise division or scaling of fields."""
    return self._op(other, operator.truediv)

# Python 3 dispatches ``/`` to ``__truediv__`` and the ``operator`` module
# has no ``div``; the old name is kept as an alias for existing callers.
__div__ = __truediv__

def __iadd__(self, other):
    """Pointwise addition of fields."""
    return self._iop(other, operator.iadd)

def __isub__(self, other):
    """Pointwise subtraction of fields."""
    return self._iop(other, operator.isub)

def __imul__(self, other):
    """Pointwise multiplication or scaling of fields."""
    return self._iop(other, operator.imul)

def __itruediv__(self, other):
    """Pointwise division or scaling of fields."""
    return self._iop(other, operator.itruediv)

# Python 3 dispatches ``/=`` to ``__itruediv__`` and the ``operator`` module
# has no ``idiv``; the old name is kept as an alias for existing callers.
__idiv__ = __itruediv__
@utils.cached_property
def dtype(self):
    """The ``dtype`` attribute of the underlying ``_data`` buffer."""
    buffer = self._data
    return buffer.dtype
class EmptyDataMixin(abc.ABC):
    """Mixin for :class:`Dat` and :class:`Global` objects that defers
    allocation of the data buffer when the user supplied no data.

    The buffer lives in ``_numpy_data``.  Reading the :attr:`_data`
    property creates a zero-filled array of shape ``self.shape`` the
    first time it is needed.
    """
    def __init__(self, data, dtype, shape):
        if data is not None:
            # User data: validated/reshaped by ``utils.verify_reshape``; the
            # dtype is then taken from the resulting buffer.
            self._numpy_data = utils.verify_reshape(data, dtype, shape, allow_none=True)
            self._dtype = self._data.dtype
            return
        # No data yet: only record the dtype, allocation happens on demand.
        requested = dtypes.ScalarType if dtype is None else dtype
        self._dtype = np.dtype(requested)

    @utils.cached_property
    def _data(self):
        """The data buffer: the user-provided one if there is one,
        otherwise a freshly allocated array of zeros."""
        if self._is_allocated:
            return self._numpy_data
        self._numpy_data = np.zeros(self.shape, dtype=self._dtype)
        return self._numpy_data

    @property
    def _is_allocated(self):
        """``True`` once ``_numpy_data`` exists on this object."""
        return hasattr(self, '_numpy_data')
@utils.validate_type(('iter_set', Set, ex.SetTypeError),
                     ('dim', (numbers.Integral, tuple, list), ex.DimTypeError),
                     ('name', str, ex.NameTypeError))
def __init__(self, iter_set, dim=1, name=None):
    """Set up a :class:`DataSet` on ``iter_set``.

    :arg iter_set: the :class:`Set` the data set is built on; extruded
        sets and subsets are rejected.
    :arg dim: an integer or a tuple/list of integers giving the shape of
        the values stored per set element.
    :arg name: optional label; a name derived from ``id(self)`` is used
        when it is not given.
    :raises NotImplementedError: for an :class:`ExtrudedSet` or a
        :class:`Subset`.

    Returns without touching any state when ``_initialized`` is already
    true."""
    if isinstance(iter_set, ExtrudedSet):
        raise NotImplementedError("Not allowed!")
    if self._initialized:
        return
    if isinstance(iter_set, Subset):
        raise NotImplementedError("Deriving a DataSet from a Subset is unsupported")
    self._set = iter_set
    shape = utils.as_tuple(dim, numbers.Integral)
    self._dim = shape
    # Scalar number of values per element: the product of the shape.
    self._cdim = np.prod(shape).item()
    self._name = name if name else "dset_#x%x" % id(self)
    self._initialized = True
def __str__(self):
    """Readable summary giving the name, the parent set and the dim."""
    fields = (self._name, self._set, self._dim)
    return "OP2 DataSet: %s on set %s, with dim %s" % fields
@utils.cached_property
def field_ises(self):
    """A tuple of PETSc ISes defining the global indices for each set in
    the DataSet.

    Used when extracting blocks from matrices for solvers."""
    row_counts = [dset.size * dset.cdim for dset in self]
    owned_rows = sum(row_counts)
    # The scan result includes this rank's own rows; removing them leaves
    # the index of the first row owned here.
    first_row = self.comm.scan(owned_rows) - owned_rows
    index_sets = []
    for dset, nrows in zip(self, row_counts):
        stride = PETSc.IS().createStride(nrows, first=first_row, step=1,
                                         comm=self.comm)
        stride.setBlockSize(dset.cdim)
        index_sets.append(stride)
        first_row += nrows
    return tuple(index_sets)
def __init__(self, global_):
    """Build the proxy data set for ``global_``.

    :param global_: The :class:`Global` on which this object is based."""

    # ``_global`` has to be assigned first: the ``comm`` property of this
    # class returns ``self._global.comm`` and is read on the next line.
    self._global = global_
    self._globalset = GlobalSet(comm=self.comm)
    # NOTE(review): the ``name`` property of this class returns the
    # Global's ``_name``; this attribute is what ``__str__`` prints.
    self._name = "gdset_#x%x" % id(self)
@utils.cached_property
def size(self):
    """The number of local entries in the Dataset (1 on rank 0)"""
    # Use the communicator of the underlying Global (the ``comm`` property
    # of this class).  The previous ``mpi.MPI.comm`` lookup relied on an
    # attribute the mpi4py ``MPI`` module is not known to provide, and it
    # ignored the communicator the Global actually lives on.
    return 1 if self.comm.rank == 0 else 0
@utils.cached_property
def layout_vec(self):
    """A PETSc Vec compatible with the dof layout of this DataSet.

    The local size is ``size * cdim`` and the block size is ``cdim``; the
    global size is passed as ``None``."""
    layout = PETSc.Vec().create(comm=self.comm)
    local_entries = self.size * self.cdim
    layout.setSizes((local_entries, None), bsize=self.cdim)
    layout.setUp()
    return layout
+ + Initialized either from a :class:`MixedSet` and an iterable or iterator of + ``dims`` of corresponding length :: + + mdset = op2.MixedDataSet(mset, [dim1, ..., dimN]) + + or from a tuple of :class:`Set`\s and an iterable of ``dims`` of + corresponding length :: + + mdset = op2.MixedDataSet([set1, ..., setN], [dim1, ..., dimN]) + + If all ``dims`` are to be the same, they can also be given as an + :class:`int` for either of above invocations :: + + mdset = op2.MixedDataSet(mset, dim) + mdset = op2.MixedDataSet([set1, ..., setN], dim) + + Initialized from a :class:`MixedSet` without explicitly specifying ``dims`` + they default to 1 :: + + mdset = op2.MixedDataSet(mset) + + Initialized from an iterable or iterator of :class:`DataSet`\s and/or + :class:`Set`\s, where :class:`Set`\s are implicitly upcast to + :class:`DataSet`\s of dim 1 :: + + mdset = op2.MixedDataSet([dset1, ..., dsetN]) + """ + + def __init__(self, arg, dims=None): + r""" + :param arg: a :class:`MixedSet` or an iterable or a generator + expression of :class:`Set`\s or :class:`DataSet`\s or a + mixture of both + :param dims: `None` (the default) or an :class:`int` or an iterable or + generator expression of :class:`int`\s, which **must** be + of same length as `arg` + + .. Warning :: + When using generator expressions for ``arg`` or ``dims``, these + **must** terminate or else will cause an infinite loop. 
@classmethod
def _process_args(cls, arg, dims=None):
    """Normalise the constructor arguments to a tuple of :class:`DataSet`\\s.

    :param arg: a :class:`MixedSet` or an iterable of :class:`Set`\\s
        and/or :class:`DataSet`\\s.
    :param dims: ``None``, a single integer applied to every set, or an
        iterable with one dim per set.
    :returns: a pair ``(args, kwargs)``, where ``args`` is
        ``(first_set, dsets)`` and ``kwargs`` is empty.
    :raises ValueError: if the number of dims differs from the number of
        sets.
    """
    # If the second argument is not None it is expected to be a scalar dim
    # or an iterable of dims and the first is expected to be a MixedSet or
    # an iterable of Sets
    if dims is not None:
        # If arg is a MixedSet, get its Sets tuple
        sets = arg.split if isinstance(arg, MixedSet) else tuple(arg)
        # If dims is a scalar, turn it into a tuple of right length.
        # ``numbers.Integral`` (rather than ``int``) also accepts NumPy
        # integer scalars, matching what ``DataSet.__init__`` allows.
        if isinstance(dims, numbers.Integral):
            dims = (dims,) * len(sets)
        else:
            dims = tuple(dims)
        if len(sets) != len(dims):
            raise ValueError("Got MixedSet of %d Sets but %s dims" %
                             (len(sets), len(dims)))
        dsets = tuple(s ** d for s, d in zip(sets, dims))
    # Otherwise expect the first argument to be an iterable of Sets and/or
    # DataSets and upcast Sets to DataSets as necessary
    else:
        arg = [s if isinstance(s, DataSet) else s ** 1 for s in arg]
        dsets = utils.as_tuple(arg, type=DataSet)

    return (dsets[0].set, ) + (dsets, ), {}
@utils.cached_property
def lgmap(self):
    """A PETSc LGMap mapping process-local indices to global
    indices for this :class:`MixedDataSet`.

    The map always has block size 1.  On a single-process communicator
    it is the identity numbering over all owned entries; otherwise it is
    assembled from the per-field maps as described in the comments below.
    """
    lgmap = PETSc.LGMap()
    if self.comm.size == 1:
        # Serial shortcut: local and global numbering coincide.
        size = sum(s.size * s.cdim for s in self)
        lgmap.create(indices=np.arange(size, dtype=dtypes.IntType),
                     bsize=1, comm=self.comm)
        return lgmap
    # Compute local to global maps for a monolithic mixed system
    # from the individual local to global maps for each field.
    # Exposition:
    #
    # We have N fields and P processes.  The global row
    # ordering is:
    #
    # f_0_p_0, f_1_p_0, ..., f_N_p_0; f_0_p_1, ..., ; f_0_p_P,
    # ..., f_N_p_P.
    #
    # We have per-field local to global numberings, to convert
    # these into multi-field local to global numberings, we note
    # the following:
    #
    # For each entry in the per-field l2g map, we first determine
    # the rank that entry belongs to, call this r.
    #
    # We know that this must be offset by:
    # 1. The sum of all field lengths with rank < r
    # 2. The sum of all lower-numbered field lengths on rank r.
    #
    # Finally, we need to shift the field-local entry by the
    # current field offset.

    # One slot per local entry (owned and halo) over all fields; -1 is a
    # placeholder that the loop below overwrites slice by slice.
    idx_size = sum(s.total_size*s.cdim for s in self)
    indices = np.full(idx_size, -1, dtype=dtypes.IntType)
    # Number of entries owned by this rank, summed over all fields.
    owned_sz = np.array([sum(s.size * s.cdim for s in self)],
                        dtype=dtypes.IntType)
    field_offset = np.empty_like(owned_sz)
    # Scan over ranks, then remove this rank's own contribution: what is
    # left is the total owned by lower ranks (item 1 above).
    self.comm.Scan(owned_sz, field_offset)
    field_offset -= owned_sz

    # Every rank needs the starting offset of every other rank.
    all_field_offsets = np.empty(self.comm.size, dtype=dtypes.IntType)
    self.comm.Allgather(field_offset, all_field_offsets)

    start = 0
    # Per rank: entries of the fields already processed (item 2 above).
    all_local_offsets = np.zeros(self.comm.size, dtype=dtypes.IntType)
    # Entry 0 is never written and stays 0; entries 1: are refilled for
    # each field.
    current_offsets = np.zeros(self.comm.size + 1, dtype=dtypes.IntType)
    for s in self:
        # ``idx`` is a slice of ``indices``; assigning through ``idx[:]``
        # below fills the output array in place.
        idx = indices[start:start + s.total_size * s.cdim]
        owned_sz[0] = s.size * s.cdim
        # Running totals of this field's owned sizes over the ranks,
        # gathered so they can act as rank boundaries for the search.
        self.comm.Scan(owned_sz, field_offset)
        self.comm.Allgather(field_offset, current_offsets[1:])
        # Find the ranks each entry in the l2g belongs to
        l2g = s.unblocked_lgmap.indices
        tmp_indices = np.searchsorted(current_offsets, l2g, side="right") - 1
        # Field-local position on the owning rank, shifted by that rank's
        # global start and by the earlier fields on that rank.
        idx[:] = l2g[:] - current_offsets[tmp_indices] + \
            all_field_offsets[tmp_indices] + all_local_offsets[tmp_indices]
        # Reuse ``current_offsets[1:]`` to collect this field's owned size
        # on every rank and add it to the per-rank running totals.
        self.comm.Allgather(owned_sz, current_offsets[1:])
        all_local_offsets += current_offsets[1:]
        start += s.total_size * s.cdim
    lgmap.create(indices=indices, bsize=1, comm=self.comm)
    return lgmap
@utils.validate_type(('name', str, ex.NameTypeError))
def __init__(self, dim, data=None, dtype=None, name=None, comm=None):
    """Create a :class:`Global`, or copy an existing one.

    :arg dim: the shape of the value (converted to a tuple of ints), or
        another :class:`Global` to copy.
    :arg data: initial values, or ``None`` to postpone allocation.
    :arg dtype: the data type; handed to ``EmptyDataMixin.__init__``
        together with ``data``.
    :arg name: optional label; a name derived from ``id(self)`` is used
        when it is not given.
    :arg comm: stored unchanged as ``self.comm``.

    When ``dim`` is a :class:`Global`, the ``data``, ``dtype``, ``name``
    and ``comm`` arguments are not used: shape, dtype and communicator
    are taken from ``dim`` and the name becomes ``copy_of_<name>``.
    """
    if isinstance(dim, Global):
        # If g is a Global, Global(g) performs a deep copy. This is for compatibility with Dat.
        self.__init__(dim._dim, None, dtype=dim.dtype,
                      name="copy_of_%s" % dim.name, comm=dim.comm)
        # Fill the freshly initialised object with the values of ``dim``.
        dim.copy(self)
        return
    # ``_dim`` is passed to the mixin on the next-but-one line and is what
    # the ``shape`` property of this class returns, so it is set first.
    self._dim = utils.as_tuple(dim, int)
    self._cdim = np.prod(self._dim).item()
    EmptyDataMixin.__init__(self, data, dtype, self._dim)
    # NOTE(review): an uninitialised array with the same shape and dtype
    # as the data; its purpose is not visible from this block.
    self._buf = np.empty(self.shape, dtype=self.dtype)
    self._name = name or "global_#x%x" % id(self)
    self.comm = comm
@mpi.collective
def duplicate(self):
    """Return a deep copy of self.

    The copy has the same dim, dtype, name and communicator and owns a
    separate copy of the data."""
    # ``comm`` must be forwarded: the constructor defaults it to ``None``,
    # and a Global without a communicator cannot build its PETSc Vec
    # (``_vec`` and ``vec_context`` both use ``self.comm``).
    return type(self)(self.dim, data=np.copy(self.data_ro),
                      dtype=self.dtype, name=self.name, comm=self.comm)
def __neg__(self):
    """Return a new object holding the pointwise negation of the data.

    ``self`` is left untouched: negating the read-only view already
    allocates a fresh array, so no separate copy is needed first.

    The communicator is forwarded explicitly, matching what ``_op`` does
    for the binary operators, so the result stays usable by code that
    needs ``self.comm``.
    """
    return type(self)(self.dim, data=np.negative(self.data_ro),
                      dtype=self.dtype, name=self.name, comm=self.comm)
class Halo(abc.ABC):

    """A description of a halo associated with a :class:`Set`.

    The halo object describes which :class:`Set` elements are sent
    where, and which :class:`Set` elements are received from where.

    This is an abstract interface: every property and method below must be
    overridden before a subclass can be instantiated.
    """

    # ``abc.abstractproperty`` has been deprecated since Python 3.3; stacking
    # ``property`` on top of ``abc.abstractmethod`` is the supported spelling
    # and keeps the attribute abstract in exactly the same way.
    @property
    @abc.abstractmethod
    def comm(self):
        """The MPI communicator for this halo."""

    @property
    @abc.abstractmethod
    def local_to_global_numbering(self):
        """The mapping from process-local to process-global numbers for this halo."""

    @abc.abstractmethod
    def global_to_local_begin(self, dat, insert_mode):
        """Begin an exchange from global (assembled) to local (ghosted) representation.

        :arg dat: The :class:`Dat` to exchange.
        :arg insert_mode: The insertion mode.
        """

    @abc.abstractmethod
    def global_to_local_end(self, dat, insert_mode):
        """Finish an exchange from global (assembled) to local (ghosted) representation.

        :arg dat: The :class:`Dat` to exchange.
        :arg insert_mode: The insertion mode.
        """

    @abc.abstractmethod
    def local_to_global_begin(self, dat, insert_mode):
        """Begin an exchange from local (ghosted) to global (assembled) representation.

        :arg dat: The :class:`Dat` to exchange.
        :arg insert_mode: The insertion mode.
        """

    @abc.abstractmethod
    def local_to_global_end(self, dat, insert_mode):
        """Finish an exchange from local (ghosted) to global (assembled) representation.

        :arg dat: The :class:`Dat` to exchange.
        :arg insert_mode: The insertion mode.
        """
+ """ + + dtype = dtypes.IntType + + @utils.validate_type(('iterset', Set, ex.SetTypeError), ('toset', Set, ex.SetTypeError), + ('arity', numbers.Integral, ex.ArityTypeError), ('name', str, ex.NameTypeError)) + def __init__(self, iterset, toset, arity, values=None, name=None, offset=None): + self._iterset = iterset + self._toset = toset + self.comm = toset.comm + self._arity = arity + self._values = utils.verify_reshape(values, dtypes.IntType, + (iterset.total_size, arity), allow_none=True) + self.shape = (iterset.total_size, arity) + self._name = name or "map_#x%x" % id(self) + if offset is None or len(offset) == 0: + self._offset = None + else: + self._offset = utils.verify_reshape(offset, dtypes.IntType, (arity, )) + # A cache for objects built on top of this map + self._cache = {} + + @utils.cached_property + def _kernel_args_(self): + return (self._values.ctypes.data, ) + + @utils.cached_property + def _argtypes_(self): + return (ctypes.c_voidp, ) + + @utils.cached_property + def _wrapper_cache_key_(self): + return (type(self), self.arity, utils.tuplify(self.offset)) + + # This is necessary so that we can convert a Map to a tuple + # (needed in as_tuple). 
Because, __getitem__ no longer returns a + # Map we have to explicitly provide an iterable interface + def __iter__(self): + """Yield self when iterated over.""" + yield self + + def __len__(self): + """This is not a mixed type and therefore of length 1.""" + return 1 + + @utils.cached_property + def split(self): + return (self,) + + @utils.cached_property + def iterset(self): + """:class:`Set` mapped from.""" + return self._iterset + + @utils.cached_property + def toset(self): + """:class:`Set` mapped to.""" + return self._toset + + @utils.cached_property + def arity(self): + """Arity of the mapping: number of toset elements mapped to per + iterset element.""" + return self._arity + + @utils.cached_property + def arities(self): + """Arity of the mapping: number of toset elements mapped to per + iterset element. + + :rtype: tuple""" + return (self._arity,) + + @utils.cached_property + def arange(self): + """Tuple of arity offsets for each constituent :class:`Map`.""" + return (0, self._arity) + + @utils.cached_property + def values(self): + """Mapping array. + + This only returns the map values for local points, to see the + halo points too, use :meth:`values_with_halo`.""" + return self._values[:self.iterset.size] + + @utils.cached_property + def values_with_halo(self): + """Mapping array. 
class PermutedMap(Map):
    """Composition of a standard :class:`Map` with a constant permutation.

    :arg map_: The map to permute.
    :arg permutation: The permutation of the map indices.

    Where normally staging to element data is performed as

    .. code-block::

       local[i] = global[map[i]]

    With a :class:`PermutedMap` we instead get

    .. code-block::

       local[i] = global[map[permutation[i]]]

    This might be useful if your local kernel wants data in a
    different order to the one that the map provides, and you don't
    want two global-sized data structures.

    :raises ValueError: if ``permutation`` is not a rearrangement of
        ``0, ..., map_.arity - 1`` in which every index appears exactly once.
    """
    def __init__(self, map_, permutation):
        self.map_ = map_
        self.permutation = np.asarray(permutation, dtype=Map.dtype)
        # Compare the sorted entries rather than the unique ones: that also
        # rejects repeated indices and wrong lengths, which a unique-based
        # comparison either accepts or fails on with an unrelated error.
        # An explicit raise is used so the check survives ``python -O``.
        expected = np.arange(map_.arity, dtype=Map.dtype)
        if not np.array_equal(np.sort(self.permutation, axis=None), expected):
            raise ValueError(
                "permutation must contain each index in range(%d) exactly once, got %r"
                % (map_.arity, permutation))

    @utils.cached_property
    def _wrapper_cache_key_(self):
        # Extend the key computed by Map with the permutation entries, so
        # that different permutations of one map yield different keys.
        return super()._wrapper_cache_key_ + (tuple(self.permutation),)

    def __getattr__(self, name):
        """Delegate attributes not found on this object to the wrapped map."""
        # __getattr__ only runs after normal lookup has failed. If ``map_``
        # itself is missing (instance created without running __init__),
        # reading ``self.map_`` below would re-enter this method forever.
        if name == "map_":
            raise AttributeError(name)
        return getattr(self.map_, name)
iterset(self): + """:class:`MixedSet` mapped from.""" + return functools.reduce(lambda a, b: a or b, map(lambda s: s if s is None else s.iterset, self._maps)) + + @utils.cached_property + def toset(self): + """:class:`MixedSet` mapped to.""" + return MixedSet(tuple(GlobalSet(comm=self.comm) if m is None else + m.toset for m in self._maps)) + + @utils.cached_property + def arity(self): + """Arity of the mapping: total number of toset elements mapped to per + iterset element.""" + return sum(m.arity for m in self._maps) + + @utils.cached_property + def arities(self): + """Arity of the mapping: number of toset elements mapped to per + iterset element. + + :rtype: tuple""" + return tuple(m.arity for m in self._maps) + + @utils.cached_property + def arange(self): + """Tuple of arity offsets for each constituent :class:`Map`.""" + return (0,) + tuple(np.cumsum(self.arities)) + + @utils.cached_property + def values(self): + """Mapping arrays excluding data for halos. + + This only returns the map values for local points, to see the + halo points too, use :meth:`values_with_halo`.""" + return tuple(m.values for m in self._maps) + + @utils.cached_property + def values_with_halo(self): + """Mapping arrays including data for halos. 
+ + This returns all map values (including halo points), see + :meth:`values` if you only need to look at the local + points.""" + return tuple(None if m is None else + m.values_with_halo for m in self._maps) + + @utils.cached_property + def name(self): + """User-defined labels""" + return tuple(m.name for m in self._maps) + + @utils.cached_property + def offset(self): + """Vertical offsets.""" + return tuple(0 if m is None else m.offset for m in self._maps) + + def __iter__(self): + r"""Yield all :class:`Map`\s when iterated over.""" + for m in self._maps: + yield m + + def __len__(self): + r"""Number of contained :class:`Map`\s.""" + return len(self._maps) + + def __le__(self, o): + """self<=o if o equals self or its self._parent==o.""" + return self == o or all(m <= om for m, om in zip(self, o)) + + def __str__(self): + return "OP2 MixedMap composed of Maps: %s" % (self._maps,) + + def __repr__(self): + return "MixedMap(%r)" % (self._maps,) diff --git a/pyop2/petsc_base.py b/pyop2/types/mat.py similarity index 51% rename from pyop2/petsc_base.py rename to pyop2/types/mat.py index ef38b3aa3..f7da86547 100644 --- a/pyop2/petsc_base.py +++ b/pyop2/types/mat.py @@ -1,443 +1,357 @@ -# This file is part of PyOP2 -# -# PyOP2 is Copyright (c) 2012, Imperial College London and -# others. Please see the AUTHORS file in the main source directory for -# a full list of copyright holders. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. 
-# * The name of Imperial College London or that of other -# contributors may not be used to endorse or promote products -# derived from this software without specific prior written -# permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTERS -# ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -# OF THE POSSIBILITY OF SUCH DAMAGE. - -from contextlib import contextmanager -from petsc4py import PETSc +import abc +import ctypes import itertools + import numpy as np -import abc +from petsc4py import PETSc -from pyop2.datatypes import IntType, ScalarType -from pyop2 import base -from pyop2 import mpi -from pyop2 import sparsity -from pyop2 import utils -from pyop2.base import _make_object, Subset -from pyop2.mpi import collective -from pyop2.profiling import timed_region +from pyop2 import ( + caching, + configuration as conf, + datatypes as dtypes, + exceptions as ex, + mpi, + profiling, + sparsity, + utils +) +from pyop2.types.access import Access +from pyop2.types.data_carrier import DataCarrier +from pyop2.types.dataset import DataSet, GlobalDataSet, MixedDataSet +from pyop2.types.map import Map +from pyop2.types.set import MixedSet, Set, Subset -class DataSet(base.DataSet): +class Sparsity(caching.ObjectCached): - @utils.cached_property - def lgmap(self): - """A PETSc LGMap mapping process-local indices to global - 
indices for this :class:`DataSet`. - """ - lgmap = PETSc.LGMap() - if self.comm.size == 1: - lgmap.create(indices=np.arange(self.size, dtype=IntType), - bsize=self.cdim, comm=self.comm) - else: - lgmap.create(indices=self.halo.local_to_global_numbering, - bsize=self.cdim, comm=self.comm) - return lgmap + """OP2 Sparsity, the non-zero structure a matrix derived from the union of + the outer product of pairs of :class:`Map` objects. - @utils.cached_property - def scalar_lgmap(self): - if self.cdim == 1: - return self.lgmap - indices = self.lgmap.block_indices - return PETSc.LGMap().create(indices=indices, bsize=1, comm=self.comm) + Examples of constructing a Sparsity: :: - @utils.cached_property - def unblocked_lgmap(self): - """A PETSc LGMap mapping process-local indices to global - indices for this :class:`DataSet` with a block size of 1. + Sparsity(single_dset, single_map, 'mass') + Sparsity((row_dset, col_dset), (single_rowmap, single_colmap)) + Sparsity((row_dset, col_dset), + [(first_rowmap, first_colmap), (second_rowmap, second_colmap)]) + + .. _MatMPIAIJSetPreallocation: http://www.mcs.anl.gov/petsc/petsc-current/docs/manualpages/Mat/MatMPIAIJSetPreallocation.html + """ + + def __init__(self, dsets, maps, *, iteration_regions=None, name=None, nest=None, block_sparse=None): + r""" + :param dsets: :class:`DataSet`\s for the left and right function + spaces this :class:`Sparsity` maps between + :param maps: :class:`Map`\s to build the :class:`Sparsity` from + :type maps: a pair of :class:`Map`\s specifying a row map and a column + map, or an iterable of pairs of :class:`Map`\s specifying multiple + row and column maps - if a single :class:`Map` is passed, it is + used as both a row map and a column map + :param iteration_regions: regions that select subsets of extruded maps to iterate over. + :param string name: user-defined label (optional) + :param nest: Should the sparsity over mixed set be built as nested blocks? 
+ :param block_sparse: Should the sparsity for datasets with + cdim > 1 be built as a block sparsity? """ - if self.cdim == 1: - return self.lgmap + # Protect against re-initialization when retrieved from cache + if self._initialized: + return + + self._block_sparse = block_sparse + # Split into a list of row maps and a list of column maps + maps, iteration_regions = zip(*maps) + self._rmaps, self._cmaps = zip(*maps) + self._dsets = dsets + + if isinstance(dsets[0], GlobalDataSet) or isinstance(dsets[1], GlobalDataSet): + self._dims = (((1, 1),),) + self._d_nnz = None + self._o_nnz = None + self._nrows = None if isinstance(dsets[0], GlobalDataSet) else self._rmaps[0].toset.size + self._ncols = None if isinstance(dsets[1], GlobalDataSet) else self._cmaps[0].toset.size + self.lcomm = dsets[0].comm if isinstance(dsets[0], GlobalDataSet) else self._rmaps[0].comm + self.rcomm = dsets[1].comm if isinstance(dsets[1], GlobalDataSet) else self._cmaps[0].comm else: - indices = self.lgmap.indices - lgmap = PETSc.LGMap().create(indices=indices, - bsize=1, comm=self.lgmap.comm) - return lgmap + self.lcomm = self._rmaps[0].comm + self.rcomm = self._cmaps[0].comm - @utils.cached_property - def field_ises(self): - """A list of PETSc ISes defining the global indices for each set in - the DataSet. - - Used when extracting blocks from matrices for solvers.""" - ises = [] - nlocal_rows = 0 - for dset in self: - nlocal_rows += dset.size * dset.cdim - offset = self.comm.scan(nlocal_rows) - offset -= nlocal_rows - for dset in self: - nrows = dset.size * dset.cdim - iset = PETSc.IS().createStride(nrows, first=offset, step=1, - comm=self.comm) - iset.setBlockSize(dset.cdim) - ises.append(iset) - offset += nrows - return tuple(ises) + rset, cset = self.dsets + # All rmaps and cmaps have the same data set - just use the first. 
+ self._nrows = rset.size + self._ncols = cset.size - @utils.cached_property - def local_ises(self): - """A list of PETSc ISes defining the local indices for each set in the DataSet. - - Used when extracting blocks from matrices for assembly.""" - ises = [] - start = 0 - for dset in self: - bs = dset.cdim - n = dset.total_size*bs - iset = PETSc.IS().createStride(n, first=start, step=1, - comm=mpi.COMM_SELF) - iset.setBlockSize(bs) - start += n - ises.append(iset) - return tuple(ises) + self._has_diagonal = (rset == cset) - @utils.cached_property - def layout_vec(self): - """A PETSc Vec compatible with the dof layout of this DataSet.""" - vec = PETSc.Vec().create(comm=self.comm) - size = (self.size * self.cdim, None) - vec.setSizes(size, bsize=self.cdim) - vec.setUp() - return vec + tmp = itertools.product([x.cdim for x in self._dsets[0]], + [x.cdim for x in self._dsets[1]]) - @utils.cached_property - def dm(self): - dm = PETSc.DMShell().create(comm=self.comm) - dm.setGlobalVector(self.layout_vec) - return dm + dims = [[None for _ in range(self.shape[1])] for _ in range(self.shape[0])] + for r in range(self.shape[0]): + for c in range(self.shape[1]): + dims[r][c] = next(tmp) + self._dims = tuple(tuple(d) for d in dims) -class GlobalDataSet(base.GlobalDataSet): + if self.lcomm != self.rcomm: + raise ValueError("Haven't thought hard enough about different left and right communicators") + self.comm = self.lcomm - @utils.cached_property - def lgmap(self): - """A PETSc LGMap mapping process-local indices to global - indices for this :class:`DataSet`. 
- """ - lgmap = PETSc.LGMap() - lgmap.create(indices=np.arange(1, dtype=IntType), - bsize=self.cdim, comm=self.comm) - return lgmap + self._name = name or "sparsity_#x%x" % id(self) + + self.iteration_regions = iteration_regions + # If the Sparsity is defined on MixedDataSets, we need to build each + # block separately + if (isinstance(dsets[0], MixedDataSet) or isinstance(dsets[1], MixedDataSet)) \ + and nest: + self._nested = True + self._blocks = [] + for i, rds in enumerate(dsets[0]): + row = [] + for j, cds in enumerate(dsets[1]): + row.append(Sparsity((rds, cds), [(rm.split[i], cm.split[j]) for + rm, cm in maps], + iteration_regions=iteration_regions, + block_sparse=block_sparse)) + self._blocks.append(row) + self._d_nnz = tuple(s._d_nnz for s in self) + self._o_nnz = tuple(s._o_nnz for s in self) + elif isinstance(dsets[0], GlobalDataSet) or isinstance(dsets[1], GlobalDataSet): + # Where the sparsity maps either from or to a Global, we + # don't really have any sparsity structure. + self._blocks = [[self]] + self._nested = False + else: + for dset in dsets: + if isinstance(dset, MixedDataSet) and any([isinstance(d, GlobalDataSet) for d in dset]): + raise ex.SparsityFormatError("Mixed monolithic matrices with Global rows or columns are not supported.") + self._nested = False + with profiling.timed_region("CreateSparsity"): + nnz, onnz = sparsity.build_sparsity(self) + self._d_nnz = nnz + self._o_nnz = onnz + self._blocks = [[self]] + self._initialized = True + + _cache = {} - @utils.cached_property - def unblocked_lgmap(self): - """A PETSc LGMap mapping process-local indices to global - indices for this :class:`DataSet` with a block size of 1. 
- """ - if self.cdim == 1: - return self.lgmap + @classmethod + @utils.validate_type(('dsets', (Set, DataSet, tuple, list), ex.DataSetTypeError), + ('maps', (Map, tuple, list), ex.MapTypeError)) + def _process_args(cls, dsets, maps, *, iteration_regions=None, name=None, nest=None, block_sparse=None): + "Turn maps argument into a canonical tuple of pairs." + from pyop2.parloop import IterationRegion + + # A single data set becomes a pair of identical data sets + dsets = [dsets, dsets] if isinstance(dsets, (Set, DataSet)) else list(dsets) + # Upcast Sets to DataSets + dsets = [s ** 1 if isinstance(s, Set) else s for s in dsets] + + # Check data sets are valid + for dset in dsets: + if not isinstance(dset, DataSet) and dset is not None: + raise ex.DataSetTypeError("All data sets must be of type DataSet, not type %r" % type(dset)) + + # A single map becomes a pair of identical maps + maps = (maps, maps) if isinstance(maps, Map) else maps + # A single pair becomes a tuple of one pair + maps = (maps,) if isinstance(maps[0], Map) else maps + + # Check maps are sane + for pair in maps: + if pair[0] is None or pair[1] is None: + # None of this checking makes sense if one of the + # matrix operands is a Global. 
+ continue + for m in pair: + if not isinstance(m, Map): + raise ex.MapTypeError( + "All maps must be of type map, not type %r" % type(m)) + if len(m.values_with_halo) == 0 and m.iterset.total_size > 0: + raise ex.MapValueError( + "Unpopulated map values when trying to build sparsity.") + # Make sure that the "to" Set of each map in a pair is the set of + # the corresponding DataSet set + if not (pair[0].toset == dsets[0].set + and pair[1].toset == dsets[1].set): + raise RuntimeError("Map to set must be the same as corresponding DataSet set") + + # Each pair of maps must have the same from-set (iteration set) + if not pair[0].iterset == pair[1].iterset: + raise RuntimeError("Iterset of both maps in a pair must be the same") + + rmaps, cmaps = zip(*maps) + if iteration_regions is None: + iteration_regions = tuple((IterationRegion.ALL, ) for _ in maps) + else: + iteration_regions = tuple(tuple(sorted(region)) for region in iteration_regions) + if not len(rmaps) == len(cmaps): + raise RuntimeError("Must pass equal number of row and column maps") + + if rmaps[0] is not None and cmaps[0] is not None: + # Each row map must have the same to-set (data set) + if not all(m.toset == rmaps[0].toset for m in rmaps): + raise RuntimeError("To set of all row maps must be the same") + + # Each column map must have the same to-set (data set) + if not all(m.toset == cmaps[0].toset for m in cmaps): + raise RuntimeError("To set of all column maps must be the same") + + # Need to return the caching object, a tuple of the processed + # arguments and a dict of kwargs (empty in this case) + if isinstance(dsets[0], GlobalDataSet): + cache = None + elif isinstance(dsets[0].set, MixedSet): + cache = dsets[0].set[0] else: - indices = self.lgmap.indices - lgmap = PETSc.LGMap().create(indices=indices, - bsize=1, comm=self.lgmap.comm) - return lgmap + cache = dsets[0].set + if nest is None: + nest = conf.configuration["matnest"] + if block_sparse is None: + block_sparse = 
conf.configuration["block_sparsity"] + + maps = frozenset(zip(maps, iteration_regions)) + kwargs = {"name": name, + "nest": nest, + "block_sparse": block_sparse} + return (cache,) + (tuple(dsets), maps), kwargs + + @classmethod + def _cache_key(cls, dsets, maps, name, nest, block_sparse, *args, **kwargs): + return (dsets, maps, nest, block_sparse) + + def __getitem__(self, idx): + """Return :class:`Sparsity` block with row and column given by ``idx`` + or a given row of blocks.""" + try: + i, j = idx + return self._blocks[i][j] + except TypeError: + return self._blocks[idx] @utils.cached_property - def field_ises(self): - """A list of PETSc ISes defining the global indices for each set in - the DataSet. - - Used when extracting blocks from matrices for solvers.""" - ises = [] - nlocal_rows = 0 - for dset in self: - nlocal_rows += dset.size * dset.cdim - offset = self.comm.scan(nlocal_rows) - offset -= nlocal_rows - for dset in self: - nrows = dset.size * dset.cdim - iset = PETSc.IS().createStride(nrows, first=offset, step=1, - comm=self.comm) - iset.setBlockSize(dset.cdim) - ises.append(iset) - offset += nrows - return tuple(ises) + def dsets(self): + r"""A pair of :class:`DataSet`\s for the left and right function + spaces this :class:`Sparsity` maps between.""" + return self._dsets @utils.cached_property - def local_ises(self): - """A list of PETSc ISes defining the local indices for each set in the DataSet. + def maps(self): + """A list of pairs (rmap, cmap) where each pair of + :class:`Map` objects will later be used to assemble into this + matrix. The iterset of each of the maps in a pair must be the + same, while the toset of all the maps which appear first + must be common, this will form the row :class:`Set` of the + sparsity. 
Similarly, the toset of all the maps which appear + second must be common and will form the column :class:`Set` of + the ``Sparsity``.""" + return list(zip(self._rmaps, self._cmaps)) - Used when extracting blocks from matrices for assembly.""" - raise NotImplementedError + @utils.cached_property + def cmaps(self): + """The list of column maps this sparsity is assembled from.""" + return self._cmaps @utils.cached_property - def layout_vec(self): - """A PETSc Vec compatible with the dof layout of this DataSet.""" - vec = PETSc.Vec().create(comm=self.comm) - size = (self.size * self.cdim, None) - vec.setSizes(size, bsize=self.cdim) - vec.setUp() - return vec + def rmaps(self): + """The list of row maps this sparsity is assembled from.""" + return self._rmaps @utils.cached_property - def dm(self): - dm = PETSc.DMShell().create(comm=self.comm) - dm.setGlobalVector(self.layout_vec) - return dm + def dims(self): + """A tuple of tuples where the ``i,j``th entry + is a pair giving the number of rows per entry of the row + :class:`Set` and the number of columns per entry of the column + :class:`Set` of the ``Sparsity``. The extents of the first + two indices are given by the :attr:`shape` of the sparsity. 
+ """ + return self._dims + @utils.cached_property + def shape(self): + """Number of block rows and columns.""" + return (len(self._dsets[0] or [1]), + len(self._dsets[1] or [1])) -class MixedDataSet(DataSet, base.MixedDataSet): + @utils.cached_property + def nrows(self): + """The number of rows in the ``Sparsity``.""" + return self._nrows @utils.cached_property - def layout_vec(self): - """A PETSc Vec compatible with the dof layout of this MixedDataSet.""" - vec = PETSc.Vec().create(comm=self.comm) - # Compute local and global size from sizes of layout vecs - lsize, gsize = map(sum, zip(*(d.layout_vec.sizes for d in self))) - vec.setSizes((lsize, gsize), bsize=1) - vec.setUp() - return vec + def ncols(self): + """The number of columns in the ``Sparsity``.""" + return self._ncols @utils.cached_property - def lgmap(self): - """A PETSc LGMap mapping process-local indices to global - indices for this :class:`MixedDataSet`. + def nested(self): + r"""Whether a sparsity is monolithic (even if it has a block structure). + + To elaborate, if a sparsity maps between + :class:`MixedDataSet`\s, it can either be nested, in which + case it consists of as many blocks are the product of the + length of the datasets it maps between, or monolithic. In the + latter case the sparsity is for the full map between the mixed + datasets, rather than between the blocks of the non-mixed + datasets underneath them. """ - lgmap = PETSc.LGMap() - if self.comm.size == 1: - size = sum(s.size * s.cdim for s in self) - lgmap.create(indices=np.arange(size, dtype=IntType), - bsize=1, comm=self.comm) - return lgmap - # Compute local to global maps for a monolithic mixed system - # from the individual local to global maps for each field. - # Exposition: - # - # We have N fields and P processes. The global row - # ordering is: - # - # f_0_p_0, f_1_p_0, ..., f_N_p_0; f_0_p_1, ..., ; f_0_p_P, - # ..., f_N_p_P. 
- # - # We have per-field local to global numberings, to convert - # these into multi-field local to global numberings, we note - # the following: - # - # For each entry in the per-field l2g map, we first determine - # the rank that entry belongs to, call this r. - # - # We know that this must be offset by: - # 1. The sum of all field lengths with rank < r - # 2. The sum of all lower-numbered field lengths on rank r. - # - # Finally, we need to shift the field-local entry by the - # current field offset. - idx_size = sum(s.total_size*s.cdim for s in self) - indices = np.full(idx_size, -1, dtype=IntType) - owned_sz = np.array([sum(s.size * s.cdim for s in self)], - dtype=IntType) - field_offset = np.empty_like(owned_sz) - self.comm.Scan(owned_sz, field_offset) - field_offset -= owned_sz - - all_field_offsets = np.empty(self.comm.size, dtype=IntType) - self.comm.Allgather(field_offset, all_field_offsets) - - start = 0 - all_local_offsets = np.zeros(self.comm.size, dtype=IntType) - current_offsets = np.zeros(self.comm.size + 1, dtype=IntType) - for s in self: - idx = indices[start:start + s.total_size * s.cdim] - owned_sz[0] = s.size * s.cdim - self.comm.Scan(owned_sz, field_offset) - self.comm.Allgather(field_offset, current_offsets[1:]) - # Find the ranks each entry in the l2g belongs to - l2g = s.unblocked_lgmap.indices - tmp_indices = np.searchsorted(current_offsets, l2g, side="right") - 1 - idx[:] = l2g[:] - current_offsets[tmp_indices] + \ - all_field_offsets[tmp_indices] + all_local_offsets[tmp_indices] - self.comm.Allgather(owned_sz, current_offsets[1:]) - all_local_offsets += current_offsets[1:] - start += s.total_size * s.cdim - lgmap.create(indices=indices, bsize=1, comm=self.comm) - return lgmap + return self._nested @utils.cached_property - def unblocked_lgmap(self): - """A PETSc LGMap mapping process-local indices to global - indices for this :class:`DataSet` with a block size of 1. 
- """ - return self.lgmap + def name(self): + """A user-defined label.""" + return self._name + def __iter__(self): + r"""Iterate over all :class:`Sparsity`\s by row and then by column.""" + for row in self._blocks: + for s in row: + yield s -class VecAccessMixin(metaclass=abc.ABCMeta): - @abc.abstractmethod - def vec_context(self, access): - pass + def __str__(self): + return "OP2 Sparsity: dsets %s, rmaps %s, cmaps %s, name %s" % \ + (self._dsets, self._rmaps, self._cmaps, self._name) - @abc.abstractproperty - def _vec(self): - pass + def __repr__(self): + return "Sparsity(%r, %r, %r)" % (self.dsets, self.maps, self.name) - @property - @collective - def vec(self): - """Context manager for a PETSc Vec appropriate for this Dat. + @utils.cached_property + def nnz(self): + """Array containing the number of non-zeroes in the various rows of the + diagonal portion of the local submatrix. - You're allowed to modify the data you get back from this view.""" - return self.vec_context(access=base.RW) + This is the same as the parameter `d_nnz` used for preallocation in + PETSc's MatMPIAIJSetPreallocation_.""" + return self._d_nnz - @property - @collective - def vec_wo(self): - """Context manager for a PETSc Vec appropriate for this Dat. + @utils.cached_property + def onnz(self): + """Array containing the number of non-zeroes in the various rows of the + off-diagonal portion of the local submatrix. - You're allowed to modify the data you get back from this view, - but you cannot read from it.""" - return self.vec_context(access=base.WRITE) + This is the same as the parameter `o_nnz` used for preallocation in + PETSc's MatMPIAIJSetPreallocation_.""" + return self._o_nnz - @property - @collective - def vec_ro(self): - """Context manager for a PETSc Vec appropriate for this Dat. 
+ @utils.cached_property + def nz(self): + return self._d_nnz.sum() - You're not allowed to modify the data you get back from this view.""" - return self.vec_context(access=base.READ) + @utils.cached_property + def onz(self): + return self._o_nnz.sum() + def __contains__(self, other): + """Return true if other is a pair of maps in self.maps(). This + will also return true if the elements of other have parents in + self.maps().""" -class Dat(base.Dat, VecAccessMixin): - @utils.cached_property - def _vec(self): - assert self.dtype == PETSc.ScalarType, \ - "Can't create Vec with type %s, must be %s" % (self.dtype, PETSc.ScalarType) - # Can't duplicate layout_vec of dataset, because we then - # carry around extra unnecessary data. - # But use getSizes to save an Allreduce in computing the - # global size. - size = self.dataset.layout_vec.getSizes() - data = self._data[:size[0]] - return PETSc.Vec().createWithArray(data, size=size, bsize=self.cdim, comm=self.comm) - - @contextmanager - def vec_context(self, access): - r"""A context manager for a :class:`PETSc.Vec` from a :class:`Dat`. - - :param access: Access descriptor: READ, WRITE, or RW.""" - # PETSc Vecs have a state counter and cache norm computations - # to return immediately if the state counter is unchanged. - # Since we've updated the data behind their back, we need to - # change that state counter. - self._vec.stateIncrease() - yield self._vec - if access is not base.READ: - self.halo_valid = False - - -class MixedDat(base.MixedDat, VecAccessMixin): - @utils.cached_property - def _vec(self): - assert self.dtype == PETSc.ScalarType, \ - "Can't create Vec with type %s, must be %s" % (self.dtype, PETSc.ScalarType) - # In this case we can just duplicate the layout vec - # because we're not placing an array. 
- return self.dataset.layout_vec.duplicate() - - @contextmanager - def vec_context(self, access): - r"""A context manager scattering the arrays of all components of this - :class:`MixedDat` into a contiguous :class:`PETSc.Vec` and reverse - scattering to the original arrays when exiting the context. - - :param access: Access descriptor: READ, WRITE, or RW. - - .. note:: - - The :class:`~PETSc.Vec` obtained from this context is in - the correct order to be left multiplied by a compatible - :class:`MixedMat`. In parallel it is *not* just a - concatenation of the underlying :class:`Dat`\s.""" - # Do the actual forward scatter to fill the full vector with - # values - if access is not base.WRITE: - offset = 0 - array = self._vec.array - for d in self: - with d.vec_ro as v: - size = v.local_size - array[offset:offset+size] = v.array_r[:] - offset += size - self._vec.stateIncrease() - yield self._vec - if access is not base.READ: - # Reverse scatter to get the values back to their original locations - offset = 0 - array = self._vec.array_r - for d in self: - with d.vec_wo as v: - size = v.local_size - v.array[:] = array[offset:offset+size] - offset += size - self.halo_valid = False - - -class Global(base.Global, VecAccessMixin): - @utils.cached_property - def _vec(self): - assert self.dtype == PETSc.ScalarType, \ - "Can't create Vec with type %s, must be %s" % (self.dtype, PETSc.ScalarType) - # Can't duplicate layout_vec of dataset, because we then - # carry around extra unnecessary data. - # But use getSizes to save an Allreduce in computing the - # global size. 
- data = self._data - size = self.dataset.layout_vec.getSizes() - if self.comm.rank == 0: - return PETSc.Vec().createWithArray(data, size=size, - bsize=self.cdim, - comm=self.comm) - else: - return PETSc.Vec().createWithArray(np.empty(0, dtype=self.dtype), - size=size, - bsize=self.cdim, - comm=self.comm) - - @contextmanager - def vec_context(self, access): - """A context manager for a :class:`PETSc.Vec` from a :class:`Global`. - - :param access: Access descriptor: READ, WRITE, or RW.""" - # PETSc Vecs have a state counter and cache norm computations - # to return immediately if the state counter is unchanged. - # Since we've updated the data behind their back, we need to - # change that state counter. - self._vec.stateIncrease() - yield self._vec - if access is not base.READ: - data = self._data - self.comm.Bcast(data, 0) - - -class SparsityBlock(base.Sparsity): + for maps in self.maps: + if tuple(other) <= maps: + return True + + return False + + +class SparsityBlock(Sparsity): """A proxy class for a block in a monolithic :class:`.Sparsity`. :arg parent: The parent monolithic sparsity. @@ -487,119 +401,216 @@ def masked_lgmap(lgmap, mask, block=True): return PETSc.LGMap().create(indices=indices, bsize=bsize, comm=lgmap.comm) -class MatBlock(base.Mat): - """A proxy class for a local block in a monolithic :class:`.Mat`. +class AbstractMat(DataCarrier, abc.ABC): + r"""OP2 matrix data. A ``Mat`` is defined on a sparsity pattern and holds a value + for each element in the :class:`Sparsity`. - :arg parent: The parent monolithic matrix. - :arg i: The block row. - :arg j: The block column. 
- """ - def __init__(self, parent, i, j): - self._parent = parent - self._i = i - self._j = j - self._sparsity = SparsityBlock(parent.sparsity, i, j) - rset, cset = self._parent.sparsity.dsets - rowis = rset.local_ises[i] - colis = cset.local_ises[j] - self.handle = parent.handle.getLocalSubMatrix(isrow=rowis, - iscol=colis) - self.comm = parent.comm - self.local_to_global_maps = self.handle.getLGMap() + When a ``Mat`` is passed to :func:`pyop2.op2.par_loop`, the maps via which + indirection occurs for the row and column space, and the access + descriptor are passed by `calling` the ``Mat``. For instance, if a + ``Mat`` named ``A`` is to be accessed for reading via a row :class:`Map` + named ``R`` and a column :class:`Map` named ``C``, this is accomplished by:: - @utils.cached_property - def _kernel_args_(self): - return (self.handle.handle, ) + A(pyop2.READ, (R[pyop2.i[0]], C[pyop2.i[1]])) - @utils.cached_property - def _wrapper_cache_key_(self): - return (type(self._parent), self._parent.dtype, self.dims) + Notice that it is `always` necessary to index the indirection maps + for a ``Mat``. See the :class:`Mat` documentation for more + details. - @property - def assembly_state(self): - # Track our assembly state only - return self._parent.assembly_state + .. note :: - @assembly_state.setter - def assembly_state(self, value): - self._parent.assembly_state = value + After executing :func:`par_loop`\s that write to a ``Mat`` and + before using it (for example to view its values), you must call + :meth:`assemble` to finalise the writes. 
+ """ + @utils.cached_property + def pack(self): + from pyop2.codegen.builder import MatPack + return MatPack + + ASSEMBLED = "ASSEMBLED" + INSERT_VALUES = "INSERT_VALUES" + ADD_VALUES = "ADD_VALUES" + + _modes = [Access.WRITE, Access.INC] + + @utils.validate_type(('sparsity', Sparsity, ex.SparsityTypeError), + ('name', str, ex.NameTypeError)) + def __init__(self, sparsity, dtype=None, name=None): + self._sparsity = sparsity + self.lcomm = sparsity.lcomm + self.rcomm = sparsity.rcomm + self.comm = sparsity.comm + dtype = dtype or dtypes.ScalarType + self._datatype = np.dtype(dtype) + self._name = name or "mat_#x%x" % id(self) + self.assembly_state = Mat.ASSEMBLED - def __getitem__(self, idx): - return self + @utils.validate_in(('access', _modes, ex.ModeValueError)) + def __call__(self, access, path, lgmaps=None, unroll_map=False): + from pyop2.parloop import Arg + path_maps = utils.as_tuple(path, Map, 2) + if conf.configuration["type_check"] and tuple(path_maps) not in self.sparsity: + raise ex.MapValueError("Path maps not in sparsity maps") + return Arg(data=self, map=path_maps, access=access, lgmaps=lgmaps, unroll_map=unroll_map) - def __iter__(self): - yield self + @utils.cached_property + def _wrapper_cache_key_(self): + return (type(self), self.dtype, self.dims) - def _flush_assembly(self): - # Need to flush for all blocks - for b in self._parent: - b.handle.assemble(assembly=PETSc.Mat.AssemblyType.FLUSH) - self._parent._flush_assembly() + def assemble(self): + """Finalise this :class:`Mat` ready for use. 
- def set_local_diagonal_entries(self, rows, diag_val=1.0, idx=None): - rows = np.asarray(rows, dtype=IntType) - rbs, _ = self.dims[0][0] - if rbs > 1: - if idx is not None: - rows = rbs * rows + idx - else: - rows = np.dstack([rbs*rows + i for i in range(rbs)]).flatten() - rows = rows.reshape(-1, 1) - self.change_assembly_state(Mat.INSERT_VALUES) - if len(rows) > 0: - values = np.full(rows.shape, diag_val, dtype=ScalarType) - self.handle.setValuesLocalRCV(rows, rows, values, - addv=PETSc.InsertMode.INSERT_VALUES) + Call this /after/ executing all the par_loops that write to + the matrix before you want to look at it. + """ + raise NotImplementedError("Subclass should implement this") def addto_values(self, rows, cols, values): """Add a block of values to the :class:`Mat`.""" - self.change_assembly_state(Mat.ADD_VALUES) - if len(values) > 0: - self.handle.setValuesBlockedLocal(rows, cols, values, - addv=PETSc.InsertMode.ADD_VALUES) + raise NotImplementedError( + "Abstract Mat base class doesn't know how to set values.") def set_values(self, rows, cols, values): """Set a block of values in the :class:`Mat`.""" - self.change_assembly_state(Mat.INSERT_VALUES) - if len(values) > 0: - self.handle.setValuesBlockedLocal(rows, cols, values, - addv=PETSc.InsertMode.INSERT_VALUES) + raise NotImplementedError( + "Abstract Mat base class doesn't know how to set values.") - def assemble(self): - raise RuntimeError("Should never call assemble on MatBlock") + @utils.cached_property + def nblocks(self): + return int(np.prod(self.sparsity.shape)) + + @utils.cached_property + def _argtypes_(self): + """Ctypes argtype for this :class:`Mat`""" + return tuple(ctypes.c_voidp for _ in self) + + @utils.cached_property + def dims(self): + """A pair of integers giving the number of matrix rows and columns for + each member of the row :class:`Set` and column :class:`Set` + respectively. 
This corresponds to the ``cdim`` member of a + :class:`DataSet`.""" + return self._sparsity._dims + + @utils.cached_property + def nrows(self): + "The number of rows in the matrix (local to this process)" + return sum(d.size * d.cdim for d in self.sparsity.dsets[0]) + + @utils.cached_property + def nblock_rows(self): + """The number "block" rows in the matrix (local to this process). + + This is equivalent to the number of rows in the matrix divided + by the dimension of the row :class:`DataSet`. + """ + assert len(self.sparsity.dsets[0]) == 1, "Block rows don't make sense for mixed Mats" + return self.sparsity.dsets[0].size + + @utils.cached_property + def nblock_cols(self): + """The number of "block" columns in the matrix (local to this process). + + This is equivalent to the number of columns in the matrix + divided by the dimension of the column :class:`DataSet`. + """ + assert len(self.sparsity.dsets[1]) == 1, "Block cols don't make sense for mixed Mats" + return self.sparsity.dsets[1].size + + @utils.cached_property + def ncols(self): + "The number of columns in the matrix (local to this process)" + return sum(d.size * d.cdim for d in self.sparsity.dsets[1]) + + @utils.cached_property + def sparsity(self): + """:class:`Sparsity` on which the ``Mat`` is defined.""" + return self._sparsity + + @utils.cached_property + def _is_scalar_field(self): + # Sparsity from Dat to MixedDat has a shape like (1, (1, 1)) + # (which you can't take the product of) + return all(np.prod(d) == 1 for d in self.dims) + + @utils.cached_property + def _is_vector_field(self): + return not self._is_scalar_field + + def change_assembly_state(self, new_state): + """Switch the matrix assembly state.""" + if new_state == Mat.ASSEMBLED or self.assembly_state == Mat.ASSEMBLED: + self.assembly_state = new_state + elif new_state != self.assembly_state: + self._flush_assembly() + self.assembly_state = new_state + else: + pass + + def _flush_assembly(self): + """Flush the in flight assembly 
operations (used when + switching between inserting and adding values).""" + pass @property def values(self): - rset, cset = self._parent.sparsity.dsets - rowis = rset.field_ises[self._i] - colis = cset.field_ises[self._j] - self._parent.assemble() - mat = self._parent.handle.createSubMatrix(isrow=rowis, - iscol=colis) - return mat[:, :] + """A numpy array of matrix values. - @property + .. warning :: + This is a dense array, so will need a lot of memory. It's + probably not a good idea to access this property if your + matrix has more than around 10000 degrees of freedom. + """ + raise NotImplementedError("Abstract base Mat does not implement values()") + + @utils.cached_property def dtype(self): - return self._parent.dtype + """The Python type of the data.""" + return self._datatype - @property + @utils.cached_property def nbytes(self): - return self._parent.nbytes // (np.prod(self.sparsity.shape)) + """Return an estimate of the size of the data associated with this + :class:`Mat` in bytes. This will be the correct size of the + data payload, but does not take into account the (presumably + small) overhead of the object and its metadata. The memory + associated with the sparsity pattern is also not recorded. + + Note that this is the process local memory usage, not the sum + over all MPI processes. 
+ """ + if self._sparsity._block_sparse: + mult = np.sum(np.prod(self._sparsity.dims)) + else: + mult = 1 + return (self._sparsity.nz + self._sparsity.onz) \ + * self.dtype.itemsize * mult - def __repr__(self): - return "MatBlock(%r, %r, %r)" % (self._parent, self._i, self._j) + def __iter__(self): + """Yield self when iterated over.""" + yield self + + def __mul__(self, other): + """Multiply this :class:`Mat` with the vector ``other``.""" + raise NotImplementedError("Abstract base Mat does not implement multiplication") def __str__(self): - return "Block[%s, %s] of %s" % (self._i, self._j, self._parent) + return "OP2 Mat: %s, sparsity (%s), datatype %s" \ + % (self._name, self._sparsity, self._datatype.name) + + def __repr__(self): + return "Mat(%r, %r, %r)" \ + % (self._sparsity, self._datatype, self._name) -class Mat(base.Mat): +class Mat(AbstractMat): """OP2 matrix data. A Mat is defined on a sparsity pattern and holds a value for each element in the :class:`Sparsity`.""" def __init__(self, *args, **kwargs): self.mat_type = kwargs.pop("mat_type", None) - base.Mat.__init__(self, *args, **kwargs) + super().__init__(*args, **kwargs) self._init() self.assembly_state = Mat.ASSEMBLED @@ -610,7 +621,7 @@ def __init__(self, *args, **kwargs): def _kernel_args_(self): return tuple(a.handle.handle for a in self) - @collective + @mpi.collective def _init(self): if not self.dtype == PETSc.ScalarType: raise RuntimeError("Can only create a matrix of type %s, %s is not supported" @@ -648,7 +659,7 @@ def _init_dense(self): mat.setOption(mat.Option.SUBSET_OFF_PROC_ENTRIES, True) mat.setUp() # Put zeros in all the places we might eventually put a value. - with timed_region("MatZeroInitial"): + with profiling.timed_region("MatZeroInitial"): mat.zeroEntries() mat.assemble() @@ -680,7 +691,7 @@ def _init_monolithic(self): # The first assembly (filling with zeros) sets all possible entries. 
mat.setOption(mat.Option.SUBSET_OFF_PROC_ENTRIES, True) # Put zeros in all the places we might eventually put a value. - with timed_region("MatZeroInitial"): + with profiling.timed_region("MatZeroInitial"): for i in range(rows): for j in range(cols): sparsity.fill_with_zeros(self[i, j].handle, @@ -757,7 +768,7 @@ def _init_block(self): # entries, so raise an error if we "missed" one. mat.setOption(mat.Option.UNUSED_NONZERO_LOCATION_ERR, True) # Put zeros in all the places we might eventually put a value. - with timed_region("MatZeroInitial"): + with profiling.timed_region("MatZeroInitial"): sparsity.fill_with_zeros(mat, self.sparsity.dims[0][0], self.sparsity.maps, self.sparsity.iteration_regions, set_diag=self.sparsity._has_diagonal) @@ -783,17 +794,15 @@ def _init_global_block(self): def __call__(self, access, path, lgmaps=None, unroll_map=False): """Override the parent __call__ method in order to special-case global blocks in matrices.""" + from pyop2.parloop import Arg # One of the path entries was not an Arg. 
if path == (None, None): lgmaps, = lgmaps assert all(l is None for l in lgmaps) - return _make_object('Arg', - data=self.handle.getPythonContext().global_, - access=access) + return Arg(data=self.handle.getPythonContext().global_, access=access) elif None in path: thispath = path[0] or path[1] - return _make_object('Arg', data=self.handle.getPythonContext().dat, - map=thispath, access=access) + return Arg(data=self.handle.getPythonContext().dat, map=thispath, access=access) else: return super().__call__(access, path, lgmaps=lgmaps, unroll_map=unroll_map) @@ -810,13 +819,13 @@ def __iter__(self): """Iterate over all :class:`Mat` blocks by row and then by column.""" yield from itertools.chain(*self.blocks) - @collective + @mpi.collective def zero(self): """Zero the matrix.""" self.assemble() self.handle.zeroEntries() - @collective + @mpi.collective def zero_rows(self, rows, diag_val=1.0): """Zeroes the specified rows of the matrix, with the exception of the diagonal entry, which is set to diag_val. May be used for applying @@ -830,7 +839,7 @@ def zero_rows(self, rows, diag_val=1.0): def _flush_assembly(self): self.handle.assemble(assembly=PETSc.Mat.AssemblyType.FLUSH) - @collective + @mpi.collective def set_local_diagonal_entries(self, rows, diag_val=1.0, idx=None): """Set the diagonal entry in ``rows`` to a particular value. @@ -840,7 +849,7 @@ def set_local_diagonal_entries(self, rows, diag_val=1.0, idx=None): The indices in ``rows`` should index the process-local rows of the matrix (no mapping to global indexes is applied). 
""" - rows = np.asarray(rows, dtype=IntType) + rows = np.asarray(rows, dtype=dtypes.IntType) rbs, _ = self.dims[0][0] if rbs > 1: if idx is not None: @@ -850,11 +859,11 @@ def set_local_diagonal_entries(self, rows, diag_val=1.0, idx=None): rows = rows.reshape(-1, 1) self.change_assembly_state(Mat.INSERT_VALUES) if len(rows) > 0: - values = np.full(rows.shape, diag_val, dtype=ScalarType) + values = np.full(rows.shape, diag_val, dtype=dtypes.ScalarType) self.handle.setValuesLocalRCV(rows, rows, values, addv=PETSc.InsertMode.INSERT_VALUES) - @collective + @mpi.collective def assemble(self): # If the matrix is nested, we need to check each subblock to # see if it needs assembling. But if it's monolithic then the @@ -902,10 +911,110 @@ def values(self): return self.handle[:, :] -class ParLoop(base.ParLoop): +class MatBlock(AbstractMat): + """A proxy class for a local block in a monolithic :class:`.Mat`. + + :arg parent: The parent monolithic matrix. + :arg i: The block row. + :arg j: The block column. 
+ """ + def __init__(self, parent, i, j): + self._parent = parent + self._i = i + self._j = j + self._sparsity = SparsityBlock(parent.sparsity, i, j) + rset, cset = self._parent.sparsity.dsets + rowis = rset.local_ises[i] + colis = cset.local_ises[j] + self.handle = parent.handle.getLocalSubMatrix(isrow=rowis, + iscol=colis) + self.comm = parent.comm + self.local_to_global_maps = self.handle.getLGMap() + + @utils.cached_property + def _kernel_args_(self): + return (self.handle.handle, ) + + @utils.cached_property + def _wrapper_cache_key_(self): + return (type(self._parent), self._parent.dtype, self.dims) + + @property + def assembly_state(self): + # Track our assembly state only + return self._parent.assembly_state + + @assembly_state.setter + def assembly_state(self, value): + self._parent.assembly_state = value + + def __getitem__(self, idx): + return self + + def __iter__(self): + yield self + + def _flush_assembly(self): + # Need to flush for all blocks + for b in self._parent: + b.handle.assemble(assembly=PETSc.Mat.AssemblyType.FLUSH) + self._parent._flush_assembly() + + def set_local_diagonal_entries(self, rows, diag_val=1.0, idx=None): + rows = np.asarray(rows, dtype=dtypes.IntType) + rbs, _ = self.dims[0][0] + if rbs > 1: + if idx is not None: + rows = rbs * rows + idx + else: + rows = np.dstack([rbs*rows + i for i in range(rbs)]).flatten() + rows = rows.reshape(-1, 1) + self.change_assembly_state(Mat.INSERT_VALUES) + if len(rows) > 0: + values = np.full(rows.shape, diag_val, dtype=dtypes.ScalarType) + self.handle.setValuesLocalRCV(rows, rows, values, + addv=PETSc.InsertMode.INSERT_VALUES) + + def addto_values(self, rows, cols, values): + """Add a block of values to the :class:`Mat`.""" + self.change_assembly_state(Mat.ADD_VALUES) + if len(values) > 0: + self.handle.setValuesBlockedLocal(rows, cols, values, + addv=PETSc.InsertMode.ADD_VALUES) + + def set_values(self, rows, cols, values): + """Set a block of values in the :class:`Mat`.""" + 
self.change_assembly_state(Mat.INSERT_VALUES) + if len(values) > 0: + self.handle.setValuesBlockedLocal(rows, cols, values, + addv=PETSc.InsertMode.INSERT_VALUES) + + def assemble(self): + raise RuntimeError("Should never call assemble on MatBlock") - def log_flops(self, flops): - PETSc.Log.logFlops(flops) + @property + def values(self): + rset, cset = self._parent.sparsity.dsets + rowis = rset.field_ises[self._i] + colis = cset.field_ises[self._j] + self._parent.assemble() + mat = self._parent.handle.createSubMatrix(isrow=rowis, + iscol=colis) + return mat[:, :] + + @property + def dtype(self): + return self._parent.dtype + + @property + def nbytes(self): + return self._parent.nbytes // (np.prod(self.sparsity.shape)) + + def __repr__(self): + return "MatBlock(%r, %r, %r)" % (self._parent, self._i, self._j) + + def __str__(self): + return "Block[%s, %s] of %s" % (self._i, self._j, self._parent) def _DatMat(sparsity, dat=None): @@ -926,9 +1035,10 @@ def _DatMat(sparsity, dat=None): return A -class _DatMatPayload(object): +class _DatMatPayload: def __init__(self, sparsity, dat=None, dset=None): + from pyop2.types.dat import Dat if isinstance(sparsity.dsets[0], GlobalDataSet): self.dset = sparsity.dsets[1] self.sizes = ((None, 1), (self.dset.size * self.dset.cdim, None)) @@ -939,7 +1049,7 @@ def __init__(self, sparsity, dat=None, dset=None): raise ValueError("Not a DatMat") self.sparsity = sparsity - self.dat = dat or _make_object("Dat", self.dset, dtype=PETSc.ScalarType) + self.dat = dat or Dat(self.dset, dtype=PETSc.ScalarType) self.dset = dset def __getitem__(self, key): @@ -963,7 +1073,7 @@ def mult(self, mat, x, y): # Column matrix if x.sizes[1] == 1: v.copy(y) - a = np.zeros(1, dtype=ScalarType) + a = np.zeros(1, dtype=dtypes.ScalarType) if x.comm.rank == 0: a[0] = x.array_r else: @@ -979,7 +1089,7 @@ def multTranspose(self, mat, x, y): # Row matrix if x.sizes[1] == 1: v.copy(y) - a = np.zeros(1, dtype=ScalarType) + a = np.zeros(1, dtype=dtypes.ScalarType) if 
x.comm.rank == 0: a[0] = x.array_r else: @@ -1003,7 +1113,7 @@ def multTransposeAdd(self, mat, x, y, z): # Row matrix if x.sizes[1] == 1: v.copy(z) - a = np.zeros(1, dtype=ScalarType) + a = np.zeros(1, dtype=dtypes.ScalarType) if x.comm.rank == 0: a[0] = x.array_r else: @@ -1049,10 +1159,11 @@ def _GlobalMat(global_=None, comm=None): return A -class _GlobalMatPayload(object): +class _GlobalMatPayload: def __init__(self, global_=None, comm=None): - self.global_ = global_ or _make_object("Global", 1, dtype=PETSc.ScalarType, comm=comm) + from pyop2.types.glob import Global + self.global_ = global_ or Global(1, dtype=PETSc.ScalarType, comm=comm) def __getitem__(self, key): return self.global_.data_ro.reshape(1, 1)[key] diff --git a/pyop2/types/set.py b/pyop2/types/set.py new file mode 100644 index 000000000..7702d87f7 --- /dev/null +++ b/pyop2/types/set.py @@ -0,0 +1,626 @@ +import ctypes +import functools +import numbers + +import numpy as np + +from pyop2 import ( + caching, + datatypes as dtypes, + exceptions as ex, + mpi, + utils +) + + +class Set: + + """OP2 set. + + :param size: The size of the set. + :type size: integer or list of four integers. + :param string name: The name of the set (optional). + :param halo: An exisiting halo to use (optional). + + When the set is employed as an iteration space in a + :func:`pyop2.op2.par_loop`, the extent of any local iteration space within + each set entry is indicated in brackets. See the example in + :func:`pyop2.op2.par_loop` for more details. + + The size of the set can either be an integer, or a list of four + integers. The latter case is used for running in parallel where + we distinguish between: + + - `CORE` (owned and not touching halo) + - `OWNED` (owned, touching halo) + - `EXECUTE HALO` (not owned, but executed over redundantly) + - `NON EXECUTE HALO` (not owned, read when executing in the execute halo) + + If a single integer is passed, we assume that we're running in + serial and there is no distinction. 
+ + The division of set elements is: :: + + [0, CORE) + [CORE, OWNED) + [OWNED, GHOST) + + Halo send/receive data is stored on sets in a :class:`Halo`. + """ + + _CORE_SIZE = 0 + _OWNED_SIZE = 1 + _GHOST_SIZE = 2 + + _extruded = False + + _kernel_args_ = () + _argtypes_ = () + + @utils.cached_property + def _wrapper_cache_key_(self): + return (type(self), ) + + @utils.validate_type(('size', (numbers.Integral, tuple, list, np.ndarray), ex.SizeTypeError), + ('name', str, ex.NameTypeError)) + def __init__(self, size, name=None, halo=None, comm=None): + self.comm = mpi.dup_comm(comm) + if isinstance(size, numbers.Integral): + size = [size] * 3 + size = utils.as_tuple(size, numbers.Integral, 3) + assert size[Set._CORE_SIZE] <= size[Set._OWNED_SIZE] <= \ + size[Set._GHOST_SIZE], "Set received invalid sizes: %s" % size + self._sizes = size + self._name = name or "set_#x%x" % id(self) + self._halo = halo + self._partition_size = 1024 + # A cache of objects built on top of this set + self._cache = {} + + @utils.cached_property + def core_size(self): + """Core set size. Owned elements not touching halo elements.""" + return self._sizes[Set._CORE_SIZE] + + @utils.cached_property + def size(self): + """Set size, owned elements.""" + return self._sizes[Set._OWNED_SIZE] + + @utils.cached_property + def total_size(self): + """Set size including ghost elements. 
+ """ + return self._sizes[Set._GHOST_SIZE] + + @utils.cached_property + def sizes(self): + """Set sizes: core, owned, execute halo, total.""" + return self._sizes + + @utils.cached_property + def core_part(self): + return SetPartition(self, 0, self.core_size) + + @utils.cached_property + def owned_part(self): + return SetPartition(self, self.core_size, self.size - self.core_size) + + @utils.cached_property + def name(self): + """User-defined label""" + return self._name + + @utils.cached_property + def halo(self): + """:class:`Halo` associated with this Set""" + return self._halo + + @property + def partition_size(self): + """Default partition size""" + return self._partition_size + + @partition_size.setter + def partition_size(self, partition_value): + """Set the partition size""" + self._partition_size = partition_value + + def __iter__(self): + """Yield self when iterated over.""" + yield self + + def __getitem__(self, idx): + """Allow indexing to return self""" + assert idx == 0 + return self + + def __len__(self): + """This is not a mixed type and therefore of length 1.""" + return 1 + + def __str__(self): + return "OP2 Set: %s with size %s" % (self._name, self.size) + + def __repr__(self): + return "Set(%r, %r)" % (self._sizes, self._name) + + def __call__(self, *indices): + """Build a :class:`Subset` from this :class:`Set` + + :arg indices: The elements of this :class:`Set` from which the + :class:`Subset` should be formed. 
+ + """ + if len(indices) == 1: + indices = indices[0] + if np.isscalar(indices): + indices = [indices] + return Subset(self, indices) + + def __contains__(self, dset): + """Indicate whether a given DataSet is compatible with this Set.""" + from pyop2.types import DataSet + if isinstance(dset, DataSet): + return dset.set is self + else: + return False + + def __pow__(self, e): + """Derive a :class:`DataSet` with dimension ``e``""" + from pyop2.types import DataSet + return DataSet(self, dim=e) + + @utils.cached_property + def layers(self): + """Return None (not an :class:`ExtrudedSet`).""" + return None + + def _check_operands(self, other): + if type(other) is Set: + if other is not self: + raise ValueError("Uable to perform set operations between two unrelated sets: %s and %s." % (self, other)) + elif type(other) is Subset: + if self is not other._superset: + raise TypeError("Superset mismatch: self (%s) != other._superset (%s)" % (self, other._superset)) + else: + raise TypeError("Unable to perform set operations between `Set` and %s." 
% (type(other), )) + + def intersection(self, other): + self._check_operands(other) + return other + + def union(self, other): + self._check_operands(other) + return self + + def difference(self, other): + self._check_operands(other) + if other is self: + return Subset(self, []) + else: + return type(other)(self, np.setdiff1d(np.asarray(range(self.total_size), dtype=dtypes.IntType), other._indices)) + + def symmetric_difference(self, other): + self._check_operands(other) + return self.difference(other) + + +class GlobalSet(Set): + + _extruded = False + + """A proxy set allowing a :class:`Global` to be used in place of a + :class:`Dat` where appropriate.""" + + _kernel_args_ = () + _argtypes_ = () + + def __init__(self, comm=None): + self.comm = mpi.dup_comm(comm) + self._cache = {} + + @utils.cached_property + def core_size(self): + return 0 + + @utils.cached_property + def size(self): + return 1 if self.comm.rank == 0 else 0 + + @utils.cached_property + def total_size(self): + """Total set size, including halo elements.""" + return 1 if self.comm.rank == 0 else 0 + + @utils.cached_property + def sizes(self): + """Set sizes: core, owned, execute halo, total.""" + return (self.core_size, self.size, self.total_size) + + @utils.cached_property + def name(self): + """User-defined label""" + return "GlobalSet" + + @utils.cached_property + def halo(self): + """:class:`Halo` associated with this Set""" + return None + + @property + def partition_size(self): + """Default partition size""" + return None + + def __iter__(self): + """Yield self when iterated over.""" + yield self + + def __getitem__(self, idx): + """Allow indexing to return self""" + assert idx == 0 + return self + + def __len__(self): + """This is not a mixed type and therefore of length 1.""" + return 1 + + def __str__(self): + return "OP2 GlobalSet" + + def __repr__(self): + return "GlobalSet()" + + def __eq__(self, other): + # Currently all GlobalSets compare equal. 
class ExtrudedSet(Set):

    """OP2 ExtrudedSet.

    :param parent: The parent :class:`Set` to build this :class:`ExtrudedSet` on top of
    :type parent: a :class:`Set`.
    :param layers: The number of layers in this :class:`ExtrudedSet`.
    :type layers: an integer, indicating the number of layers for every entity,
        or an array of shape (parent.total_size, 2) giving the start
        and one past the stop layer for every entity.  An entry
        ``a, b = layers[e, ...]`` means that the layers for entity
        ``e`` run over :math:`[a, b)`.

    The number of layers indicates the number of times the base set is
    extruded in the direction of the :class:`ExtrudedSet`.  As a
    result, there are ``layers-1`` extruded "cells" in an extruded set.

    Attributes that are not found on the extruded set itself are looked up
    on the parent set (see :meth:`__getattr__`).
    """

    @utils.validate_type(('parent', Set, TypeError))
    def __init__(self, parent, layers):
        """Store ``parent`` and normalise ``layers`` into a 2D integer array.

        :raises SizeTypeError: if a per-entity array has a negative entry or
            an entity with fewer than one layer, if an array of the wrong
            shape is supplied, or if a constant layer count is below two.
        """
        self._parent = parent
        try:
            layers = utils.verify_reshape(layers, dtypes.IntType, (parent.total_size, 2))
            self.constant_layers = False
            # Element-wise tests (rather than ``layers.min()``) also work for
            # an empty parent set, where a min-reduction has no identity.
            if (layers < 0).any():
                raise ex.SizeTypeError("Bottom of layers must be >= 0")
            if (layers[:, 1] - layers[:, 0] < 1).any():
                raise ex.SizeTypeError("Number of layers must be >= 1")
        except ex.DataValueError:
            # Legacy, integer
            layers = np.asarray(layers, dtype=dtypes.IntType)
            if layers.shape:
                raise ex.SizeTypeError(f"Specifying layers per entity, but provided "
                                       f"{layers.shape}, needed ({parent.total_size}, 2)")
            if layers < 2:
                # Interpolate here: extra positional arguments to an exception
                # are stored, not formatted into the message.
                raise ex.SizeTypeError("Need at least two layers, not %d" % int(layers))
            layers = np.asarray([[0, layers]], dtype=dtypes.IntType)
            self.constant_layers = True

        self._layers = layers
        self._extruded = True

    @utils.cached_property
    def _kernel_args_(self):
        # Address of the layers array's data buffer.
        return (self.layers_array.ctypes.data, )

    @utils.cached_property
    def _argtypes_(self):
        return (ctypes.c_voidp, )

    @utils.cached_property
    def _wrapper_cache_key_(self):
        # The parent's key extended by whether the layer count is constant.
        return self.parent._wrapper_cache_key_ + (self.constant_layers, )

    def __getattr__(self, name):
        """Returns a :class:`Set` specific attribute.

        Only invoked when normal lookup fails.  The value is fetched from the
        parent set and stored on this instance, so later lookups of the same
        name no longer reach this method.
        """
        if name == "_parent":
            # ``_parent`` itself is missing (instance not initialised yet);
            # reading ``self._parent`` below would re-enter this method
            # without bound.
            raise AttributeError(name)
        value = getattr(self._parent, name)
        setattr(self, name, value)
        return value

    def __contains__(self, set):
        """Return True only if ``set`` is the parent of this extruded set."""
        return set is self.parent

    def __str__(self):
        return "OP2 ExtrudedSet: %s with size %s (%s layers)" % \
            (self._name, self.size, self._layers)

    def __repr__(self):
        return "ExtrudedSet(%r, %r)" % (self._parent, self._layers)

    @utils.cached_property
    def parent(self):
        """The parent set this extruded set was built on."""
        return self._parent

    @utils.cached_property
    def layers(self):
        """The layers of this extruded set.

        :raises ValueError: if the layers were given per entity.
        """
        if self.constant_layers:
            # Backwards compat
            return self.layers_array[0, 1]
        else:
            raise ValueError("No single layer, use layers_array attribute")

    @utils.cached_property
    def layers_array(self):
        """The 2D array of ``[start, stop)`` layer bounds built in ``__init__``."""
        return self._layers
+ """ + @utils.validate_type(('superset', Set, TypeError), + ('indices', (list, tuple, np.ndarray), TypeError)) + def __init__(self, superset, indices): + # sort and remove duplicates + indices = np.unique(indices) + if isinstance(superset, Subset): + # Unroll indices to point to those in the parent + indices = superset.indices[indices] + superset = superset.superset + assert type(superset) is Set or type(superset) is ExtrudedSet, \ + 'Subset construction failed, should not happen' + + self._superset = superset + self._indices = utils.verify_reshape(indices, dtypes.IntType, (len(indices),)) + + if len(self._indices) > 0 and (self._indices[0] < 0 or self._indices[-1] >= self._superset.total_size): + raise ex.SubsetIndexOutOfBounds( + 'Out of bounds indices in Subset construction: [%d, %d) not [0, %d)' % + (self._indices[0], self._indices[-1], self._superset.total_size)) + + self._sizes = ((self._indices < superset.core_size).sum(), + (self._indices < superset.size).sum(), + len(self._indices)) + self._extruded = superset._extruded + + @utils.cached_property + def _kernel_args_(self): + return self._superset._kernel_args_ + (self._indices.ctypes.data, ) + + @utils.cached_property + def _argtypes_(self): + return self._superset._argtypes_ + (ctypes.c_voidp, ) + + # Look up any unspecified attributes on the _set. 
+ def __getattr__(self, name): + """Returns a :class:`Set` specific attribute.""" + value = getattr(self._superset, name) + setattr(self, name, value) + return value + + def __pow__(self, e): + """Derive a :class:`DataSet` with dimension ``e``""" + raise NotImplementedError("Deriving a DataSet from a Subset is unsupported") + + def __str__(self): + return "OP2 Subset: %s with sizes %s" % \ + (self._name, self._sizes) + + def __repr__(self): + return "Subset(%r, %r)" % (self._superset, self._indices) + + def __call__(self, *indices): + """Build a :class:`Subset` from this :class:`Subset` + + :arg indices: The elements of this :class:`Subset` from which the + :class:`Subset` should be formed. + + """ + if len(indices) == 1: + indices = indices[0] + if np.isscalar(indices): + indices = [indices] + return Subset(self, indices) + + @utils.cached_property + def superset(self): + """Returns the superset Set""" + return self._superset + + @utils.cached_property + def indices(self): + """Returns the indices pointing in the superset.""" + return self._indices + + @utils.cached_property + def owned_indices(self): + """Return the indices that correspond to the owned entities of the + superset. + """ + return self.indices[self.indices < self.superset.size] + + @utils.cached_property + def layers_array(self): + if self._superset.constant_layers: + return self._superset.layers_array + else: + return self._superset.layers_array[self.indices, ...] + + def _check_operands(self, other): + if type(other) is Set: + if other is not self._superset: + raise TypeError("Superset mismatch: self._superset (%s) != other (%s)" % (self._superset, other)) + elif type(other) is Subset: + if self._superset is not other._superset: + raise TypeError("Unable to perform set operation between subsets of mismatching supersets (%s != %s)" % (self._superset, other._superset)) + else: + raise TypeError("Unable to perform set operations between `Subset` and %s." 
class MixedSet(Set, caching.ObjectCached):
    r"""A container for a bag of :class:`Set`\s."""

    def __init__(self, sets):
        r""":param iterable sets: Iterable of :class:`Set`\s or :class:`ExtrudedSet`\s"""
        # Return early when this instance has already been set up.
        if self._initialized:
            return
        self._sets = sets
        assert all(s is None
                   or isinstance(s, GlobalSet)
                   or ((s.layers == self._sets[0].layers).all() if s.layers is not None else True)
                   for s in sets), \
            "All components of a MixedSet must have the same number of layers."
        # TODO: do all sets need the same communicator?
        # Picks the first truthy communicator among the components.
        self.comm = functools.reduce(lambda a, b: a or b, map(lambda s: s if s is None else s.comm, sets))
        self._initialized = True

    @utils.cached_property
    def _kernel_args_(self):
        raise NotImplementedError

    @utils.cached_property
    def _argtypes_(self):
        raise NotImplementedError

    @utils.cached_property
    def _wrapper_cache_key_(self):
        raise NotImplementedError

    @classmethod
    def _process_args(cls, sets, **kwargs):
        """Canonicalise ``sets`` to a tuple and pick the first entry as the
        object to cache on."""
        sets = list(sets)
        try:
            sets = utils.as_tuple(sets, ExtrudedSet)
        except TypeError:
            sets = utils.as_tuple(sets, (Set, type(None)))
        cache = sets[0]
        return (cache, sets), kwargs

    @classmethod
    def _cache_key(cls, sets, **kwargs):
        return sets

    def __getitem__(self, idx):
        """Return :class:`Set` with index ``idx`` or a given slice of sets."""
        return self._sets[idx]

    @utils.cached_property
    def split(self):
        r"""The underlying tuple of :class:`Set`\s."""
        return self._sets

    @utils.cached_property
    def core_size(self):
        """Core set size. Owned elements not touching halo elements.

        ``None`` components count as zero, as they do in :attr:`size`.
        """
        return sum(0 if s is None else s.core_size for s in self._sets)

    @utils.cached_property
    def size(self):
        """Set size, owned elements."""
        return sum(0 if s is None else s.size for s in self._sets)

    @utils.cached_property
    def total_size(self):
        """Total set size, including halo elements.

        ``None`` components count as zero, as they do in :attr:`size`.
        """
        return sum(0 if s is None else s.total_size for s in self._sets)

    @utils.cached_property
    def sizes(self):
        """Set sizes: core, owned, total."""
        return (self.core_size, self.size, self.total_size)

    @utils.cached_property
    def name(self):
        """User-defined labels."""
        return tuple(s.name for s in self._sets)

    @utils.cached_property
    def halo(self):
        r""":class:`Halo`\s associated with these :class:`Set`\s.

        ``None`` when no component has a truthy halo.
        """
        halos = tuple(s.halo for s in self._sets)
        return halos if any(halos) else None

    @utils.cached_property
    def _extruded(self):
        # Decided by the first component only.
        return isinstance(self._sets[0], ExtrudedSet)

    @utils.cached_property
    def layers(self):
        """Numbers of layers in the extruded mesh (or None if this MixedSet is not extruded)."""
        return self._sets[0].layers

    def __iter__(self):
        r"""Yield all :class:`Set`\s when iterated over."""
        yield from self._sets

    def __len__(self):
        r"""Return number of contained :class:`Set`\s."""
        return len(self._sets)

    def __pow__(self, e):
        """Derive a :class:`MixedDataSet` with dimensions ``e``"""
        from pyop2.types import MixedDataSet
        return MixedDataSet(self._sets, e)

    def __str__(self):
        return "OP2 MixedSet composed of Sets: %s" % (self._sets,)

    def __repr__(self):
        return "MixedSet(%r)" % (self._sets,)

    def __eq__(self, other):
        """Two mixed sets are equal when they have the same type and components."""
        return type(self) == type(other) and self._sets == other._sets

    def __hash__(self):
        # Defining __eq__ alone sets __hash__ to None; hash the same data
        # that __eq__ compares so equal mixed sets hash equal.
        return hash((type(self), tuple(self._sets)))
packages=['pyop2', 'pyop2.codegen', 'pyop2.types'], package_data={ 'pyop2': ['assets/*', '*.h', '*.pxd', '*.pyx', 'codegen/c/*.c']}, scripts=glob('scripts/*'), diff --git a/test/unit/test_api.py b/test/unit/test_api.py index eee28bb35..777eac4d3 100644 --- a/test/unit/test_api.py +++ b/test/unit/test_api.py @@ -39,10 +39,7 @@ import numpy as np from numpy.testing import assert_equal -from pyop2 import op2 -from pyop2 import exceptions -from pyop2 import sequential -from pyop2 import base +from pyop2 import exceptions, op2 @pytest.fixture @@ -358,7 +355,7 @@ def test_iteration_incompatibility(self, set, m_iterset_toset, dat): e = op2.ExtrudedSet(set, 5) k = op2.Kernel('static void k() { }', 'k') with pytest.raises(exceptions.MapValueError): - base.ParLoop(k, e, dat(op2.READ, m_iterset_toset)) + op2.ParLoop(k, e, dat(op2.READ, m_iterset_toset)) class TestSubsetAPI: @@ -508,7 +505,7 @@ def test_mixed_set_ne_set(self, sets): def test_mixed_set_repr(self, mset): "MixedSet repr should produce a MixedSet object when eval'd." from pyop2.op2 import Set, MixedSet # noqa: needed by eval - assert isinstance(eval(repr(mset)), base.MixedSet) + assert isinstance(eval(repr(mset)), op2.MixedSet) def test_mixed_set_str(self, mset): "MixedSet should have the expected string representation." @@ -718,7 +715,7 @@ def test_mixed_dset_ne_dset(self, diterset, dtoset): def test_mixed_dset_repr(self, mdset): "MixedDataSet repr should produce a MixedDataSet object when eval'd." from pyop2.op2 import Set, DataSet, MixedDataSet # noqa: needed by eval - assert isinstance(eval(repr(mdset)), base.MixedDataSet) + assert isinstance(eval(repr(mdset)), op2.MixedDataSet) def test_mixed_dset_str(self, mdset): "MixedDataSet should have the expected string representation." @@ -1000,7 +997,7 @@ def test_mixed_dat_repr(self, mdat): "MixedDat repr should produce a MixedDat object when eval'd." 
from pyop2.op2 import Set, DataSet, MixedDataSet, Dat, MixedDat # noqa: needed by eval from numpy import dtype # noqa: needed by eval - assert isinstance(eval(repr(mdat)), base.MixedDat) + assert isinstance(eval(repr(mdat)), op2.MixedDat) def test_mixed_dat_str(self, mdat): "MixedDat should have the expected string representation." @@ -1220,7 +1217,7 @@ def test_mat_illegal_sets(self): def test_mat_illegal_name(self, sparsity): "Mat name should be string." - with pytest.raises(sequential.NameTypeError): + with pytest.raises(exceptions.NameTypeError): op2.Mat(sparsity, name=2) def test_mat_dtype(self, mat): @@ -1663,7 +1660,7 @@ def test_illegal_dat_iterset(self): map = op2.Map(set2, set1, 1, [0, 0, 0]) kernel = op2.Kernel("void k() { }", "k") with pytest.raises(exceptions.MapValueError): - base.ParLoop(kernel, set1, dat(op2.READ, map)) + op2.ParLoop(kernel, set1, dat(op2.READ, map)) def test_illegal_mat_iterset(self, sparsity): """ParLoop should reject a Mat argument using a different iteration diff --git a/test/unit/test_caching.py b/test/unit/test_caching.py index f3c68e0ef..783f6cf4e 100644 --- a/test/unit/test_caching.py +++ b/test/unit/test_caching.py @@ -34,7 +34,9 @@ import pytest import numpy -from pyop2 import op2, base +from pyop2 import op2 +import pyop2.kernel +import pyop2.parloop from coffee.base import * @@ -280,7 +282,7 @@ class TestGeneratedCodeCache: Generated Code Cache Tests. """ - cache = base.JITModule._cache + cache = pyop2.parloop.JITModule._cache @pytest.fixture def a(cls, diterset): @@ -470,7 +472,7 @@ class TestKernelCache: Kernel caching tests. 
""" - cache = base.Kernel._cache + cache = pyop2.kernel.Kernel._cache def test_kernels_same_code_same_name(self): """Kernels with same code and name should be retrieved from cache.""" diff --git a/test/unit/test_global_reduction.py b/test/unit/test_global_reduction.py index 4f3d6e29a..fa2258924 100644 --- a/test/unit/test_global_reduction.py +++ b/test/unit/test_global_reduction.py @@ -449,12 +449,9 @@ def test_inc_repeated_loop(self, set): assert_allclose(g.data, set.size) def test_inc_reused_loop(self, set): - from pyop2.sequential import ParLoop g = op2.Global(1, 0, dtype=numpy.uint32) k = """void k(unsigned int* g) { *g += 1; }""" - loop = ParLoop(op2.Kernel(k, "k"), - set, - g(op2.INC)) + loop = op2.ParLoop(op2.Kernel(k, "k"), set, g(op2.INC)) loop.compute() assert_allclose(g.data, set.size) loop.compute()