diff --git a/pyop2/base.py b/pyop2/base.py index 5de3b6ee8..a247597a6 100644 --- a/pyop2/base.py +++ b/pyop2/base.py @@ -47,6 +47,7 @@ import types from hashlib import md5 +from pyop2.datatypes import IntType, as_cstr from pyop2.configuration import configuration from pyop2.caching import Cached, ObjectCached from pyop2.exceptions import * @@ -916,7 +917,7 @@ def __init__(self, superset, indices): 'Subset construction failed, should not happen' self._superset = superset - self._indices = verify_reshape(indices, np.int32, (len(indices),)) + self._indices = verify_reshape(indices, IntType, (len(indices),)) if len(self._indices) > 0 and (self._indices[0] < 0 or self._indices[-1] >= self._superset.total_size): @@ -1650,21 +1651,7 @@ def dtype(self): @cached_property def ctype(self): """The c type of the data.""" - # FIXME: Complex and float16 not supported - typemap = {"bool": "unsigned char", - "int": "int", - "int8": "char", - "int16": "short", - "int32": "int", - "int64": "long long", - "uint8": "unsigned char", - "uint16": "unsigned short", - "uint32": "unsigned int", - "uint64": "unsigned long", - "float": "double", - "float32": "float", - "float64": "double"} - return typemap[self.dtype.name] + return as_cstr(self.dtype) @cached_property def name(self): @@ -2838,10 +2825,14 @@ def __init__(self, iterset, toset, arity, values=None, name=None, offset=None, p self._toset = toset self.comm = toset.comm self._arity = arity - self._values = verify_reshape(values, np.int32, (iterset.total_size, arity), + self._values = verify_reshape(values, IntType, + (iterset.total_size, arity), allow_none=True) self._name = name or "map_%d" % Map._globalcount - self._offset = offset + if offset is None or len(offset) == 0: + self._offset = None + else: + self._offset = verify_reshape(offset, IntType, (arity, )) # This is intended to be used for modified maps, for example # where a boundary condition is imposed by setting some map # entries negative. @@ -2855,9 +2846,9 @@ def __init__(self, iterset, toset, arity, values=None, name=None, offset=None, p if offset is not None and bt_masks is not None: for name, mask in six.iteritems(bt_masks): - self._bottom_mask[name] = np.zeros(len(offset)) + self._bottom_mask[name] = np.zeros(len(offset), dtype=IntType) self._bottom_mask[name][mask[0]] = -1 - self._top_mask[name] = np.zeros(len(offset)) + self._top_mask[name] = np.zeros(len(offset), dtype=IntType) self._top_mask[name][mask[1]] = -1 Map._globalcount += 1 diff --git a/pyop2/datatypes.py b/pyop2/datatypes.py new file mode 100644 index 000000000..017428d11 --- /dev/null +++ b/pyop2/datatypes.py @@ -0,0 +1,42 @@ +from __future__ import absolute_import, print_function, division + +import ctypes + +import numpy +from petsc4py.PETSc import IntType, RealType, ScalarType + +IntType = numpy.dtype(IntType) +RealType = numpy.dtype(RealType) +ScalarType = numpy.dtype(ScalarType) + + +def as_cstr(dtype): + """Convert a numpy dtype like object to a C type as a string.""" + return {"bool": "unsigned char", + "int": "int", + "int8": "int8_t", + "int16": "int16_t", + "int32": "int32_t", + "int64": "int64_t", + "uint8": "uint8_t", + "uint16": "uint16_t", + "uint32": "uint32_t", + "uint64": "uint64_t", + "float32": "float", + "float64": "double"}[numpy.dtype(dtype).name] + + +def as_ctypes(dtype): + """Convert a numpy dtype like object to a ctypes type.""" + return {"bool": ctypes.c_bool, + "int": ctypes.c_int, + "int8": ctypes.c_char, + "int16": ctypes.c_int16, + "int32": ctypes.c_int32, + "int64": ctypes.c_int64, + "uint8": ctypes.c_ubyte, + "uint16": ctypes.c_uint16, + "uint32": ctypes.c_uint32, + "uint64": ctypes.c_uint64, + "float32": ctypes.c_float, + "float64": ctypes.c_double}[numpy.dtype(dtype).name] diff --git a/pyop2/petsc_base.py b/pyop2/petsc_base.py index f2fadba06..1f7ff849b 100644 --- a/pyop2/petsc_base.py +++ b/pyop2/petsc_base.py @@ -37,6 +37,7 @@ from functools import partial import numpy as np +from pyop2.datatypes import IntType from pyop2 import base from pyop2 import mpi from pyop2 import sparsity @@ -55,7 +56,7 @@ def lgmap(self): """ lgmap = PETSc.LGMap() if self.comm.size == 1: - lgmap.create(indices=np.arange(self.size, dtype=PETSc.IntType), + lgmap.create(indices=np.arange(self.size, dtype=IntType), bsize=self.cdim, comm=self.comm) else: lgmap.create(indices=self.halo.global_to_petsc_numbering, @@ -134,7 +135,7 @@ def lgmap(self): indices for this :class:`DataSet`. """ lgmap = PETSc.LGMap() - lgmap.create(indices=np.arange(1, dtype=PETSc.IntType), + lgmap.create(indices=np.arange(1, dtype=IntType), bsize=self.cdim, comm=self.comm) return lgmap @@ -237,7 +238,7 @@ def lgmap(self): lgmap = PETSc.LGMap() if self.comm.size == 1: size = sum(s.size * s.cdim for s in self) - lgmap.create(indices=np.arange(size, dtype=PETSc.IntType), + lgmap.create(indices=np.arange(size, dtype=IntType), bsize=1, comm=self.comm) return lgmap # Compute local to global maps for a monolithic mixed system @@ -264,18 +265,19 @@ def lgmap(self): # Finally, we need to shift the field-local entry by the # current field offset. idx_size = sum(s.total_size*s.cdim for s in self) - indices = np.full(idx_size, -1, dtype=PETSc.IntType) - owned_sz = np.array([sum(s.size * s.cdim for s in self)], dtype=PETSc.IntType) + indices = np.full(idx_size, -1, dtype=IntType) + owned_sz = np.array([sum(s.size * s.cdim for s in self)], + dtype=IntType) field_offset = np.empty_like(owned_sz) self.comm.Scan(owned_sz, field_offset) field_offset -= owned_sz - all_field_offsets = np.empty(self.comm.size, dtype=PETSc.IntType) + all_field_offsets = np.empty(self.comm.size, dtype=IntType) self.comm.Allgather(field_offset, all_field_offsets) start = 0 - all_local_offsets = np.zeros(self.comm.size, dtype=PETSc.IntType) - current_offsets = np.zeros(self.comm.size + 1, dtype=PETSc.IntType) + all_local_offsets = np.zeros(self.comm.size, dtype=IntType) + current_offsets = np.zeros(self.comm.size + 1, dtype=IntType) for s in self: idx = indices[start:start + s.total_size * s.cdim] owned_sz[0] = s.size * s.cdim @@ -557,7 +559,7 @@ def _flush_assembly(self): self._parent._flush_assembly() def set_local_diagonal_entries(self, rows, diag_val=1.0, idx=None): - rows = np.asarray(rows, dtype=PETSc.IntType) + rows = np.asarray(rows, dtype=IntType) rbs, _ = self.dims[0][0] if len(rows) == 0: # No need to set anything if we didn't get any rows, but @@ -844,7 +846,7 @@ def set_local_diagonal_entries(self, rows, diag_val=1.0, idx=None): The indices in ``rows`` should index the process-local rows of the matrix (no mapping to global indexes is applied). """ - rows = np.asarray(rows, dtype=PETSc.IntType) + rows = np.asarray(rows, dtype=IntType) rbs, _ = self.dims[0][0] if len(rows) == 0: # No need to set anything if we didn't get any rows, but @@ -877,6 +879,9 @@ def _assemble(self): if self.assembly_state is not Mat.ASSEMBLED: self.handle.assemble() self.assembly_state = Mat.ASSEMBLED + # Mark blocks as assembled as well. + for m in self: + m.handle.assemble() def addto_values(self, rows, cols, values): """Add a block of values to the :class:`Mat`.""" diff --git a/pyop2/sequential.py b/pyop2/sequential.py index ee20db293..e262297af 100644 --- a/pyop2/sequential.py +++ b/pyop2/sequential.py @@ -36,11 +36,11 @@ from six.moves import range, zip import os -import ctypes from textwrap import dedent from copy import deepcopy as dcopy from collections import OrderedDict +from pyop2.datatypes import IntType, as_cstr, as_ctypes from pyop2 import base from pyop2 import compilation from pyop2 import petsc_base @@ -109,7 +109,7 @@ def c_wrapper_arg(self): for i, map in enumerate(as_tuple(self.map, Map)): if map is not None: for j, m in enumerate(map): - val += ", int *%s" % self.c_map_name(i, j) + val += ", %s *%s" % (as_cstr(IntType), self.c_map_name(i, j)) return val def c_vec_dec(self, is_facet=False): @@ -255,6 +255,7 @@ def c_addto(self, i, j, buf_name, tmp_name, tmp_decl, rows_str = "rowmap" cols_str = "colmap" addto = "MatSetValuesLocal" + nbits = IntType.itemsize * 8 - 2 fdict = {'nrows': nrows, 'ncols': ncols, 'rdim': rdim, @@ -262,33 +263,44 @@ def c_addto(self, i, j, buf_name, tmp_name, tmp_decl, 'rowmap': self.c_map_name(0, i), 'colmap': self.c_map_name(1, j), 'drop_full_row': 0 if rmap.vector_index is not None else 1, - 'drop_full_col': 0 if cmap.vector_index is not None else 1} + 'drop_full_col': 0 if cmap.vector_index is not None else 1, + 'IntType': as_cstr(IntType), + 'NBIT': nbits, + # UGH, need to make sure literals have + # correct type ("long int" if using 64 bit + # ints). + 'ONE': {62: "1L", 30: "1"}[nbits], + 'MASK': "0x%x%s" % (sum(2**(nbits - i) for i in range(3)), + {62: "L", 30: ""}[nbits])} # Horrible hack alert # To apply BCs to a component of a Dat with cdim > 1 # we encode which components to apply things to in the # high bits of the map value # The value that comes in is: - # -(row + 1 + sum_i 2 ** (30 - i)) + # NBIT = (sizeof(IntType)*8 - 2) + # -(row + 1 + sum_i 2 ** (NBIT - i)) # where i are the components to zero # # So, the actual row (if it's negative) is: - # (~input) & ~0x70000000 + # MASK = sum_i 2**(NBIT - i) + # (~input) & ~MASK # And we can determine which components to zero by - # inspecting the high bits (1 << 30 - i) + # inspecting the high bits (1 << NBIT - i) ret.append(""" - PetscInt rowmap[%(nrows)d*%(rdim)d]; - PetscInt colmap[%(ncols)d*%(cdim)d]; - int discard, tmp, block_row, block_col; + %(IntType)s rowmap[%(nrows)d*%(rdim)d]; + %(IntType)s colmap[%(ncols)d*%(cdim)d]; + %(IntType)s block_row, block_col, tmp; + int discard; for ( int j = 0; j < %(nrows)d; j++ ) { block_row = %(rowmap)s[i*%(nrows)d + j]; discard = 0; tmp = -(block_row + 1); if ( block_row < 0 ) { discard = 1; - block_row = tmp & ~0x70000000; + block_row = tmp & ~%(MASK)s; } for ( int k = 0; k < %(rdim)d; k++ ) { - if ( discard && (!(tmp & 0x70000000) || %(drop_full_row)d || ((tmp & (1 << (30 - k))) != 0)) ) { + if ( discard && (!(tmp & %(MASK)s) || %(drop_full_row)d || ((tmp & (%(ONE)s << (%(NBIT)s - k))) != 0)) ) { rowmap[j*%(rdim)d + k] = -1; } else { rowmap[j*%(rdim)d + k] = (block_row)*%(rdim)d + k; @@ -301,10 +313,10 @@ def c_addto(self, i, j, buf_name, tmp_name, tmp_decl, tmp = -(block_col + 1); if ( block_col < 0 ) { discard = 1; - block_col = tmp & ~0x70000000; + block_col = tmp & ~%(MASK)s; } for ( int k = 0; k < %(cdim)d; k++ ) { - if ( discard && (!(tmp & 0x70000000) || %(drop_full_col)d || ((tmp & (1 << (30 - k))) != 0)) ) { + if ( discard && (!(tmp & %(MASK)s) || %(drop_full_col)d || ((tmp & (%(ONE)s << (%(NBIT)s- k))) != 0)) ) { colmap[j*%(cdim)d + k] = -1; } else { colmap[j*%(cdim)d + k] = (block_col)*%(cdim)d + k; @@ -325,6 +337,7 @@ def c_addto(self, i, j, buf_name, tmp_name, tmp_decl, 'ncols': ncols, 'rows': rows_str, 'cols': cols_str, + 'IntType': as_cstr(IntType), 'insert': "INSERT_VALUES" if self.access == WRITE else "ADD_VALUES"}) ret = " "*16 + "{\n" + "\n".join(ret) + "\n" + " "*16 + "}" return ret @@ -397,8 +410,10 @@ def c_map_decl(self, is_facet=False): dim = m.arity if is_facet: dim *= 2 - val.append("int xtr_%(name)s[%(dim)s];" % - {'name': self.c_map_name(i, j), 'dim': dim}) + val.append("%(IntType)s xtr_%(name)s[%(dim)s];" % + {'name': self.c_map_name(i, j), + 'dim': dim, + 'IntType': as_cstr(IntType)}) return '\n'.join(val)+'\n' def c_map_init(self, is_top=False, is_facet=False): @@ -552,7 +567,8 @@ def c_buffer_scatter_vec(self, count, i, j, mxofs, buf_name): class JITModule(base.JITModule): _wrapper = """ -void %(wrapper_name)s(int start, int end, +void %(wrapper_name)s(int start, + int end, %(ssinds_arg)s %(wrapper_args)s %(layer_arg)s) { @@ -561,7 +577,7 @@ class JITModule(base.JITModule): %(map_decl)s %(vec_decs)s; for ( int n = start; n < end; n++ ) { - int i = %(index_expr)s; + %(IntType)s i = %(index_expr)s; %(vec_inits)s; %(map_init)s; %(extr_loop)s @@ -657,6 +673,7 @@ def compile(self): #include #include #include + #include %(sys_headers)s %(kernel)s @@ -715,7 +732,8 @@ def generate_code(self): return self._code_dict def set_argtypes(self, iterset, *args): - argtypes = [ctypes.c_int, ctypes.c_int] + index_type = as_ctypes(IntType) + argtypes = [index_type, index_type] if isinstance(iterset, Subset): argtypes.append(iterset._argtype) for arg in args: @@ -732,8 +750,8 @@ def set_argtypes(self, iterset, *args): argtypes.append(m._argtype) if iterset._extruded: - argtypes.append(ctypes.c_int) - argtypes.append(ctypes.c_int) + argtypes.append(index_type) + argtypes.append(index_type) self._argtypes = argtypes @@ -828,12 +846,12 @@ def extrusion_loop(): return "for (int j_0 = start_layer; j_0 < end_layer; ++j_0){" _ssinds_arg = "" - _index_expr = "n" + _index_expr = "(%s)n" % as_cstr(IntType) is_top = (iteration_region == ON_TOP) is_facet = (iteration_region == ON_INTERIOR_FACETS) if isinstance(itspace._iterset, Subset): - _ssinds_arg = "int* ssinds," + _ssinds_arg = "%s* ssinds," % as_cstr(IntType) _index_expr = "ssinds[n]" _wrapper_args = ', '.join([arg.c_wrapper_arg() for arg in args]) @@ -1016,6 +1034,7 @@ def itset_loop_body(i, j, shape, offsets, is_facet=False): 'buffer_decl': _buf_decl, 'buffer_gather': _buf_gather, 'kernel_args': _kernel_args, + 'IntType': as_cstr(IntType), 'itset_loop_body': '\n'.join([itset_loop_body(i, j, shape, offsets, is_facet=(iteration_region == ON_INTERIOR_FACETS)) for i, j, shape, offsets in itspace])} @@ -1040,19 +1059,23 @@ def generate_cell_wrapper(itspace, args, forward_args=(), kernel_name=None, wrap snippets = wrapper_snippets(itspace, args, kernel_name=kernel_name, wrapper_name=wrapper_name) if itspace._extruded: - snippets['index_exprs'] = """int i = cell / nlayers; - int j = cell % nlayers;""" - snippets['nlayers_arg'] = ", int nlayers" - snippets['extr_pos_loop'] = "{" if direct else "for (int j_0 = 0; j_0 < j; ++j_0) {" + snippets['index_exprs'] = """{0} i = cell / nlayers; + {0} j = cell % nlayers;""".format(as_cstr(IntType)) + snippets['nlayers_arg'] = ", {0} nlayers".format(as_cstr(IntType)) + snippets['extr_pos_loop'] = "{" if direct else "for ({0} j_0 = 0; j_0 < j; ++j_0) {{".format(as_cstr(IntType)) else: - snippets['index_exprs'] = "int i = cell;" + snippets['index_exprs'] = "{0} i = cell;".format(as_cstr(IntType)) snippets['nlayers_arg'] = "" snippets['extr_pos_loop'] = "" snippets['wrapper_fargs'] = "".join("{1} farg{0}, ".format(i, arg) for i, arg in enumerate(forward_args)) snippets['kernel_fargs'] = "".join("farg{0}, ".format(i) for i in range(len(forward_args))) - template = """static inline void %(wrapper_name)s(%(wrapper_fargs)s%(wrapper_args)s%(nlayers_arg)s, int cell) + snippets['IntType'] = as_cstr(IntType) + template = """ +#include + +static inline void %(wrapper_name)s(%(wrapper_fargs)s%(wrapper_args)s%(nlayers_arg)s, %(IntType)s cell) { %(user_code)s %(wrapper_decs)s; diff --git a/pyop2/sparsity.pyx b/pyop2/sparsity.pyx index 350765492..faab0bf3c 100644 --- a/pyop2/sparsity.pyx +++ b/pyop2/sparsity.pyx @@ -40,6 +40,7 @@ cimport numpy as np import cython cimport petsc4py.PETSc as PETSc from petsc4py import PETSc +from pyop2.datatypes import IntType np.import_array() @@ -229,8 +230,13 @@ def build_sparsity(object sparsity, bint parallel, bool block=True): if nrows == 0: # We don't own any rows, return something appropriate. - dummy = np.empty(0, dtype=np.int32).reshape(-1) - return 0, 0, dummy, dummy, dummy, dummy + dummy = np.empty(0, dtype=IntType).reshape(-1) + sparsity._d_nz = 0 + sparsity._o_nz = 0 + sparsity._d_nnz = dummy + sparsity._o_nnz = dummy + sparsity._rowptr = dummy + sparsity._colidx = dummy # Exposition: # When building a monolithic sparsity for a mixed space, we build @@ -294,18 +300,18 @@ def build_sparsity(object sparsity, bint parallel, bool block=True): row_offset += rset[r].size * rdim restore_writeable(rmap, rflag) - cdef np.ndarray[PetscInt, ndim=1] nnz = np.zeros(nrows, dtype=PETSc.IntType) - cdef np.ndarray[PetscInt, ndim=1] onnz = np.zeros(nrows, dtype=PETSc.IntType) + cdef np.ndarray[PetscInt, ndim=1] nnz = np.zeros(nrows, dtype=IntType) + cdef np.ndarray[PetscInt, ndim=1] onnz = np.zeros(nrows, dtype=IntType) cdef np.ndarray[PetscInt, ndim=1] rowptr cdef np.ndarray[PetscInt, ndim=1] colidx cdef int nz, onz if make_rowptr: - rowptr = np.empty(nrows + 1, dtype=PETSc.IntType) + rowptr = np.empty(nrows + 1, dtype=IntType) rowptr[0] = 0 else: # Can't build these, so create dummy arrays - rowptr = np.empty(0, dtype=PETSc.IntType).reshape(-1) - colidx = np.empty(0, dtype=PETSc.IntType).reshape(-1) + rowptr = np.empty(0, dtype=IntType).reshape(-1) + colidx = np.empty(0, dtype=IntType).reshape(-1) nz = 0 onz = 0 @@ -322,7 +328,7 @@ def build_sparsity(object sparsity, bint parallel, bool block=True): onz += val if make_rowptr: - colidx = np.empty(nz, dtype=PETSc.IntType) + colidx = np.empty(nz, dtype=IntType) assert diag.size() == 1, "Can't make rowptr for mixed monolithic mat" for row in range(nrows): diag[0][row].sort() diff --git a/test/unit/test_api.py b/test/unit/test_api.py index 72fc4ce89..8ea2c5575 100644 --- a/test/unit/test_api.py +++ b/test/unit/test_api.py @@ -1487,8 +1487,9 @@ def test_map_illegal_length(self, iterset, toset): def test_map_convert_float_int(self, iterset, toset): "Float data should be implicitely converted to int." + from pyop2.datatypes import IntType m = op2.Map(iterset, toset, 1, [1.5] * iterset.size) - assert m.values.dtype == np.int32 and m.values.sum() == iterset.size + assert m.values.dtype == IntType and m.values.sum() == iterset.size def test_map_reshape(self, iterset, toset): "Data should be reshaped according to arity."