diff --git a/pyop2/base.py b/pyop2/base.py
index 5de3b6ee8..a247597a6 100644
--- a/pyop2/base.py
+++ b/pyop2/base.py
@@ -47,6 +47,7 @@
 import types
 from hashlib import md5
 
+from pyop2.datatypes import IntType, as_cstr
 from pyop2.configuration import configuration
 from pyop2.caching import Cached, ObjectCached
 from pyop2.exceptions import *
@@ -916,7 +917,7 @@ def __init__(self, superset, indices):
             'Subset construction failed, should not happen'
 
         self._superset = superset
-        self._indices = verify_reshape(indices, np.int32, (len(indices),))
+        self._indices = verify_reshape(indices, IntType, (len(indices),))
 
         if len(self._indices) > 0 and (self._indices[0] < 0 or
                                        self._indices[-1] >= self._superset.total_size):
@@ -1650,21 +1651,7 @@ def dtype(self):
     @cached_property
     def ctype(self):
         """The c type of the data."""
-        # FIXME: Complex and float16 not supported
-        typemap = {"bool": "unsigned char",
-                   "int": "int",
-                   "int8": "char",
-                   "int16": "short",
-                   "int32": "int",
-                   "int64": "long long",
-                   "uint8": "unsigned char",
-                   "uint16": "unsigned short",
-                   "uint32": "unsigned int",
-                   "uint64": "unsigned long",
-                   "float": "double",
-                   "float32": "float",
-                   "float64": "double"}
-        return typemap[self.dtype.name]
+        return as_cstr(self.dtype)
 
     @cached_property
     def name(self):
@@ -2838,10 +2825,14 @@ def __init__(self, iterset, toset, arity, values=None, name=None, offset=None, p
         self._toset = toset
         self.comm = toset.comm
         self._arity = arity
-        self._values = verify_reshape(values, np.int32, (iterset.total_size, arity),
+        self._values = verify_reshape(values, IntType,
+                                      (iterset.total_size, arity),
                                       allow_none=True)
         self._name = name or "map_%d" % Map._globalcount
-        self._offset = offset
+        if offset is None or len(offset) == 0:
+            self._offset = None
+        else:
+            self._offset = verify_reshape(offset, IntType, (arity, ))
         # This is intended to be used for modified maps, for example
         # where a boundary condition is imposed by setting some map
         # entries negative.
@@ -2855,9 +2846,9 @@ def __init__(self, iterset, toset, arity, values=None, name=None, offset=None, p
 
         if offset is not None and bt_masks is not None:
             for name, mask in six.iteritems(bt_masks):
-                self._bottom_mask[name] = np.zeros(len(offset))
+                self._bottom_mask[name] = np.zeros(len(offset), dtype=IntType)
                 self._bottom_mask[name][mask[0]] = -1
-                self._top_mask[name] = np.zeros(len(offset))
+                self._top_mask[name] = np.zeros(len(offset), dtype=IntType)
                 self._top_mask[name][mask[1]] = -1
         Map._globalcount += 1
 
diff --git a/pyop2/datatypes.py b/pyop2/datatypes.py
new file mode 100644
index 000000000..017428d11
--- /dev/null
+++ b/pyop2/datatypes.py
@@ -0,0 +1,42 @@
+from __future__ import absolute_import, print_function, division
+
+import ctypes
+
+import numpy
+from petsc4py.PETSc import IntType, RealType, ScalarType
+
+IntType = numpy.dtype(IntType)
+RealType = numpy.dtype(RealType)
+ScalarType = numpy.dtype(ScalarType)
+
+
+def as_cstr(dtype):
+    """Convert a numpy dtype like object to a C type as a string."""
+    return {"bool": "unsigned char",
+            "int": "int",
+            "int8": "int8_t",
+            "int16": "int16_t",
+            "int32": "int32_t",
+            "int64": "int64_t",
+            "uint8": "uint8_t",
+            "uint16": "uint16_t",
+            "uint32": "uint32_t",
+            "uint64": "uint64_t",
+            "float32": "float",
+            "float64": "double"}[numpy.dtype(dtype).name]
+
+
+def as_ctypes(dtype):
+    """Convert a numpy dtype like object to a ctypes type."""
+    return {"bool": ctypes.c_bool,
+            "int": ctypes.c_int,
+            "int8": ctypes.c_char,
+            "int16": ctypes.c_int16,
+            "int32": ctypes.c_int32,
+            "int64": ctypes.c_int64,
+            "uint8": ctypes.c_ubyte,
+            "uint16": ctypes.c_uint16,
+            "uint32": ctypes.c_uint32,
+            "uint64": ctypes.c_uint64,
+            "float32": ctypes.c_float,
+            "float64": ctypes.c_double}[numpy.dtype(dtype).name]
diff --git a/pyop2/petsc_base.py b/pyop2/petsc_base.py
index f2fadba06..1f7ff849b 100644
--- a/pyop2/petsc_base.py
+++ b/pyop2/petsc_base.py
@@ -37,6 +37,7 @@
 from functools import partial
 import numpy as np
 
+from pyop2.datatypes import IntType
 from pyop2 import base
 from pyop2 import mpi
 from pyop2 import sparsity
@@ -55,7 +56,7 @@ def lgmap(self):
         """
         lgmap = PETSc.LGMap()
         if self.comm.size == 1:
-            lgmap.create(indices=np.arange(self.size, dtype=PETSc.IntType),
+            lgmap.create(indices=np.arange(self.size, dtype=IntType),
                          bsize=self.cdim, comm=self.comm)
         else:
             lgmap.create(indices=self.halo.global_to_petsc_numbering,
@@ -134,7 +135,7 @@ def lgmap(self):
         indices for this :class:`DataSet`.
         """
         lgmap = PETSc.LGMap()
-        lgmap.create(indices=np.arange(1, dtype=PETSc.IntType),
+        lgmap.create(indices=np.arange(1, dtype=IntType),
                      bsize=self.cdim, comm=self.comm)
         return lgmap
 
@@ -237,7 +238,7 @@ def lgmap(self):
         lgmap = PETSc.LGMap()
         if self.comm.size == 1:
             size = sum(s.size * s.cdim for s in self)
-            lgmap.create(indices=np.arange(size, dtype=PETSc.IntType),
+            lgmap.create(indices=np.arange(size, dtype=IntType),
                          bsize=1, comm=self.comm)
             return lgmap
         # Compute local to global maps for a monolithic mixed system
@@ -264,18 +265,19 @@ def lgmap(self):
         # Finally, we need to shift the field-local entry by the
         # current field offset.
         idx_size = sum(s.total_size*s.cdim for s in self)
-        indices = np.full(idx_size, -1, dtype=PETSc.IntType)
-        owned_sz = np.array([sum(s.size * s.cdim for s in self)], dtype=PETSc.IntType)
+        indices = np.full(idx_size, -1, dtype=IntType)
+        owned_sz = np.array([sum(s.size * s.cdim for s in self)],
+                            dtype=IntType)
         field_offset = np.empty_like(owned_sz)
         self.comm.Scan(owned_sz, field_offset)
         field_offset -= owned_sz
 
-        all_field_offsets = np.empty(self.comm.size, dtype=PETSc.IntType)
+        all_field_offsets = np.empty(self.comm.size, dtype=IntType)
         self.comm.Allgather(field_offset, all_field_offsets)
 
         start = 0
-        all_local_offsets = np.zeros(self.comm.size, dtype=PETSc.IntType)
-        current_offsets = np.zeros(self.comm.size + 1, dtype=PETSc.IntType)
+        all_local_offsets = np.zeros(self.comm.size, dtype=IntType)
+        current_offsets = np.zeros(self.comm.size + 1, dtype=IntType)
         for s in self:
             idx = indices[start:start + s.total_size * s.cdim]
             owned_sz[0] = s.size * s.cdim
@@ -557,7 +559,7 @@ def _flush_assembly(self):
         self._parent._flush_assembly()
 
     def set_local_diagonal_entries(self, rows, diag_val=1.0, idx=None):
-        rows = np.asarray(rows, dtype=PETSc.IntType)
+        rows = np.asarray(rows, dtype=IntType)
         rbs, _ = self.dims[0][0]
         if len(rows) == 0:
             # No need to set anything if we didn't get any rows, but
@@ -844,7 +846,7 @@ def set_local_diagonal_entries(self, rows, diag_val=1.0, idx=None):
         The indices in ``rows`` should index the process-local rows of
         the matrix (no mapping to global indexes is applied).
         """
-        rows = np.asarray(rows, dtype=PETSc.IntType)
+        rows = np.asarray(rows, dtype=IntType)
         rbs, _ = self.dims[0][0]
         if len(rows) == 0:
             # No need to set anything if we didn't get any rows, but
@@ -877,6 +879,9 @@ def _assemble(self):
         if self.assembly_state is not Mat.ASSEMBLED:
             self.handle.assemble()
             self.assembly_state = Mat.ASSEMBLED
+            # Mark blocks as assembled as well.
+            for m in self:
+                m.handle.assemble()
 
     def addto_values(self, rows, cols, values):
         """Add a block of values to the :class:`Mat`."""
diff --git a/pyop2/sequential.py b/pyop2/sequential.py
index ee20db293..e262297af 100644
--- a/pyop2/sequential.py
+++ b/pyop2/sequential.py
@@ -36,11 +36,11 @@
 from six.moves import range, zip
 
 import os
-import ctypes
 from textwrap import dedent
 from copy import deepcopy as dcopy
 from collections import OrderedDict
 
+from pyop2.datatypes import IntType, as_cstr, as_ctypes
 from pyop2 import base
 from pyop2 import compilation
 from pyop2 import petsc_base
@@ -109,7 +109,7 @@ def c_wrapper_arg(self):
             for i, map in enumerate(as_tuple(self.map, Map)):
                 if map is not None:
                     for j, m in enumerate(map):
-                        val += ", int *%s" % self.c_map_name(i, j)
+                        val += ", %s *%s" % (as_cstr(IntType), self.c_map_name(i, j))
         return val
 
     def c_vec_dec(self, is_facet=False):
@@ -255,6 +255,7 @@ def c_addto(self, i, j, buf_name, tmp_name, tmp_decl,
                 rows_str = "rowmap"
                 cols_str = "colmap"
                 addto = "MatSetValuesLocal"
+                nbits = IntType.itemsize * 8 - 2
                 fdict = {'nrows': nrows,
                          'ncols': ncols,
                          'rdim': rdim,
@@ -262,33 +263,44 @@ def c_addto(self, i, j, buf_name, tmp_name, tmp_decl,
                          'rowmap': self.c_map_name(0, i),
                          'colmap': self.c_map_name(1, j),
                          'drop_full_row': 0 if rmap.vector_index is not None else 1,
-                         'drop_full_col': 0 if cmap.vector_index is not None else 1}
+                         'drop_full_col': 0 if cmap.vector_index is not None else 1,
+                         'IntType': as_cstr(IntType),
+                         'NBIT': nbits,
+                         # UGH, need to make sure literals have
+                         # correct type ("long int" if using 64 bit
+                         # ints).
+                         'ONE': {62: "1L", 30: "1"}[nbits],
+                         'MASK': "0x%x%s" % (sum(2**(nbits - i) for i in range(3)),
+                                             {62: "L", 30: ""}[nbits])}
                 # Horrible hack alert
                 # To apply BCs to a component of a Dat with cdim > 1
                 # we encode which components to apply things to in the
                 # high bits of the map value
                 # The value that comes in is:
-                # -(row + 1 + sum_i 2 ** (30 - i))
+                # NBIT = (sizeof(IntType)*8 - 2)
+                # -(row + 1 + sum_i 2 ** (NBIT - i))
                 # where i are the components to zero
                 #
                 # So, the actual row (if it's negative) is:
-                # (~input) & ~0x70000000
+                # MASK = sum_i 2**(NBIT - i)
+                # (~input) & ~MASK
                 # And we can determine which components to zero by
-                # inspecting the high bits (1 << 30 - i)
+                # inspecting the high bits (1 << NBIT - i)
                 ret.append("""
-                PetscInt rowmap[%(nrows)d*%(rdim)d];
-                PetscInt colmap[%(ncols)d*%(cdim)d];
-                int discard, tmp, block_row, block_col;
+                %(IntType)s rowmap[%(nrows)d*%(rdim)d];
+                %(IntType)s colmap[%(ncols)d*%(cdim)d];
+                %(IntType)s block_row, block_col, tmp;
+                int discard;
                 for ( int j = 0; j < %(nrows)d; j++ ) {
                     block_row = %(rowmap)s[i*%(nrows)d + j];
                     discard = 0;
                     tmp = -(block_row + 1);
                     if ( block_row < 0 ) {
                         discard = 1;
-                        block_row = tmp & ~0x70000000;
+                        block_row = tmp & ~%(MASK)s;
                     }
                     for ( int k = 0; k < %(rdim)d; k++ ) {
-                        if ( discard && (!(tmp & 0x70000000) || %(drop_full_row)d || ((tmp & (1 << (30 - k))) != 0)) ) {
+                        if ( discard && (!(tmp & %(MASK)s) || %(drop_full_row)d || ((tmp & (%(ONE)s << (%(NBIT)s - k))) != 0)) ) {
                             rowmap[j*%(rdim)d + k] = -1;
                         } else {
                             rowmap[j*%(rdim)d + k] = (block_row)*%(rdim)d + k;
@@ -301,10 +313,10 @@ def c_addto(self, i, j, buf_name, tmp_name, tmp_decl,
                     tmp = -(block_col + 1);
                     if ( block_col < 0 ) {
                         discard = 1;
-                        block_col = tmp & ~0x70000000;
+                        block_col = tmp & ~%(MASK)s;
                     }
                     for ( int k = 0; k < %(cdim)d; k++ ) {
-                        if ( discard && (!(tmp & 0x70000000) || %(drop_full_col)d || ((tmp & (1 << (30 - k))) != 0)) ) {
+                        if ( discard && (!(tmp & %(MASK)s) || %(drop_full_col)d || ((tmp & (%(ONE)s << (%(NBIT)s- k))) != 0)) ) {
                             colmap[j*%(cdim)d + k] = -1;
                         } else {
                             colmap[j*%(cdim)d + k] = (block_col)*%(cdim)d + k;
@@ -325,6 +337,7 @@ def c_addto(self, i, j, buf_name, tmp_name, tmp_decl,
                     'ncols': ncols,
                     'rows': rows_str,
                     'cols': cols_str,
+                    'IntType': as_cstr(IntType),
                     'insert': "INSERT_VALUES" if self.access == WRITE else "ADD_VALUES"})
         ret = " "*16 + "{\n" + "\n".join(ret) + "\n" + " "*16 + "}"
         return ret
@@ -397,8 +410,10 @@ def c_map_decl(self, is_facet=False):
                 dim = m.arity
                 if is_facet:
                     dim *= 2
-                val.append("int xtr_%(name)s[%(dim)s];" %
-                           {'name': self.c_map_name(i, j), 'dim': dim})
+                val.append("%(IntType)s xtr_%(name)s[%(dim)s];" %
+                           {'name': self.c_map_name(i, j),
+                            'dim': dim,
+                            'IntType': as_cstr(IntType)})
         return '\n'.join(val)+'\n'
 
     def c_map_init(self, is_top=False, is_facet=False):
@@ -552,7 +567,8 @@ def c_buffer_scatter_vec(self, count, i, j, mxofs, buf_name):
 class JITModule(base.JITModule):
 
     _wrapper = """
-void %(wrapper_name)s(int start, int end,
+void %(wrapper_name)s(int start,
+                      int end,
                       %(ssinds_arg)s
                       %(wrapper_args)s
                       %(layer_arg)s) {
@@ -561,7 +577,7 @@ class JITModule(base.JITModule):
   %(map_decl)s
   %(vec_decs)s;
   for ( int n = start; n < end; n++ ) {
-    int i = %(index_expr)s;
+    %(IntType)s i = %(index_expr)s;
     %(vec_inits)s;
     %(map_init)s;
     %(extr_loop)s
@@ -657,6 +673,7 @@ def compile(self):
         #include <petsc.h>
         #include <stdbool.h>
         #include <math.h>
+        #include <inttypes.h>
         %(sys_headers)s
 
         %(kernel)s
@@ -715,7 +732,8 @@ def generate_code(self):
         return self._code_dict
 
     def set_argtypes(self, iterset, *args):
-        argtypes = [ctypes.c_int, ctypes.c_int]
+        index_type = as_ctypes(IntType)
+        argtypes = [index_type, index_type]
         if isinstance(iterset, Subset):
             argtypes.append(iterset._argtype)
         for arg in args:
@@ -732,8 +750,8 @@ def set_argtypes(self, iterset, *args):
                             argtypes.append(m._argtype)
 
         if iterset._extruded:
-            argtypes.append(ctypes.c_int)
-            argtypes.append(ctypes.c_int)
+            argtypes.append(index_type)
+            argtypes.append(index_type)
 
         self._argtypes = argtypes
 
@@ -828,12 +846,12 @@ def extrusion_loop():
         return "for (int j_0 = start_layer; j_0 < end_layer; ++j_0){"
 
     _ssinds_arg = ""
-    _index_expr = "n"
+    _index_expr = "(%s)n" % as_cstr(IntType)
     is_top = (iteration_region == ON_TOP)
     is_facet = (iteration_region == ON_INTERIOR_FACETS)
 
     if isinstance(itspace._iterset, Subset):
-        _ssinds_arg = "int* ssinds,"
+        _ssinds_arg = "%s* ssinds," % as_cstr(IntType)
         _index_expr = "ssinds[n]"
 
     _wrapper_args = ', '.join([arg.c_wrapper_arg() for arg in args])
@@ -1016,6 +1034,7 @@ def itset_loop_body(i, j, shape, offsets, is_facet=False):
             'buffer_decl': _buf_decl,
             'buffer_gather': _buf_gather,
             'kernel_args': _kernel_args,
+            'IntType': as_cstr(IntType),
             'itset_loop_body': '\n'.join([itset_loop_body(i, j, shape, offsets, is_facet=(iteration_region == ON_INTERIOR_FACETS))
                                           for i, j, shape, offsets in itspace])}
 
@@ -1040,19 +1059,23 @@ def generate_cell_wrapper(itspace, args, forward_args=(), kernel_name=None, wrap
     snippets = wrapper_snippets(itspace, args, kernel_name=kernel_name, wrapper_name=wrapper_name)
 
     if itspace._extruded:
-        snippets['index_exprs'] = """int i = cell / nlayers;
-    int j = cell % nlayers;"""
-        snippets['nlayers_arg'] = ", int nlayers"
-        snippets['extr_pos_loop'] = "{" if direct else "for (int j_0 = 0; j_0 < j; ++j_0) {"
+        snippets['index_exprs'] = """{0} i = cell / nlayers;
+    {0} j = cell % nlayers;""".format(as_cstr(IntType))
+        snippets['nlayers_arg'] = ", {0} nlayers".format(as_cstr(IntType))
+        snippets['extr_pos_loop'] = "{" if direct else "for ({0} j_0 = 0; j_0 < j; ++j_0) {{".format(as_cstr(IntType))
     else:
-        snippets['index_exprs'] = "int i = cell;"
+        snippets['index_exprs'] = "{0} i = cell;".format(as_cstr(IntType))
         snippets['nlayers_arg'] = ""
         snippets['extr_pos_loop'] = ""
 
     snippets['wrapper_fargs'] = "".join("{1} farg{0}, ".format(i, arg) for i, arg in enumerate(forward_args))
     snippets['kernel_fargs'] = "".join("farg{0}, ".format(i) for i in range(len(forward_args)))
 
-    template = """static inline void %(wrapper_name)s(%(wrapper_fargs)s%(wrapper_args)s%(nlayers_arg)s, int cell)
+    snippets['IntType'] = as_cstr(IntType)
+    template = """
+#include <inttypes.h>
+
+static inline void %(wrapper_name)s(%(wrapper_fargs)s%(wrapper_args)s%(nlayers_arg)s, %(IntType)s cell)
 {
     %(user_code)s
     %(wrapper_decs)s;
diff --git a/pyop2/sparsity.pyx b/pyop2/sparsity.pyx
index 350765492..faab0bf3c 100644
--- a/pyop2/sparsity.pyx
+++ b/pyop2/sparsity.pyx
@@ -40,6 +40,7 @@ cimport numpy as np
 import cython
 cimport petsc4py.PETSc as PETSc
 from petsc4py import PETSc
+from pyop2.datatypes import IntType
 
 np.import_array()
 
@@ -229,8 +230,13 @@ def build_sparsity(object sparsity, bint parallel, bool block=True):
 
     if nrows == 0:
         # We don't own any rows, return something appropriate.
-        dummy = np.empty(0, dtype=np.int32).reshape(-1)
-        return 0, 0, dummy, dummy, dummy, dummy
+        dummy = np.empty(0, dtype=IntType).reshape(-1)
+        sparsity._d_nz = 0
+        sparsity._o_nz = 0
+        sparsity._d_nnz = dummy
+        sparsity._o_nnz = dummy
+        sparsity._rowptr = dummy
+        sparsity._colidx = dummy
 
     # Exposition:
     # When building a monolithic sparsity for a mixed space, we build
@@ -294,18 +300,18 @@ def build_sparsity(object sparsity, bint parallel, bool block=True):
             row_offset += rset[r].size * rdim
             restore_writeable(rmap, rflag)
 
-    cdef np.ndarray[PetscInt, ndim=1] nnz = np.zeros(nrows, dtype=PETSc.IntType)
-    cdef np.ndarray[PetscInt, ndim=1] onnz = np.zeros(nrows, dtype=PETSc.IntType)
+    cdef np.ndarray[PetscInt, ndim=1] nnz = np.zeros(nrows, dtype=IntType)
+    cdef np.ndarray[PetscInt, ndim=1] onnz = np.zeros(nrows, dtype=IntType)
     cdef np.ndarray[PetscInt, ndim=1] rowptr
     cdef np.ndarray[PetscInt, ndim=1] colidx
     cdef int nz, onz
     if make_rowptr:
-        rowptr = np.empty(nrows + 1, dtype=PETSc.IntType)
+        rowptr = np.empty(nrows + 1, dtype=IntType)
         rowptr[0] = 0
     else:
         # Can't build these, so create dummy arrays
-        rowptr = np.empty(0, dtype=PETSc.IntType).reshape(-1)
-        colidx = np.empty(0, dtype=PETSc.IntType).reshape(-1)
+        rowptr = np.empty(0, dtype=IntType).reshape(-1)
+        colidx = np.empty(0, dtype=IntType).reshape(-1)
 
     nz = 0
     onz = 0
@@ -322,7 +328,7 @@ def build_sparsity(object sparsity, bint parallel, bool block=True):
                 onz += val
 
     if make_rowptr:
-        colidx = np.empty(nz, dtype=PETSc.IntType)
+        colidx = np.empty(nz, dtype=IntType)
         assert diag.size() == 1, "Can't make rowptr for mixed monolithic mat"
         for row in range(nrows):
             diag[0][row].sort()
diff --git a/test/unit/test_api.py b/test/unit/test_api.py
index 72fc4ce89..8ea2c5575 100644
--- a/test/unit/test_api.py
+++ b/test/unit/test_api.py
@@ -1487,8 +1487,9 @@ def test_map_illegal_length(self, iterset, toset):
 
     def test_map_convert_float_int(self, iterset, toset):
         "Float data should be implicitely converted to int."
+        from pyop2.datatypes import IntType
         m = op2.Map(iterset, toset, 1, [1.5] * iterset.size)
-        assert m.values.dtype == np.int32 and m.values.sum() == iterset.size
+        assert m.values.dtype == IntType and m.values.sum() == iterset.size
 
     def test_map_reshape(self, iterset, toset):
         "Data should be reshaped according to arity."