From b12b63289e95d3f992f6184fcc1a6de380cbb9ff Mon Sep 17 00:00:00 2001 From: Egor Orachev Date: Wed, 30 Aug 2023 12:11:02 +0300 Subject: [PATCH] gh-218: add matrix initial impl in package --- python/example.py | 8 + python/pyspla/matrix.py | 284 ++++++++++++++++++++++++- python/pyspla/type.py | 44 ++-- python/pyspla/vector.py | 75 ++++--- src/core/tmatrix.hpp | 2 +- src/cpu/cpu_format_coo.hpp | 8 + src/storage/storage_manager.hpp | 5 + src/storage/storage_manager_matrix.hpp | 15 ++ 8 files changed, 377 insertions(+), 64 deletions(-) diff --git a/python/example.py b/python/example.py index e618fdf3f..c59696b4d 100644 --- a/python/example.py +++ b/python/example.py @@ -22,3 +22,11 @@ t = pyspla.Vector(shape=10, dtype=pyspla.INT) t.assign(m, pyspla.Scalar(pyspla.INT, 10), pyspla.INT.SECOND, pyspla.INT.GEZERO) print(t.to_list()) + +M = pyspla.Matrix((10, 10), pyspla.INT) +G = pyspla.Matrix.generate((10, 10), pyspla.INT, density=0.1, dist=[0, 10]) + +print(M.to_list()) +print(G.to_lists()) + +print(G.reduce(pyspla.INT.PLUS)) diff --git a/python/pyspla/matrix.py b/python/pyspla/matrix.py index a8c503c20..664649c2f 100644 --- a/python/pyspla/matrix.py +++ b/python/pyspla/matrix.py @@ -28,8 +28,17 @@ import ctypes +from .bridge import backend, check +from .type import Type, INT, UINT, FLOAT +from .object import Object +from .memview import MemView +from .scalar import Scalar +from .descriptor import Descriptor +from .op import OpUnary, OpBinary, OpSelect +import random as rnd -class Matrix: + +class Matrix(Object): """ Generalized statically-typed sparse storage-invariant matrix primitive. @@ -61,5 +70,274 @@ class Matrix: using one of built-in OpenCL or CUDA accelerators. """ - def __init__(self): - pass + __slots__ = ["_dtype", "_shape"] + + def __init__(self, shape, dtype=INT, hnd=None, label=None): + """ + Creates new matrix of specified type and shape. + + :param dtype: Type. + Type parametrization of a storage. + + :param shape: tuple. + Matrix size (2-dim). + + :param hnd: optional: ctypes.c_void_p. default: None. + Optional native handle to retain. + + :param label: optional: str. default: None. + Optional label to assign. + """ + + super().__init__(None, None) + + assert dtype + assert shape + assert shape[0] > 0 and shape[1] > 0 + assert issubclass(dtype, Type) + + self._dtype = dtype + self._shape = shape + + if not hnd: + hnd = ctypes.c_void_p(0) + check(backend().spla_Matrix_make(ctypes.byref(hnd), + ctypes.c_uint(shape[0]), + ctypes.c_uint(shape[1]), + dtype._hnd)) + + super().__init__(label, hnd) + + @property + def dtype(self): + """ + Type used for storage parametrization of this container. + """ + return self._dtype + + @property + def n_rows(self): + """ + Number of rows in the matrix. + """ + return self._shape[0] + + @property + def n_cols(self): + """ + Number of cols in the matrix. + """ + return self._shape[1] + + @property + def shape(self): + """ + 2-Tuple with shape of matrix. + """ + + return self._shape + + def build(self, view_I: MemView, view_J: MemView, view_V: MemView): + """ + Builds matrix content from a raw memory view resources. + + :param view_I: MemView. + View to keys of matrix to assign. + + :param view_J: MemView. + View to keys of matrix to assign. + + :param view_V: MemView. + View to actual values to store. + """ + + assert view_I + assert view_J + assert view_V + + check(backend().spla_Matrix_build(self.hnd, view_I.hnd, view_J.hnd, view_V.hnd)) + + def read(self): + """ + Read the content of the matrix as a MemView of I, J and V. 
+ + :return: tuple (MemView, MemView, MemView) objects with view to the matrix keys and matrix values. + """ + + view_I_hnd = ctypes.c_void_p(0) + view_J_hnd = ctypes.c_void_p(0) + view_V_hnd = ctypes.c_void_p(0) + check(backend().spla_Matrix_read(self.hnd, + ctypes.byref(view_I_hnd), + ctypes.byref(view_J_hnd), + ctypes.byref(view_V_hnd))) + return MemView(hnd=view_I_hnd, owner=self), \ + MemView(hnd=view_J_hnd, owner=self), \ + MemView(hnd=view_V_hnd, owner=self) + + def to_lists(self): + """ + Read matrix data as a python lists of I, J and V. + + :return: Tuple (List, List, List) with the matrix keys and matrix values. + """ + + I, J, V = self.read() + count = int(I.size / ctypes.sizeof(UINT._c_type)) + + if count == 0: + return [], [], [] + + buffer_I = (UINT._c_type * count)() + buffer_J = (UINT._c_type * count)() + buffer_V = (self._dtype._c_type * count)() + + check(backend().spla_MemView_read(I.hnd, ctypes.c_size_t(0), ctypes.sizeof(buffer_I), buffer_I)) + check(backend().spla_MemView_read(J.hnd, ctypes.c_size_t(0), ctypes.sizeof(buffer_J), buffer_J)) + check(backend().spla_MemView_read(V.hnd, ctypes.c_size_t(0), ctypes.sizeof(buffer_V), buffer_V)) + + return list(buffer_I), list(buffer_J), list(buffer_V) + + def to_list(self): + """ + Read matrix data as a python lists of tuples where key and value stored together. + + :return: List of matrix entries as (I, J, V). + """ + + I, J, V = self.to_lists() + return list(zip(I, J, V)) + + @classmethod + def from_lists(cls, I: list, J: list, V: list, shape, dtype=INT): + """ + Build matrix from a list of sorted keys and associated values to store in matrix. + List with keys `I` and `J` must index entries from range [0, shape-1] and all keys must be sorted. + + :param I: list[UINT]. + List with integral keys of entries. + + :param J: list[UINT]. + List with integral keys of entries. + + :param V: list[Type]. + List with values to store in the matrix. + + :param shape: tuple. + Matrix size. + + :param dtype: + Type of storage parametrization for matrix. + + :return: Created matrix filled with values. + """ + + assert len(I) == len(V) + assert len(J) == len(V) + assert shape + assert shape[0] > 0 and shape[1] > 0 + + if not I: + return Matrix(shape, dtype) + + count = len(I) + + c_I = (UINT._c_type * count)(*I) + c_J = (UINT._c_type * count)(*J) + c_V = (dtype._c_type * count)(*V) + + view_I = MemView(buffer=c_I, size=ctypes.sizeof(c_I), mutable=False) + view_J = MemView(buffer=c_J, size=ctypes.sizeof(c_J), mutable=False) + view_V = MemView(buffer=c_V, size=ctypes.sizeof(c_V), mutable=False) + + M = Matrix(shape=shape, dtype=dtype) + M.build(view_I, view_J, view_V) + + return M + + @classmethod + def generate(cls, shape, dtype=INT, density=0.1, seed=None, dist=(0, 1)): + """ + Creates new matrix of desired type and shape and fills its content + with random values, generated using specified distribution. + + :param shape: tuple. + Size of the matrix (number of values). + + :param dtype: optional: Type. default: INT. + Type of values matrix will have. + + :param density: optional: float. default: 0.1. + Density of matrix or how many entries to generate. + + :param seed: optional: int. default: None. + Optional seed to randomize generator. + + :param dist: optional: tuple. default: [0,1]. + Optional distribution for uniform generation of values. + + :return: Created matrix filled with values. 
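+
+        A hypothetical call for illustration (the generated entries depend on the seed;
+        the shape, density and distribution values below are made up)::
+
+            M = Matrix.generate(shape=(4, 4), dtype=INT, density=0.3, seed=10, dist=(0, 10))
+            print(M.to_list())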
+ """ + + if seed is not None: + rnd.seed(seed) + + keys = sorted(list({(rnd.randint(0, shape[0] - 1), rnd.randint(0, shape[1] - 1)) + for _ in range(int(shape[0] * shape[1] * density))})) + + I = [k[0] for k in keys] + J = [k[1] for k in keys] + count = len(keys) + + if dtype is INT: + V = [rnd.randint(dist[0], dist[1]) for i in range(count)] + elif dtype is UINT: + V = [rnd.randint(dist[0], dist[1]) for i in range(count)] + elif dtype is FLOAT: + V = [rnd.uniform(dist[0], dist[1]) for i in range(count)] + else: + raise Exception("unknown type") + + return cls.from_lists(I, J, V, shape=shape, dtype=dtype) + + def reduce(self, op_reduce, out=None, init=None, desc=None): + """ + Reduce matrix elements. + + :param op_reduce: OpBinary. + Binary operation to apply for reduction of matrix elements. + + :param out: optional: Scalar: default: 0. + Optional scalar to store result of reduction. + + :param init: optional: Scalar: default: 0. + Optional neutral init value for reduction. + + :param desc: optional: Descriptor. default: None. + Optional descriptor object to configure the execution. + + :return: Scalar value with result. + """ + + if out is None: + out = Scalar(dtype=self.dtype) + if init is None: + init = Scalar(dtype=self.dtype, value=0) + + assert out.dtype == self.dtype + assert init.dtype == self.dtype + + check(backend().spla_Exec_m_reduce(out.hnd, init.hnd, self.hnd, op_reduce.hnd, + self._get_desc(desc), self._get_task(None))) + + return out + + def __iter__(self): + I, J, V = self.to_lists() + return zip(I, J, V) + + def _get_desc(self, desc: Descriptor): + return desc.hnd if desc else ctypes.c_void_p(0) + + def _get_task(self, task): + return ctypes.POINTER(ctypes.c_void_p)() diff --git a/python/pyspla/type.py b/python/pyspla/type.py index 4bfa2d874..cdc62d9be 100644 --- a/python/pyspla/type.py +++ b/python/pyspla/type.py @@ -59,28 +59,28 @@ class Type: _matrix_set = None _hnd = None - PLUS = None - MINUS = None - MULT = None - DIV = None - MINUS_POW2 = None - FIRST = None - SECOND = None - ONE = None - MIN = None - MAX = None - BOR = None - BAND = None - BXOR = None - - EQZERO = None - NQZERO = None - GTZERO = None - GEZERO = None - LTZERO = None - LEZERO = None - ALWAYS = None - NEVER = None + PLUS: OpBinary + MINUS: OpBinary + MULT: OpBinary + DIV: OpBinary + MINUS_POW2: OpBinary + FIRST: OpBinary + SECOND: OpBinary + ONE: OpBinary + MIN: OpBinary + MAX: OpBinary + BOR: OpBinary + BAND: OpBinary + BXOR: OpBinary + + EQZERO: OpSelect + NQZERO: OpSelect + GTZERO: OpSelect + GEZERO: OpSelect + LTZERO: OpSelect + LEZERO: OpSelect + ALWAYS: OpSelect + NEVER: OpSelect @classmethod def get_code(cls): diff --git a/python/pyspla/vector.py b/python/pyspla/vector.py index 217b96877..5e998249b 100644 --- a/python/pyspla/vector.py +++ b/python/pyspla/vector.py @@ -33,7 +33,6 @@ from .object import Object from .memview import MemView from .scalar import Scalar -from .matrix import Matrix from .descriptor import Descriptor from .op import OpUnary, OpBinary, OpSelect import random as rnd @@ -128,21 +127,21 @@ def shape(self): return self._shape - def build(self, keys_view: MemView, values_view: MemView): + def build(self, view_I: MemView, view_V: MemView): """ Builds vector content from a raw memory view resources. - :param keys_view: MemView. + :param view_I: MemView. View to keys of vector to assign. - :param values_view: MemView. + :param view_V: MemView. View to actual values to store. 
""" - assert keys_view - assert values_view + assert view_I + assert view_V - check(backend().spla_Vector_build(self._hnd, keys_view._hnd, values_view._hnd)) + check(backend().spla_Vector_build(self.hnd, view_I.hnd, view_V.hnd)) def read(self): """ @@ -153,7 +152,7 @@ def read(self): keys_view_hnd = ctypes.c_void_p(0) values_view_hnd = ctypes.c_void_p(0) - check(backend().spla_Vector_read(self._hnd, ctypes.byref(keys_view_hnd), ctypes.byref(values_view_hnd))) + check(backend().spla_Vector_read(self.hnd, ctypes.byref(keys_view_hnd), ctypes.byref(values_view_hnd))) return MemView(hnd=keys_view_hnd, owner=self), MemView(hnd=values_view_hnd, owner=self) def to_lists(self): @@ -163,16 +162,16 @@ def to_lists(self): :return: Tuple (List, List) with the vector keys and vector values. """ - keys, values = self.read() - count = int(keys.size / ctypes.sizeof(UINT._c_type)) + I, V = self.read() + count = int(I.size / ctypes.sizeof(UINT._c_type)) - buffer_keys = (UINT._c_type * count)() - buffer_values = (self._dtype._c_type * count)() + buffer_I = (UINT._c_type * count)() + buffer_V = (self._dtype._c_type * count)() - check(backend().spla_MemView_read(keys._hnd, ctypes.c_size_t(0), ctypes.sizeof(buffer_keys), buffer_keys)) - check(backend().spla_MemView_read(values._hnd, ctypes.c_size_t(0), ctypes.sizeof(buffer_values), buffer_values)) + check(backend().spla_MemView_read(I.hnd, ctypes.c_size_t(0), ctypes.sizeof(buffer_I), buffer_I)) + check(backend().spla_MemView_read(V.hnd, ctypes.c_size_t(0), ctypes.sizeof(buffer_V), buffer_V)) - return list(buffer_keys), list(buffer_values) + return list(buffer_I), list(buffer_V) def to_list(self): """ @@ -181,19 +180,19 @@ def to_list(self): :return: List of vector entries. """ - keys, values = self.to_lists() - return list(zip(keys, values)) + I, V = self.to_lists() + return list(zip(I, V)) @classmethod - def from_lists(cls, keys: list, values: list, shape, dtype=INT): + def from_lists(cls, I: list, V: list, shape, dtype=INT): """ Build vector from a list of sorted keys and associated values to store in vector. List with keys `keys` must index entries from range [0, shape-1] and all keys must be sorted. - :param keys: list[UINT]. + :param I: list[UINT]. List with integral keys of entries. - :param values: list[Type]. + :param V: list[Type]. List with values to store in the vector. :param shape: int. @@ -205,22 +204,22 @@ def from_lists(cls, keys: list, values: list, shape, dtype=INT): :return: Created vector filled with values. """ - assert len(keys) == len(values) + assert len(I) == len(V) assert shape > 0 - if not keys: + if not I: return Vector(shape, dtype) - count = len(keys) + count = len(I) - c_keys = (UINT._c_type * count)(*keys) - c_values = (dtype._c_type * count)(*values) + c_I = (UINT._c_type * count)(*I) + c_V = (dtype._c_type * count)(*V) - view_keys = MemView(buffer=c_keys, size=ctypes.sizeof(c_keys), mutable=False) - view_values = MemView(buffer=c_values, size=ctypes.sizeof(c_values), mutable=False) + view_I = MemView(buffer=c_I, size=ctypes.sizeof(c_I), mutable=False) + view_V = MemView(buffer=c_V, size=ctypes.sizeof(c_V), mutable=False) v = Vector(shape=shape, dtype=dtype) - v.build(view_keys, view_values) + v.build(view_I, view_V) return v @@ -231,7 +230,7 @@ def generate(cls, shape, dtype=INT, density=0.1, seed=None, dist=(0, 1)): with random values, generated using specified distribution. :param shape: int. - Size of the array (number of values). + Size of the vector. :param dtype: optional: Type. default: INT. 
Type of values vector will have. @@ -245,25 +244,25 @@ def generate(cls, shape, dtype=INT, density=0.1, seed=None, dist=(0, 1)): :param dist: optional: tuple. default: [0,1]. Optional distribution for uniform generation of values. - :return: Created array filled with values. + :return: Created vector filled with values. """ if seed is not None: rnd.seed(seed) - keys = [i for i in range(0, shape) if rnd.uniform(0, 1) < density] - count = len(keys) + I = sorted(list({rnd.randint(0, shape - 1) for _ in range(int(shape * density))})) + count = len(I) if dtype is INT: - values = [rnd.randint(dist[0], dist[1]) for i in range(count)] + V = [rnd.randint(dist[0], dist[1]) for i in range(count)] elif dtype is UINT: - values = [rnd.randint(dist[0], dist[1]) for i in range(count)] + V = [rnd.randint(dist[0], dist[1]) for i in range(count)] elif dtype is FLOAT: - values = [rnd.uniform(dist[0], dist[1]) for i in range(count)] + V = [rnd.uniform(dist[0], dist[1]) for i in range(count)] else: raise Exception("unknown type") - return cls.from_lists(keys, values, shape=shape, dtype=dtype) + return cls.from_lists(I, V, shape=shape, dtype=dtype) def eadd(self, op_add, v, out=None, desc=None): """ @@ -365,8 +364,8 @@ def reduce(self, op_reduce, out=None, init=None, desc=None): return out def __iter__(self): - keys, values = self.to_lists() - return zip(keys, values) + I, V = self.to_lists() + return zip(I, V) def _get_desc(self, desc: Descriptor): return desc.hnd if desc else ctypes.c_void_p(0) diff --git a/src/core/tmatrix.hpp b/src/core/tmatrix.hpp index ed158886e..8a8cb2f95 100644 --- a/src/core/tmatrix.hpp +++ b/src/core/tmatrix.hpp @@ -255,7 +255,7 @@ namespace spla { const auto key_size = sizeof(uint); const auto value_size = sizeof(T); - validate_rwd(FormatMatrix::CpuCoo); + validate_rw(FormatMatrix::CpuCoo); CpuCoo& coo = *get>(); const auto elements_count = coo.Ai.size(); diff --git a/src/cpu/cpu_format_coo.hpp b/src/cpu/cpu_format_coo.hpp index e154977bc..d4aea0129 100644 --- a/src/cpu/cpu_format_coo.hpp +++ b/src/cpu/cpu_format_coo.hpp @@ -37,6 +37,14 @@ namespace spla { * @{ */ + template + void cpu_coo_clear(CpuCoo& in) { + in.Ai.clear(); + in.Aj.clear(); + in.Ax.clear(); + in.values = 0; + } + template void cpu_coo_to_csr(uint n_rows, const CpuCoo& in, diff --git a/src/storage/storage_manager.hpp b/src/storage/storage_manager.hpp index 837991c22..61c47131f 100644 --- a/src/storage/storage_manager.hpp +++ b/src/storage/storage_manager.hpp @@ -132,6 +132,11 @@ namespace spla { if (!storage.is_valid_any()) { const int i = static_cast(format); if (!storage.get_ptr_i(i)) { + if (!m_constructors[i]) { + LOG_MSG(Status::NotImplemented, "no such constructor for format " << i); + assert(false); + return; + } m_constructors[i](storage); } if (m_validators[i]) { diff --git a/src/storage/storage_manager_matrix.hpp b/src/storage/storage_manager_matrix.hpp index b42d9bda2..fa5166218 100644 --- a/src/storage/storage_manager_matrix.hpp +++ b/src/storage/storage_manager_matrix.hpp @@ -30,6 +30,7 @@ #include +#include #include #include #include @@ -56,6 +57,9 @@ namespace spla { manager.register_constructor(FormatMatrix::CpuDok, [](Storage& s) { s.get_ref(FormatMatrix::CpuDok) = make_ref>(); }); + manager.register_constructor(FormatMatrix::CpuCoo, [](Storage& s) { + s.get_ref(FormatMatrix::CpuCoo) = make_ref>(); + }); manager.register_constructor(FormatMatrix::CpuCsr, [](Storage& s) { s.get_ref(FormatMatrix::CpuCsr) = make_ref>(); }); @@ -69,6 +73,10 @@ namespace spla { auto* dok = s.template get>(); 
             cpu_dok_clear(*dok);
         });
+        manager.register_validator_discard(FormatMatrix::CpuCoo, [](Storage& s) {
+            auto* coo = s.template get<CpuCoo<T>>();
+            cpu_coo_clear(*coo);
+        });
 
         manager.register_converter(FormatMatrix::CpuLil, FormatMatrix::CpuDok, [](Storage& s) {
             auto* lil = s.template get<CpuLil<T>>();
@@ -83,6 +91,13 @@ namespace spla {
             cpu_lil_to_csr(s.get_n_rows(), *lil, *csr);
         });
 
+        manager.register_converter(FormatMatrix::CpuCoo, FormatMatrix::CpuCsr, [](Storage& s) {
+            auto* coo = s.template get<CpuCoo<T>>();
+            auto* csr = s.template get<CpuCsr<T>>();
+            cpu_csr_resize(s.get_n_rows(), coo->values, *csr);
+            cpu_coo_to_csr(s.get_n_rows(), *coo, *csr);
+        });
+
         manager.register_converter(FormatMatrix::CpuCsr, FormatMatrix::CpuDok, [](Storage& s) {
             auto* csr = s.template get<CpuCsr<T>>();
             auto* dok = s.template get<CpuDok<T>>();
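
For reference, a minimal end-to-end sketch of the Matrix API introduced by this patch, in the
spirit of python/example.py (assumes pyspla is built from this branch; the concrete shapes and
values below are illustrative only):

    import pyspla

    # build a 3x3 matrix from sorted (row, col, value) triplets
    A = pyspla.Matrix.from_lists([0, 1, 2], [1, 2, 0], [7, 8, 9], shape=(3, 3), dtype=pyspla.INT)

    # read the content back as three parallel lists and as (i, j, v) tuples
    print(A.to_lists())
    print(A.to_list())

    # fold all stored values with integer addition; reduce() returns a Scalar
    print(A.reduce(pyspla.INT.PLUS))

    # generate a random 10x10 matrix with ~10% density and values drawn from [0, 10]
    G = pyspla.Matrix.generate((10, 10), pyspla.INT, density=0.1, seed=42, dist=[0, 10])
    print(G.to_lists())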