From efafb6ded702ec8dd00af115a25923ddacebaa73 Mon Sep 17 00:00:00 2001 From: Shoumik Palkar Date: Wed, 2 Oct 2019 08:26:34 +0530 Subject: [PATCH 01/12] Start numpy encoders --- weld-python/Cargo.toml | 1 + weld-python/src/lib.rs | 2 ++ weld-python/src/npy_enc.rs | 40 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+) create mode 100644 weld-python/src/npy_enc.rs diff --git a/weld-python/Cargo.toml b/weld-python/Cargo.toml index 25fd9c22d..5772ed6c8 100644 --- a/weld-python/Cargo.toml +++ b/weld-python/Cargo.toml @@ -7,6 +7,7 @@ edition = "2018" [dependencies] libc = "0.2.0" weld = { path = "../weld" } +numpy = "0.7.0" [dependencies.pyo3] version = "0.8.0" diff --git a/weld-python/src/lib.rs b/weld-python/src/lib.rs index dfb428f9c..c4b32d62e 100644 --- a/weld-python/src/lib.rs +++ b/weld-python/src/lib.rs @@ -5,6 +5,8 @@ use pyo3::import_exception; use weld; +pub mod npy_enc; + import_exception!(weld, WeldError); /// Converts a `Result` to `PyResult`. diff --git a/weld-python/src/npy_enc.rs b/weld-python/src/npy_enc.rs new file mode 100644 index 000000000..bcbba0bf8 --- /dev/null +++ b/weld-python/src/npy_enc.rs @@ -0,0 +1,40 @@ +//! Encoders and decoders for some common NumPy data types. +//! +//! This module supports zero-copy encoding/decoding using NumPy arrays of the following Weld +//! types: +//! +//! * vec[T] where T is an unsigned or signed integer. +//! * vec[T] where T is a float or double. +//! * vec[T] where T is a boolean. +//! * vec[T] where T is a fixed-size string (dtype='|Sx') +//! +//! In addition, this module supports encoding Python string objects, but requires copying data. +//! +//! 2D NumPy arrays are no longer support because their representation as vec[vec[T]] is quite +//! inefficient. Weld will eventually include a tensor[T,shape] type that will support this. 
+ +use pyo3::prelude::*; +use pyo3::import_exception; + +use numpy::PyArray1; + +use weld::data; + +/// Converts a 1D NumPy array into a `WeldVec` that can be passed to the Weld runtime. +fn to_weld_1d(array: &PyArray1) -> data::WeldVec { + let array_obj = array.as_array_ptr(); + data::WeldVec { + data: unsafe { (*array_obj).data as *mut T }, + len: array.len() as i64 + } +} + +/// Converts a 1D NumPy array into a `WeldVec` that can be passed to the Weld runtime. +fn to_numpy_1d(vec: &data::WeldVec) -> PyArray1 { + unimplemented!("This needs to be implemented"); +} + +#[pymodule] +fn numpy_encoders(_py: Python<'_>, m: &PyModule) -> PyResult<()> { + Ok(()) +} From 2549982b585aec8cfc1024f91c915aac133829c0 Mon Sep 17 00:00:00 2001 From: Shoumik Palkar Date: Thu, 3 Oct 2019 08:45:50 +0530 Subject: [PATCH 02/12] dtype mapping --- weld-python/src/npy_enc.rs | 21 ++++++++- weld-python/weld/encoders/numpy.py | 69 +++++++++++++++++++++++++++++- 2 files changed, 88 insertions(+), 2 deletions(-) diff --git a/weld-python/src/npy_enc.rs b/weld-python/src/npy_enc.rs index bcbba0bf8..2e9d99b01 100644 --- a/weld-python/src/npy_enc.rs +++ b/weld-python/src/npy_enc.rs @@ -14,12 +14,25 @@ //! inefficient. Weld will eventually include a tensor[T,shape] type that will support this. use pyo3::prelude::*; -use pyo3::import_exception; +use pyo3::wrap_pyfunction; use numpy::PyArray1; use weld::data; +#[pyclass] +struct WeldVec { + vec: data::WeldVec, +} + +impl WeldVec { + fn new(vec: data::WeldVec) -> Self { + WeldVec { + vec + } + } +} + /// Converts a 1D NumPy array into a `WeldVec` that can be passed to the Weld runtime. 
fn to_weld_1d(array: &PyArray1) -> data::WeldVec { let array_obj = array.as_array_ptr(); @@ -34,7 +47,13 @@ fn to_numpy_1d(vec: &data::WeldVec) -> PyArray1 { unimplemented!("This needs to be implemented"); } +#[pyfunction] +fn to_weld_1d_i32(array: &PyArray1) -> PyResult { + Ok(WeldVec::new(to_weld_1d::(array))) +} + #[pymodule] fn numpy_encoders(_py: Python<'_>, m: &PyModule) -> PyResult<()> { + m.add_wrapped(wrap_pyfunction!(to_weld_1d_i32)).unwrap(); Ok(()) } diff --git a/weld-python/weld/encoders/numpy.py b/weld-python/weld/encoders/numpy.py index a334965c7..dc7b9caf8 100644 --- a/weld-python/weld/encoders/numpy.py +++ b/weld-python/weld/encoders/numpy.py @@ -1,10 +1,77 @@ """ -Implements some basic encoders for NumPy arrays. +Implements encoders for NumPy values. + +Zero-copy conversions (in particular, to 1D arrays) are implemented here +directly since they only involve a pointer copy. Conversions of types that +currently require copies are implemented in Rust. """ import numpy as np from .encoder_base import * +from ..types import * + +# Maps a string dtype representation to a Weld scalar type. +_known_types = { + 'int8': I8(), + 'int16': I16(), + 'int32': I32(), + 'int64': I64(), + 'uint8': U8(), + 'uint16': U16(), + 'uint32': U32(), + 'uint64': U64(), + 'float': F32(), + 'float32': F32(), + 'double': F64(), + 'float64': F64(), + 'bool': Bool() + } + +def dtype_to_weld_type(ty): + """Converts a NumPy data type to a Weld type. + + The data type can be any type that can be converted to a NumPy dtype, + e.g., a string (e.g., 'int32') or a NumPy scalar type (e.g., np.int32). The + type chosen follows the rules specified by NumPy here: + + https://docs.scipy.org/doc/numpy/reference/arrays.dtypes.html#dtype + + For example, 'i8' will map to an int64 in Weld, since it indicates a signed + integer that has eight bytes. 
+ + Examples + -------- + >>> dtype_to_weld_type('int32') + + >>> dtype_to_weld_type('float') + + >>> dtype_to_weld_type('i8') + + >>> dtype_to_weld_type(np.int16) + + + Parameters + ---------- + ty : str or dtype or NumPy scalar type + The NumPy type to convert + + Returns + ------- + WeldType + + """ + if not isinstance(ty, np.dtype): + ty = np.dtype(ty) + + ty = str(ty) + if ty in _known_types: + return _known_types.get(ty) + + if ty.startswith('S'): + raise TypeError("Python 2 strings not supported -- use Unicode") + if ty.find('U') != -1: + raise NotImplementedError("Unicode strings not yet supported") class NumPyWeldEncoder(WeldEncoder): pass From fae2755f24937aea912adc58fca14ea0fde61b29 Mon Sep 17 00:00:00 2001 From: Shoumik Palkar Date: Thu, 3 Oct 2019 09:00:15 +0530 Subject: [PATCH 03/12] Serialization for 1D arrays --- weld-python/weld/encoders/numpy.py | 38 +++++++++++++++++++++++++++++- weld-python/weld/types.py | 10 ++++---- 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/weld-python/weld/encoders/numpy.py b/weld-python/weld/encoders/numpy.py index dc7b9caf8..ff6ef377b 100644 --- a/weld-python/weld/encoders/numpy.py +++ b/weld-python/weld/encoders/numpy.py @@ -6,6 +6,7 @@ currently require copies are implemented in Rust. """ +import ctypes import numpy as np from .encoder_base import * @@ -74,7 +75,42 @@ def dtype_to_weld_type(ty): raise NotImplementedError("Unicode strings not yet supported") class NumPyWeldEncoder(WeldEncoder): - pass + + @staticmethod + def _convert_1d_array(array): + """Converts a 1D NumPy array into a Weld vector. + + The vector holds a reference to the array. + + Examples + -------- + >>> arr = np.array([1, 2, 3]) + >>> encoded = NumPyWeldEncoder._convert_1d_array(arr) + >>> encoded.length + c_long(3) + >>> encoded.data.contents + c_long(1) + + Parameters + ---------- + array : ndarray + A one-dimensional NumPy array. 
+ + Returns + ------- + WeldVec + + """ + elem_type = dtype_to_weld_type(array.dtype) + vec_type = WeldVec(elem_type) + + data = array.ctypes.data_as(ctypes.POINTER(elem_type.ctype_class)) + length = ctypes.c_int64(len(array)) + + vec = vec_type.ctype_class() + vec.data = data + vec.length = length + return vec class NumPyWeldDecoder(WeldDecoder): pass diff --git a/weld-python/weld/types.py b/weld-python/weld/types.py index d3f7cbb46..663abcd5b 100644 --- a/weld-python/weld/types.py +++ b/weld-python/weld/types.py @@ -164,16 +164,16 @@ def vec_factory(elem_type): If the vector class already exists, it is delivered via the _singletons dictionary. """ - class Vec(Structure): + class Vec(ctypes.Structure): _fields_ = [ - ("ptr", ctypes.POINTER(elem_type.ctype_class)), - ("size", ctypes.c_long), + ("data", ctypes.POINTER(elem_type.ctype_class)), + ("size", ctypes.c_int64), ] return Vec if self.elem_type not in WeldVec._singletons: - WeldVec._singletons[self.elemType] = vec_factory(self.elemType) - return WeldVec._singletons[self.elemType] + WeldVec._singletons[self.elem_type] = vec_factory(self.elem_type) + return WeldVec._singletons[self.elem_type] class WeldStruct(WeldType): From 03ad6597df7bcfe5a16caf5f258c23e6c759c13d Mon Sep 17 00:00:00 2001 From: Shoumik Palkar Date: Fri, 4 Oct 2019 10:37:54 +0530 Subject: [PATCH 04/12] implement encode in encoder --- weld-python/weld/encoders/numpy.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/weld-python/weld/encoders/numpy.py b/weld-python/weld/encoders/numpy.py index ff6ef377b..809b3bbd1 100644 --- a/weld-python/weld/encoders/numpy.py +++ b/weld-python/weld/encoders/numpy.py @@ -77,7 +77,7 @@ def dtype_to_weld_type(ty): class NumPyWeldEncoder(WeldEncoder): @staticmethod - def _convert_1d_array(array): + def _convert_1d_array(array, check_type=None): """Converts a 1D NumPy array into a Weld vector. The vector holds a reference to the array. 
@@ -95,6 +95,10 @@ def _convert_1d_array(array): ---------- array : ndarray A one-dimensional NumPy array. + check_type : WeldType, optional + If this value is passed, this function will check whether the + array's derived WeldType is equal to the passed type. Defaults to + None. Returns ------- @@ -104,6 +108,9 @@ def _convert_1d_array(array): elem_type = dtype_to_weld_type(array.dtype) vec_type = WeldVec(elem_type) + if check_type is not None: + assert check_type == vec_type + data = array.ctypes.data_as(ctypes.POINTER(elem_type.ctype_class)) length = ctypes.c_int64(len(array)) @@ -112,5 +119,12 @@ def _convert_1d_array(array): vec.length = length return vec + def encode(self, obj, ty): + if isinstance(obj, np.ndarray): + if obj.ndim == 1: + return NumPyWeldEncoder._convert_1d_array(obj, check_type=ty) + else: + raise NotImplementedError + class NumPyWeldDecoder(WeldDecoder): pass From f1fae4f6f727871cfe0028ed12195ecc4b2367f9 Mon Sep 17 00:00:00 2001 From: Shoumik Palkar Date: Fri, 4 Oct 2019 11:33:30 +0530 Subject: [PATCH 05/12] Remove 1d rust encoders --- weld-python/src/lib.rs | 2 -- weld-python/src/npy_enc.rs | 59 -------------------------------------- 2 files changed, 61 deletions(-) delete mode 100644 weld-python/src/npy_enc.rs diff --git a/weld-python/src/lib.rs b/weld-python/src/lib.rs index c4b32d62e..dfb428f9c 100644 --- a/weld-python/src/lib.rs +++ b/weld-python/src/lib.rs @@ -5,8 +5,6 @@ use pyo3::import_exception; use weld; -pub mod npy_enc; - import_exception!(weld, WeldError); /// Converts a `Result` to `PyResult`. diff --git a/weld-python/src/npy_enc.rs b/weld-python/src/npy_enc.rs deleted file mode 100644 index 2e9d99b01..000000000 --- a/weld-python/src/npy_enc.rs +++ /dev/null @@ -1,59 +0,0 @@ -//! Encoders and decoders for some common NumPy data types. -//! -//! This module supports zero-copy encoding/decoding using NumPy arrays of the following Weld -//! types: -//! -//! * vec[T] where T is an unsigned or signed integer. -//! 
* vec[T] where T is a float or double. -//! * vec[T] where T is a boolean. -//! * vec[T] where T is a fixed-size string (dtype='|Sx') -//! -//! In addition, this module supports encoding Python string objects, but requires copying data. -//! -//! 2D NumPy arrays are no longer support because their representation as vec[vec[T]] is quite -//! inefficient. Weld will eventually include a tensor[T,shape] type that will support this. - -use pyo3::prelude::*; -use pyo3::wrap_pyfunction; - -use numpy::PyArray1; - -use weld::data; - -#[pyclass] -struct WeldVec { - vec: data::WeldVec, -} - -impl WeldVec { - fn new(vec: data::WeldVec) -> Self { - WeldVec { - vec - } - } -} - -/// Converts a 1D NumPy array into a `WeldVec` that can be passed to the Weld runtime. -fn to_weld_1d(array: &PyArray1) -> data::WeldVec { - let array_obj = array.as_array_ptr(); - data::WeldVec { - data: unsafe { (*array_obj).data as *mut T }, - len: array.len() as i64 - } -} - -/// Converts a 1D NumPy array into a `WeldVec` that can be passed to the Weld runtime. 
-fn to_numpy_1d(vec: &data::WeldVec) -> PyArray1 { - unimplemented!("This needs to be implemented"); -} - -#[pyfunction] -fn to_weld_1d_i32(array: &PyArray1) -> PyResult { - Ok(WeldVec::new(to_weld_1d::(array))) -} - -#[pymodule] -fn numpy_encoders(_py: Python<'_>, m: &PyModule) -> PyResult<()> { - m.add_wrapped(wrap_pyfunction!(to_weld_1d_i32)).unwrap(); - Ok(()) -} From 8cb0984ebf3da7540e1a8b7b67d94347c3ae8d9f Mon Sep 17 00:00:00 2001 From: Shoumik Palkar Date: Thu, 10 Oct 2019 21:17:03 +0530 Subject: [PATCH 06/12] simple decoding of numpy arrays --- weld-python/weld/encoders/numpy.py | 89 ++++++++++++++++++++++++++++-- 1 file changed, 84 insertions(+), 5 deletions(-) diff --git a/weld-python/weld/encoders/numpy.py b/weld-python/weld/encoders/numpy.py index 809b3bbd1..6997f30f7 100644 --- a/weld-python/weld/encoders/numpy.py +++ b/weld-python/weld/encoders/numpy.py @@ -22,13 +22,43 @@ 'uint16': U16(), 'uint32': U32(), 'uint64': U64(), - 'float': F32(), 'float32': F32(), + 'float': F64(), 'double': F64(), 'float64': F64(), 'bool': Bool() } +# Reverse of the above. +_known_types_weld2dtype = {v: k for k, v in _known_types.items()} + +def weld_type_to_dtype(ty): + """Converts a Weld type to a NumPy dtype. + + Examples + -------- + >>> weld_type_to_dtype(I32()) + dtype('int32') + >>> weld_type_to_dtype(F32()) + dtype('float32') + >>> weld_type_to_dtype(F64()) + dtype('float64') + + Parameters + ---------- + ty: WeldType + The type to convert + + Returns + ------- + dtype + + """ + if ty in _known_types_weld2dtype: + return np.dtype(_known_types_weld2dtype[ty]) + + raise NotImplementedError("String Weld -> dtype not supported") + def dtype_to_weld_type(ty): """Converts a NumPy data type to a Weld type. 
@@ -86,8 +116,8 @@ def _convert_1d_array(array, check_type=None): -------- >>> arr = np.array([1, 2, 3]) >>> encoded = NumPyWeldEncoder._convert_1d_array(arr) - >>> encoded.length - c_long(3) + >>> encoded.size + 3 >>> encoded.data.contents c_long(1) @@ -116,7 +146,7 @@ def _convert_1d_array(array, check_type=None): vec = vec_type.ctype_class() vec.data = data - vec.length = length + vec.size = length return vec def encode(self, obj, ty): @@ -125,6 +155,55 @@ def encode(self, obj, ty): return NumPyWeldEncoder._convert_1d_array(obj, check_type=ty) else: raise NotImplementedError + else: + raise TypeError("Unexpected type {} in NumPy encoder".format(type(obj))) class NumPyWeldDecoder(WeldDecoder): - pass + """ Decodes an encoded Weld array into a NumPy array. + + >>> arr = np.array([1,2,3], dtype='int32') + >>> encoded = NumPyWeldEncoder().encode(arr, WeldVec(I32())) + >>> NumPyWeldDecoder().decode(encoded, WeldVec(I32())) + array([1, 2, 3], dtype=int32) + + """ + + @staticmethod + def _memory_buffer(c_pointer, length, dtype): + """Creates a Python memory buffer from the pointer. + + Parameters + ---------- + + c_pointer : ctypes pointer + the pointer the buffer points to + length : int + the array length + dtype : NumPy dtype + the type of the elements in the buffer. 
+ + Returns + ------- + memory + + """ + arr_size = dtype.itemsize * length + buf_from_mem = ctypes.pythonapi.PyMemoryView_FromMemory + buf_from_mem.restype = ctypes.py_object + buf_from_mem.argtypes = (ctypes.c_void_p, ctypes.c_int, ctypes.c_int) + return buf_from_mem(c_pointer, arr_size, 0x100) + + + def decode(self, obj, restype): + # A 1D NumPy array + if isinstance(restype, WeldVec) and\ + not isinstance(restype.elem_type, WeldVec): + elem_type = restype.elem_type + dtype = weld_type_to_dtype(elem_type) + pointer = obj.data + size = obj.size + array = np.frombuffer(NumPyWeldDecoder._memory_buffer(pointer, size, dtype), + dtype=dtype, count=size) + return array + else: + raise TypeError("Unsupported type {} in NumPy decoder".format(type(obj))) From 684d83d312d020ea5c0e3364b2b0847ee0e7d8b1 Mon Sep 17 00:00:00 2001 From: Shoumik Palkar Date: Fri, 11 Oct 2019 17:11:45 +0530 Subject: [PATCH 07/12] Add 1D Numpy encoding tests --- weld-python/tests/encoders/__init__.py | 0 weld-python/tests/encoders/helpers.py | 60 ++++++++++++++++ weld-python/tests/encoders/test_numpy.py | 71 +++++++++++++++++++ weld-python/tests/encoders/test_primitives.py | 37 +--------- weld-python/weld/encoders/numpy.py | 3 +- 5 files changed, 135 insertions(+), 36 deletions(-) create mode 100644 weld-python/tests/encoders/__init__.py create mode 100644 weld-python/tests/encoders/helpers.py create mode 100644 weld-python/tests/encoders/test_numpy.py diff --git a/weld-python/tests/encoders/__init__.py b/weld-python/tests/encoders/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/weld-python/tests/encoders/helpers.py b/weld-python/tests/encoders/helpers.py new file mode 100644 index 000000000..87c92fa4d --- /dev/null +++ b/weld-python/tests/encoders/helpers.py @@ -0,0 +1,60 @@ + +import ctypes + +def encdec_factory(encoder, decoder, eq=None): + """ Returns a function that encodes and decodes a value. 
+ + Parameters + ---------- + encoder : WeldEncoder + the encoder class to use. + decoder : WeldDecoder + the decoder class to use. + eq : function (T, T) => bool, optional (default=None) + the equality function to use. If this is `None`, the `==` operator is + used. + + Returns + ------- + function + + """ + def encdec(value, ty, assert_equal=True, err=False): + """ Helper function that encodes a value and decodes it. + + The function asserts that the original value and the decoded value are + equal. + + Parameters + ---------- + value : any + The value to encode and decode + ty : WeldType + the WeldType of the value + assert_equal : bool (default True) + Checks whether the original value and decoded value are equal. + err : bool (default False) + If True, expects an error. + + """ + enc = encoder() + dec = decoder() + + try: + result = dec.decode(ctypes.pointer(enc.encode(value, ty)), ty) + except Exception as e: + if err: + return + else: + raise e + + if err: + raise RuntimeError("Expected error during encode/decode") + + if assert_equal: + if eq is not None: + assert eq(value, result) + else: + assert value == result + + return encdec diff --git a/weld-python/tests/encoders/test_numpy.py b/weld-python/tests/encoders/test_numpy.py new file mode 100644 index 000000000..4b8300b80 --- /dev/null +++ b/weld-python/tests/encoders/test_numpy.py @@ -0,0 +1,71 @@ +""" +Tests NumPy encoders and decoders. +""" + +import ctypes +import numpy as np + +from .helpers import encdec_factory +from weld.encoders.numpy import NumPyWeldEncoder, NumPyWeldDecoder +from weld.types import * + +encdec = encdec_factory(NumPyWeldEncoder, NumPyWeldDecoder, eq=np.allclose) + +def array(dtype, length=5): + """Creates a 1D NumPy array with the given data type. + + The array is filled with data [0...length). 
+ + >>> array('int8') + array([0, 1, 2, 3, 4], dtype=int8) + >>> array('float32') + array([0., 1., 2., 3., 4.], dtype=float32) + + Parameters + ---------- + dtype: np.dtype + data type of array elements + length: int + elements in array + + Returns + ------- + np.ndarray + + """ + return np.arange(start=0, stop=length, dtype=dtype) + +def test_bool_vec(): + # Booleans in NumPy, like in Weld, are represented as bytes. + encdec(np.array([True, True, False, False, True], dtype='bool'), + WeldVec(Bool())) + +def test_i8_vec(): + encdec(array('int8'), WeldVec(I8())) + +def test_i16_vec(): + encdec(array('int16'), WeldVec(I16())) + +def test_i32_vec(): + encdec(array('int32'), WeldVec(I32())) + +def test_i64_vec(): + encdec(array('int64'), WeldVec(I64())) + +def test_u8_vec(): + encdec(array('uint8'), WeldVec(U8())) + +def test_u16_vec(): + encdec(array('uint16'), WeldVec(U16())) + +def test_u32_vec(): + encdec(array('uint32'), WeldVec(U32())) + +def test_u64_vec(): + encdec(array('uint64'), WeldVec(U64())) + +def test_float32_vec(): + encdec(array('float32'), WeldVec(F32())) + +def test_float64_vec(): + encdec(array('float64'), WeldVec(F64())) diff --git a/weld-python/tests/encoders/test_primitives.py b/weld-python/tests/encoders/test_primitives.py index 686f8537b..09e551075 100644 --- a/weld-python/tests/encoders/test_primitives.py +++ b/weld-python/tests/encoders/test_primitives.py @@ -2,46 +2,13 @@ Tests primitive encoders and decoders. """ -import copy import ctypes +from .helpers import encdec_factory from weld.encoders import PrimitiveWeldEncoder, PrimitiveWeldDecoder from weld.types import * -def encdec(value, ty, assert_equal=True, err=False): - """ Helper function that encodes a value and decodes it. - - The function asserts that the original value and the decoded value are - equal. 
- - Parameters - ---------- - value : any - The value to encode and decode - ty : WeldType - the WeldType of the value - assert_equal : bool (default True) - Checks whether the original value and decoded value are equal. - err : bool (default False) - If True, expects an error. - - """ - enc = PrimitiveWeldEncoder() - dec = PrimitiveWeldDecoder() - - try: - result = dec.decode(ctypes.pointer(enc.encode(value, ty)), ty) - except Exception as e: - if err: - return - else: - raise e - - if err: - raise RuntimeError("Expected error during encode/decode") - - if assert_equal: - assert value == result +encdec = encdec_factory(PrimitiveWeldEncoder, PrimitiveWeldDecoder) def test_i8_encode(): encdec(-1, I8()) diff --git a/weld-python/weld/encoders/numpy.py b/weld-python/weld/encoders/numpy.py index 6997f30f7..f61501f93 100644 --- a/weld-python/weld/encoders/numpy.py +++ b/weld-python/weld/encoders/numpy.py @@ -163,7 +163,7 @@ class NumPyWeldDecoder(WeldDecoder): >>> arr = np.array([1,2,3], dtype='int32') >>> encoded = NumPyWeldEncoder().encode(arr, WeldVec(I32())) - >>> NumPyWeldDecoder().decode(encoded, WeldVec(I32())) + >>> NumPyWeldDecoder().decode(ctypes.pointer(encoded), WeldVec(I32())) array([1, 2, 3], dtype=int32) """ @@ -196,6 +196,7 @@ def _memory_buffer(c_pointer, length, dtype): def decode(self, obj, restype): # A 1D NumPy array + obj = obj.contents if isinstance(restype, WeldVec) and\ not isinstance(restype.elem_type, WeldVec): elem_type = restype.elem_type From e89b1bcee71f84768d08c2e90197c6f6407e3a66 Mon Sep 17 00:00:00 2001 From: Shoumik Palkar Date: Fri, 11 Oct 2019 17:54:15 +0530 Subject: [PATCH 08/12] more numpy decoding --- weld-python/weld/compile.py | 4 +- weld-python/weld/encoders/encoder_base.py | 8 ++- weld-python/weld/encoders/numpy.py | 78 +++++++++++++++++++---- weld-python/weld/encoders/primitives.py | 2 +- 4 files changed, 74 insertions(+), 18 deletions(-) diff --git a/weld-python/weld/compile.py b/weld-python/weld/compile.py index 
fa68562c9..b5bb436ba 100644 --- a/weld-python/weld/compile.py +++ b/weld-python/weld/compile.py @@ -167,9 +167,9 @@ def func(*args, context=None): data = ctypes.cast(result.data(), pointer_type) if decoder is not None: - result = decoder.decode(data, restype) + result = decoder.decode(data, restype, context) else: - result = primitive_decoder.decode(data, restype) + result = primitive_decoder.decode(data, restype, context) return (result, context) return func diff --git a/weld-python/weld/encoders/encoder_base.py b/weld-python/weld/encoders/encoder_base.py index b1d99e17d..f640d0e49 100644 --- a/weld-python/weld/encoders/encoder_base.py +++ b/weld-python/weld/encoders/encoder_base.py @@ -38,7 +38,7 @@ class WeldDecoder(ABC): """ @abstractmethod - def decode(obj, restype): + def decode(self, obj, restype, context): """ Decodes the object, assuming object has the WeldType restype. @@ -51,11 +51,15 @@ def decode(obj, restype): An object encoded in the Weld ABI. restype : WeldType The WeldType of the object that is being decoded. + context : WeldContext or None + The context backing `obj` if this value was constructed in Weld. Returns ------- any - The decoder can return any Python value. + The decoder can return any Python value. If the data is not copied + and context is not `None`, the returned object should hold a + reference to the context to prevent use-after-free bugs. """ pass diff --git a/weld-python/weld/encoders/numpy.py b/weld-python/weld/encoders/numpy.py index f61501f93..43c1e6943 100644 --- a/weld-python/weld/encoders/numpy.py +++ b/weld-python/weld/encoders/numpy.py @@ -1,9 +1,17 @@ """ Implements encoders for NumPy values. +The Weld package includes native convertors for NumPy arrays because NumPy is +the standard way for interacting with C-like array data. + +The encoder in this package accepts ndarray or its subclasses. The decoder in +this module returns a subclass of ndarray called `weldbasearray`, which may +hold a reference to a `WeldContext`. 
This prevents arrays backed by memory +allocated in Weld from being freed before the array's reference count drops to +0. + Zero-copy conversions (in particular, to 1D arrays) are implemented here -directly since they only involve a pointer copy. Conversions of types that -currently require copies are implemented in Rust. +directly since they only involve a pointer copy. """ import ctypes @@ -12,6 +20,10 @@ from .encoder_base import * from ..types import * +class weldbasearray(np.ndarray): + # Not implemented yet. + pass + # Maps a string dtype representation to a Weld scalar type. _known_types = { 'int8': I8(), @@ -51,14 +63,13 @@ def weld_type_to_dtype(ty): Returns ------- - dtype + dtype or None + Returns None if the type is not recognized. """ if ty in _known_types_weld2dtype: return np.dtype(_known_types_weld2dtype[ty]) - raise NotImplementedError("String Weld -> dtype not supported") - def dtype_to_weld_type(ty): """Converts a NumPy data type to a Weld type. @@ -158,6 +169,7 @@ def encode(self, obj, ty): else: raise TypeError("Unexpected type {} in NumPy encoder".format(type(obj))) + class NumPyWeldDecoder(WeldDecoder): """ Decodes an encoded Weld array into a NumPy array. @@ -194,17 +206,57 @@ def _memory_buffer(c_pointer, length, dtype): return buf_from_mem(c_pointer, arr_size, 0x100) - def decode(self, obj, restype): + def _numpy_type(weld_type): + """Infers the ndarray dimensions and dtype from a WeldVec type. + + Throws a TypeError if the weld_type cannot be represented as an + ndarray of some scalar type. + + Parameters + ---------- + weld_type : WeldType + The type to check + + Returns + ------- + (int, dtype) tuple + The first element is the number of dimensions and the second + element is the dtype. + + >>> NumPyWeldDecoder._numpy_type(WeldVec(I8())) + (1, dtype('int8')) + >>> NumPyWeldDecoder._numpy_type(WeldVec(WeldVec(F32()))) + (2, dtype('float32')) + >>> NumPyWeldDecoder._numpy_type(I32()) + Traceback (most recent call last): + ... 
+ TypeError: type cannot be represented as ndarray + + """ + if not isinstance(weld_type, WeldVec): + raise TypeError("type cannot be represented as ndarray") + + dimension = 1 + elem_type = weld_type.elem_type + if isinstance(elem_type, WeldVec): + (inner_dims, inner_ty) = NumPyWeldDecoder._numpy_type(elem_type) + dimension += inner_dims + else: + try: + inner_ty = weld_type_to_dtype(elem_type) + except: + raise TypeError("unknown element type {}".format(elem_type)) + return (dimension, inner_ty) + + + def decode(self, obj, restype, context=None): # A 1D NumPy array obj = obj.contents - if isinstance(restype, WeldVec) and\ - not isinstance(restype.elem_type, WeldVec): + (dims, dtype) = NumPyWeldDecoder._numpy_type(restype) + if dims == 1: elem_type = restype.elem_type - dtype = weld_type_to_dtype(elem_type) - pointer = obj.data - size = obj.size - array = np.frombuffer(NumPyWeldDecoder._memory_buffer(pointer, size, dtype), - dtype=dtype, count=size) + buf = NumPyWeldDecoder._memory_buffer(obj.data, obj.size, dtype) + array = np.frombuffer(buf, dtype=dtype, count=obj.size) return array else: raise TypeError("Unsupported type {} in NumPy decoder".format(type(obj))) diff --git a/weld-python/weld/encoders/primitives.py b/weld-python/weld/encoders/primitives.py index d6a8178bf..8c7a6f36b 100644 --- a/weld-python/weld/encoders/primitives.py +++ b/weld-python/weld/encoders/primitives.py @@ -65,7 +65,7 @@ class PrimitiveWeldDecoder(WeldDecoder): >>> decoder.decode(ctypes.pointer(x), struct_type) (1, 1.0) """ - def decode(self, obj, restype): + def decode(self, obj, restype, context=None): if isinstance(restype, Bool): return bool(obj.contents.value) elif isinstance(restype, WeldStruct): From e153606820750c1e0759e134877d2d7631f7b965 Mon Sep 17 00:00:00 2001 From: Shoumik Palkar Date: Fri, 11 Oct 2019 17:57:24 +0530 Subject: [PATCH 09/12] fix a comment --- weld-python/weld/encoders/numpy.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/weld-python/weld/encoders/numpy.py b/weld-python/weld/encoders/numpy.py index 43c1e6943..51547eee9 100644 --- a/weld-python/weld/encoders/numpy.py +++ b/weld-python/weld/encoders/numpy.py @@ -207,10 +207,10 @@ def _memory_buffer(c_pointer, length, dtype): def _numpy_type(weld_type): - """Infers the ndarray dimensions and dtype from a WeldVec type. + """Infers the ndarray dimensions and dtype from a Weld type. - Throws a TypeError if the weld_type cannot be represented as an - ndarray of some scalar type. + Throws a TypeError if the weld_type cannot be represented as an ndarray + of some scalar type. Parameters ---------- From 1d45eaf6990b92dabd3a39b3df97a69905485d21 Mon Sep 17 00:00:00 2001 From: Shoumik Palkar Date: Fri, 11 Oct 2019 19:16:20 +0530 Subject: [PATCH 10/12] fill in weldbasearray --- weld-python/Cargo.toml | 1 - weld-python/weld/encoders/numpy.py | 58 +++++++++++++++++++++++++++--- 2 files changed, 54 insertions(+), 5 deletions(-) diff --git a/weld-python/Cargo.toml b/weld-python/Cargo.toml index 5772ed6c8..25fd9c22d 100644 --- a/weld-python/Cargo.toml +++ b/weld-python/Cargo.toml @@ -7,7 +7,6 @@ edition = "2018" [dependencies] libc = "0.2.0" weld = { path = "../weld" } -numpy = "0.7.0" [dependencies.pyo3] version = "0.8.0" diff --git a/weld-python/weld/encoders/numpy.py b/weld-python/weld/encoders/numpy.py index 51547eee9..bef238e15 100644 --- a/weld-python/weld/encoders/numpy.py +++ b/weld-python/weld/encoders/numpy.py @@ -21,8 +21,58 @@ from ..types import * class weldbasearray(np.ndarray): - # Not implemented yet. - pass + """ A NumPy array possibly backed by a `WeldContext`. + + This class is a wrapper around the NumPy `ndarray` class, but it contains + an additional `weld_context` attribute. This attribute references the + memory that backs the array, if the array was returned by Weld (or created + from another array that was returned by Weld). 
It prevents memory owned by + the context from being freed before all references to the array are + deleted. + + This class also contains an additional method, `copy2numpy`, which + deep-copies the data referenced by this array to a regular `ndarray`. The + resulting array does not hold a reference to the context or the original + array. + + If the `weld_context` attribute is `None`, this class acts like a regular + `ndarray`, and the `copy2numpy` function simply copies this array. + + """ + + def __new__(cls, input_array, weld_context=None): + """ Instance initializer. + + Parameters + ---------- + weld_context : WeldContext or None + If this is not `None`, it should be the context that owns the + memory for `input_array`. + + """ + obj = np.asarray(input_array).view(cls) + obj.weld_context = weld_context + return obj + + def __array_finalize__(self, obj): + """ Finalizes array. See the NumPy documentation. """ + if obj is None: + return + self.weld_context = getattr(obj, 'weld_context', None) + + def copy2numpy(self): + """ Copies this array's data into a new NumPy `ndarray`. + + Examples + -------- + >>> arr = weldbasearray([1, 2, 3]) + >>> arr + weldbasearray([1, 2, 3]) + >>> arr.copy2numpy() + array([1, 2, 3]) + + """ + return np.array(self, copy=True).view(np.ndarray) # Maps a string dtype representation to a Weld scalar type. 
_known_types = { @@ -176,7 +226,7 @@ class NumPyWeldDecoder(WeldDecoder): >>> arr = np.array([1,2,3], dtype='int32') >>> encoded = NumPyWeldEncoder().encode(arr, WeldVec(I32())) >>> NumPyWeldDecoder().decode(ctypes.pointer(encoded), WeldVec(I32())) - array([1, 2, 3], dtype=int32) + weldbasearray([1, 2, 3], dtype=int32) """ @@ -257,6 +307,6 @@ def decode(self, obj, restype, context=None): elem_type = restype.elem_type buf = NumPyWeldDecoder._memory_buffer(obj.data, obj.size, dtype) array = np.frombuffer(buf, dtype=dtype, count=obj.size) - return array + return weldbasearray(array, weld_context=context) else: raise TypeError("Unsupported type {} in NumPy decoder".format(type(obj))) From 28516d398932ca7dd3d057f9913d803a6973968b Mon Sep 17 00:00:00 2001 From: Shoumik Palkar Date: Fri, 11 Oct 2019 19:25:12 +0530 Subject: [PATCH 11/12] add some sanity tests for weldbasearray --- weld-python/tests/encoders/test_numpy.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/weld-python/tests/encoders/test_numpy.py b/weld-python/tests/encoders/test_numpy.py index 4b8300b80..b7fe92637 100644 --- a/weld-python/tests/encoders/test_numpy.py +++ b/weld-python/tests/encoders/test_numpy.py @@ -6,7 +6,9 @@ import numpy as np from .helpers import encdec_factory -from weld.encoders.numpy import NumPyWeldEncoder, NumPyWeldDecoder + +from weld import WeldConf, WeldContext +from weld.encoders.numpy import weldbasearray, NumPyWeldEncoder, NumPyWeldDecoder from weld.types import * encdec = encdec_factory(NumPyWeldEncoder, NumPyWeldDecoder, eq=np.allclose) @@ -34,6 +36,25 @@ def array(dtype, length=5): """ return np.arange(start=0, stop=length, dtype=dtype) + + +# Tests for ensuring weldbasearrays propagate their contexts properly + +def test_baseweldarray_basics(): + x = np.array([1, 2, 3, 4, 5], dtype="int8") + + ctx = WeldContext(WeldConf()) + + welded = weldbasearray(x, weld_context=ctx) + assert welded.dtype == "int8" + assert welded.weld_context is 
ctx + + sliced = welded[1:] + assert np.allclose(sliced, np.array([2,3,4,5])) + assert sliced.base is welded + assert sliced.weld_context is ctx + +# Tests for encoding and decoding 1D arrays def test_bool_vec(): # Booleans in NumPy, like in Weld, are represented as bytes. From 42eebff76f00ffff023d1f25a7a6d6a5fa6276d8 Mon Sep 17 00:00:00 2001 From: Shoumik Palkar Date: Fri, 11 Oct 2019 19:32:21 +0530 Subject: [PATCH 12/12] copy2numpy test --- weld-python/tests/encoders/test_numpy.py | 8 ++++++++ weld-python/weld/encoders/numpy.py | 4 +++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/weld-python/tests/encoders/test_numpy.py b/weld-python/tests/encoders/test_numpy.py index b7fe92637..70c04021c 100644 --- a/weld-python/tests/encoders/test_numpy.py +++ b/weld-python/tests/encoders/test_numpy.py @@ -54,6 +54,14 @@ def test_baseweldarray_basics(): assert sliced.base is welded assert sliced.weld_context is ctx + copied = sliced.copy2numpy() + assert copied.base is None + try: + copied.ctx + assert False + except AttributeError as e: + pass + # Tests for encoding and decoding 1D arrays def test_bool_vec(): diff --git a/weld-python/weld/encoders/numpy.py b/weld-python/weld/encoders/numpy.py index bef238e15..64c5a2017 100644 --- a/weld-python/weld/encoders/numpy.py +++ b/weld-python/weld/encoders/numpy.py @@ -63,6 +63,8 @@ def __array_finalize__(self, obj): def copy2numpy(self): """ Copies this array's data into a new NumPy `ndarray`. + This is an alias for `np.array(arr, copy=True)` + Examples -------- >>> arr = weldbasearray([1, 2, 3]) @@ -72,7 +74,7 @@ def copy2numpy(self): array([1, 2, 3]) """ - return np.array(self, copy=True).view(np.ndarray) + return np.array(self, copy=True) # Maps a string dtype representation to a Weld scalar type. _known_types = {