From efafb6ded702ec8dd00af115a25923ddacebaa73 Mon Sep 17 00:00:00 2001
From: Shoumik Palkar <shoumik@cs.stanford.edu>
Date: Wed, 2 Oct 2019 08:26:34 +0530
Subject: [PATCH 01/12] Start numpy encoders

---
 weld-python/Cargo.toml     |  1 +
 weld-python/src/lib.rs     |  2 ++
 weld-python/src/npy_enc.rs | 40 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 43 insertions(+)
 create mode 100644 weld-python/src/npy_enc.rs
diff --git a/weld-python/Cargo.toml b/weld-python/Cargo.toml
index 25fd9c22d..5772ed6c8 100644
--- a/weld-python/Cargo.toml
+++ b/weld-python/Cargo.toml
@@ -7,6 +7,7 @@ edition = "2018"
 [dependencies]
 libc = "0.2.0"
 weld = { path = "../weld" }
+numpy = "0.7.0"
 
 [dependencies.pyo3]
 version = "0.8.0"
diff --git a/weld-python/src/lib.rs b/weld-python/src/lib.rs
index dfb428f9c..c4b32d62e 100644
--- a/weld-python/src/lib.rs
+++ b/weld-python/src/lib.rs
@@ -5,6 +5,8 @@ use pyo3::import_exception;
 
 use weld;
 
+pub mod npy_enc;
+
 import_exception!(weld, WeldError);
 
 /// Converts a `Result` to `PyResult`.
diff --git a/weld-python/src/npy_enc.rs b/weld-python/src/npy_enc.rs
new file mode 100644
index 000000000..bcbba0bf8
--- /dev/null
+++ b/weld-python/src/npy_enc.rs
@@ -0,0 +1,40 @@
+//! Encoders and decoders for some common NumPy data types.
+//!
+//! This module supports zero-copy encoding/decoding using NumPy arrays of the following Weld
+//! types:
+//!
+//! * vec[T] where T is an unsigned or signed integer.
+//! * vec[T] where T is a float or double.
+//! * vec[T] where T is a boolean.
+//! * vec[T] where T is a fixed-size string (dtype='|Sx')
+//!
+//! In addition, this module supports encoding Python string objects, but requires copying data.
+//!
+//! 2D NumPy arrays are no longer support because their representation as vec[vec[T]] is quite
+//! inefficient. Weld will eventually include a tensor[T,shape] type that will support this.
+
+use pyo3::prelude::*;
+use pyo3::import_exception;
+
+use numpy::PyArray1;
+
+use weld::data;
+
+/// Converts a 1D NumPy array into a `WeldVec` that can be passed to the Weld runtime.
+fn to_weld_1d<T>(array: &PyArray1<T>) -> data::WeldVec<T> {
+    let array_obj = array.as_array_ptr();
+    data::WeldVec {
+        data: unsafe { (*array_obj).data as *mut T },
+        len: array.len() as i64
+    }
+}
+
+/// Converts a 1D NumPy array into a `WeldVec` that can be passed to the Weld runtime.
+fn to_numpy_1d<T>(vec: &data::WeldVec<T>) -> PyArray1<T> {
+    unimplemented!("This needs to be implemented");
+}
+
+#[pymodule]
+fn numpy_encoders(_py: Python<'_>, m: &PyModule) -> PyResult<()> {
+    Ok(())
+}

From 2549982b585aec8cfc1024f91c915aac133829c0 Mon Sep 17 00:00:00 2001
From: Shoumik Palkar <shoumik@cs.stanford.edu>
Date: Thu, 3 Oct 2019 08:45:50 +0530
Subject: [PATCH 02/12] dtype mapping

---
 weld-python/src/npy_enc.rs         | 21 ++++++++-
 weld-python/weld/encoders/numpy.py | 69 +++++++++++++++++++++++++++++-
 2 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/weld-python/src/npy_enc.rs b/weld-python/src/npy_enc.rs
index bcbba0bf8..2e9d99b01 100644
--- a/weld-python/src/npy_enc.rs
+++ b/weld-python/src/npy_enc.rs
@@ -14,12 +14,25 @@
 //! inefficient. Weld will eventually include a tensor[T,shape] type that will support this.
 
 use pyo3::prelude::*;
-use pyo3::import_exception;
+use pyo3::wrap_pyfunction;
 
 use numpy::PyArray1;
 
 use weld::data;
 
+#[pyclass]
+struct WeldVec {
+    vec: data::WeldVec<i32>,
+}
+
+impl WeldVec {
+    fn new(vec: data::WeldVec<i32>) -> Self {
+        WeldVec {
+            vec
+        }
+    }
+}
+
 /// Converts a 1D NumPy array into a `WeldVec` that can be passed to the Weld runtime.
 fn to_weld_1d<T>(array: &PyArray1<T>) -> data::WeldVec<T> {
     let array_obj = array.as_array_ptr();
@@ -34,7 +47,13 @@ fn to_numpy_1d<T>(vec: &data::WeldVec<T>) -> PyArray1<T> {
     unimplemented!("This needs to be implemented");
 }
 
+#[pyfunction]
+fn to_weld_1d_i32(array: &PyArray1<i32>) -> PyResult<WeldVec> {
+    Ok(WeldVec::new(to_weld_1d::<i32>(array)))
+}
+
 #[pymodule]
 fn numpy_encoders(_py: Python<'_>, m: &PyModule) -> PyResult<()> {
+    m.add_wrapped(wrap_pyfunction!(to_weld_1d_i32)).unwrap();
     Ok(())
 }
diff --git a/weld-python/weld/encoders/numpy.py b/weld-python/weld/encoders/numpy.py
index a334965c7..dc7b9caf8 100644
--- a/weld-python/weld/encoders/numpy.py
+++ b/weld-python/weld/encoders/numpy.py
@@ -1,10 +1,77 @@
 """
-Implements some basic encoders for NumPy arrays.
+Implements encoders for NumPy values.
+
+Zero-copy conversions (in particular, to 1D arrays) are implemented here
+directly since they only involve a pointer copy. Conversions of types that
+currently require copies are implemented in Rust.
 """
 
 import numpy as np
 
 from .encoder_base import *
+from ..types import *
+
+# Maps a string dtype representation to a Weld scalar type.
+_known_types = {
+        'int8': I8(),
+        'int16': I16(),
+        'int32': I32(),
+        'int64': I64(),
+        'uint8': U8(),
+        'uint16': U16(),
+        'uint32': U32(),
+        'uint64': U64(),
+        'float': F32(),
+        'float32': F32(),
+        'double': F64(),
+        'float64': F64(),
+        'bool': Bool()
+        }
+
+def dtype_to_weld_type(ty):
+    """Converts a NumPy data type to a Weld type.
+
+    The data type can be any type that can be converted to a NumPy dtype,
+    e.g., a string (e.g., 'int32') or a NumPy scalar type (e.g., np.int32). The
+    type chosen follows the rules specified by NumPy here:
+
+    https://docs.scipy.org/doc/numpy/reference/arrays.dtypes.html#dtype
+
+    For example, 'i8' will map to an int64 in Weld, since it indicates a signed
+    integer that has eight bytes.
+
+    Examples
+    --------
+    >>> dtype_to_weld_type('int32')
+    <weld.types.I32 object at 0x...>
+    >>> dtype_to_weld_type('float')
+    <weld.types.F64 object at 0x...>
+    >>> dtype_to_weld_type('i8')
+    <weld.types.I64 object at 0x...>
+    >>> dtype_to_weld_type(np.int16)
+    <weld.types.I16 object at 0x...>
+
+    Parameters
+    ----------
+    ty : str or dtype or NumPy scalar type
+        The NumPy type to convert
+
+    Returns
+    -------
+    WeldType
+
+    """
+    if not isinstance(ty, np.dtype):
+        ty = np.dtype(ty)
+
+    ty = str(ty)
+    if ty in _known_types:
+        return _known_types.get(ty)
+
+    if ty.startswith('S'):
+        raise TypeError("Python 2 strings not supported -- use Unicode")
+    if ty.find('U') != -1:
+        raise NotImplementedError("Unicode strings not yet supported")
 
 class NumPyWeldEncoder(WeldEncoder):
     pass

From fae2755f24937aea912adc58fca14ea0fde61b29 Mon Sep 17 00:00:00 2001
From: Shoumik Palkar <shoumik@cs.stanford.edu>
Date: Thu, 3 Oct 2019 09:00:15 +0530
Subject: [PATCH 03/12] Serialization for 1D arrays

---
 weld-python/weld/encoders/numpy.py | 38 +++++++++++++++++++++++++++++-
 weld-python/weld/types.py          | 10 ++++----
 2 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/weld-python/weld/encoders/numpy.py b/weld-python/weld/encoders/numpy.py
index dc7b9caf8..ff6ef377b 100644
--- a/weld-python/weld/encoders/numpy.py
+++ b/weld-python/weld/encoders/numpy.py
@@ -6,6 +6,7 @@
 currently require copies are implemented in Rust.
 """
 
+import ctypes
 import numpy as np
 
 from .encoder_base import *
@@ -74,7 +75,42 @@ def dtype_to_weld_type(ty):
         raise NotImplementedError("Unicode strings not yet supported")
 
 class NumPyWeldEncoder(WeldEncoder):
-    pass
+
+    @staticmethod
+    def _convert_1d_array(array):
+        """Converts a 1D NumPy array into a Weld vector.
+
+        The vector holds a reference to the array.
+
+        Examples
+        --------
+        >>> arr = np.array([1, 2, 3])
+        >>> encoded = NumPyWeldEncoder._convert_1d_array(arr)
+        >>> encoded.length
+        c_long(3)
+        >>> encoded.data.contents
+        c_long(1)
+
+        Parameters
+        ----------
+        array : ndarray
+            A one-dimensional NumPy array.
+
+        Returns
+        -------
+        WeldVec
+
+        """
+        elem_type = dtype_to_weld_type(array.dtype)
+        vec_type = WeldVec(elem_type)
+
+        data = array.ctypes.data_as(ctypes.POINTER(elem_type.ctype_class))
+        length = ctypes.c_int64(len(array))
+
+        vec = vec_type.ctype_class()
+        vec.data = data
+        vec.length = length
+        return vec
 
 class NumPyWeldDecoder(WeldDecoder):
     pass
diff --git a/weld-python/weld/types.py b/weld-python/weld/types.py
index d3f7cbb46..663abcd5b 100644
--- a/weld-python/weld/types.py
+++ b/weld-python/weld/types.py
@@ -164,16 +164,16 @@ def vec_factory(elem_type):
             If the vector class already exists, it is delivered via the
             _singletons dictionary.
             """
-            class Vec(Structure):
+            class Vec(ctypes.Structure):
                 _fields_ = [
-                    ("ptr", ctypes.POINTER(elem_type.ctype_class)),
-                    ("size", ctypes.c_long),
+                    ("data", ctypes.POINTER(elem_type.ctype_class)),
+                    ("size", ctypes.c_int64),
                 ]
             return Vec
 
         if self.elem_type not in WeldVec._singletons:
-            WeldVec._singletons[self.elemType] = vec_factory(self.elemType)
-        return WeldVec._singletons[self.elemType]
+            WeldVec._singletons[self.elem_type] = vec_factory(self.elem_type)
+        return WeldVec._singletons[self.elem_type]
 
 
 class WeldStruct(WeldType):

From 03ad6597df7bcfe5a16caf5f258c23e6c759c13d Mon Sep 17 00:00:00 2001
From: Shoumik Palkar <shoumik@cs.stanford.edu>
Date: Fri, 4 Oct 2019 10:37:54 +0530
Subject: [PATCH 04/12] implement encode in encoder

---
 weld-python/weld/encoders/numpy.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/weld-python/weld/encoders/numpy.py b/weld-python/weld/encoders/numpy.py
index ff6ef377b..809b3bbd1 100644
--- a/weld-python/weld/encoders/numpy.py
+++ b/weld-python/weld/encoders/numpy.py
@@ -77,7 +77,7 @@ def dtype_to_weld_type(ty):
 class NumPyWeldEncoder(WeldEncoder):
 
     @staticmethod
-    def _convert_1d_array(array):
+    def _convert_1d_array(array, check_type=None):
         """Converts a 1D NumPy array into a Weld vector.
 
         The vector holds a reference to the array.
@@ -95,6 +95,10 @@ def _convert_1d_array(array):
         ----------
         array : ndarray
             A one-dimensional NumPy array.
+        check_type : WeldType, optional
+            If this value is passed, this function will check whether the
+            array's derived WeldType is equal to the passed type.  Defaults to
+            None.
 
         Returns
         -------
@@ -104,6 +108,9 @@ def _convert_1d_array(array):
         elem_type = dtype_to_weld_type(array.dtype)
         vec_type = WeldVec(elem_type)
 
+        if check_type is not None:
+            assert check_type == vec_type
+
         data = array.ctypes.data_as(ctypes.POINTER(elem_type.ctype_class))
         length = ctypes.c_int64(len(array))
 
@@ -112,5 +119,12 @@ def _convert_1d_array(array):
         vec.length = length
         return vec
 
+    def encode(self, obj, ty):
+        if isinstance(obj, np.ndarray):
+            if obj.ndim == 1:
+                return NumPyWeldEncoder._convert_1d_array(obj, check_type=ty)
+            else:
+                raise NotImplementedError
+
 class NumPyWeldDecoder(WeldDecoder):
     pass

From f1fae4f6f727871cfe0028ed12195ecc4b2367f9 Mon Sep 17 00:00:00 2001
From: Shoumik Palkar <shoumik@cs.stanford.edu>
Date: Fri, 4 Oct 2019 11:33:30 +0530
Subject: [PATCH 05/12] Remove 1d rust encoders

---
 weld-python/src/lib.rs     |  2 --
 weld-python/src/npy_enc.rs | 59 --------------------------------------
 2 files changed, 61 deletions(-)
 delete mode 100644 weld-python/src/npy_enc.rs

diff --git a/weld-python/src/lib.rs b/weld-python/src/lib.rs
index c4b32d62e..dfb428f9c 100644
--- a/weld-python/src/lib.rs
+++ b/weld-python/src/lib.rs
@@ -5,8 +5,6 @@ use pyo3::import_exception;
 
 use weld;
 
-pub mod npy_enc;
-
 import_exception!(weld, WeldError);
 
 /// Converts a `Result` to `PyResult`.
diff --git a/weld-python/src/npy_enc.rs b/weld-python/src/npy_enc.rs
deleted file mode 100644
index 2e9d99b01..000000000
--- a/weld-python/src/npy_enc.rs
+++ /dev/null
@@ -1,59 +0,0 @@
-//! Encoders and decoders for some common NumPy data types.
-//!
-//! This module supports zero-copy encoding/decoding using NumPy arrays of the following Weld
-//! types:
-//!
-//! * vec[T] where T is an unsigned or signed integer.
-//! * vec[T] where T is a float or double.
-//! * vec[T] where T is a boolean.
-//! * vec[T] where T is a fixed-size string (dtype='|Sx')
-//!
-//! In addition, this module supports encoding Python string objects, but requires copying data.
-//!
-//! 2D NumPy arrays are no longer support because their representation as vec[vec[T]] is quite
-//! inefficient. Weld will eventually include a tensor[T,shape] type that will support this.
-
-use pyo3::prelude::*;
-use pyo3::wrap_pyfunction;
-
-use numpy::PyArray1;
-
-use weld::data;
-
-#[pyclass]
-struct WeldVec {
-    vec: data::WeldVec<i32>,
-}
-
-impl WeldVec {
-    fn new(vec: data::WeldVec<i32>) -> Self {
-        WeldVec {
-            vec
-        }
-    }
-}
-
-/// Converts a 1D NumPy array into a `WeldVec` that can be passed to the Weld runtime.
-fn to_weld_1d<T>(array: &PyArray1<T>) -> data::WeldVec<T> {
-    let array_obj = array.as_array_ptr();
-    data::WeldVec {
-        data: unsafe { (*array_obj).data as *mut T },
-        len: array.len() as i64
-    }
-}
-
-/// Converts a 1D NumPy array into a `WeldVec` that can be passed to the Weld runtime.
-fn to_numpy_1d<T>(vec: &data::WeldVec<T>) -> PyArray1<T> {
-    unimplemented!("This needs to be implemented");
-}
-
-#[pyfunction]
-fn to_weld_1d_i32(array: &PyArray1<i32>) -> PyResult<WeldVec> {
-    Ok(WeldVec::new(to_weld_1d::<i32>(array)))
-}
-
-#[pymodule]
-fn numpy_encoders(_py: Python<'_>, m: &PyModule) -> PyResult<()> {
-    m.add_wrapped(wrap_pyfunction!(to_weld_1d_i32)).unwrap();
-    Ok(())
-}

From 8cb0984ebf3da7540e1a8b7b67d94347c3ae8d9f Mon Sep 17 00:00:00 2001
From: Shoumik Palkar <shoumik@cs.stanford.edu>
Date: Thu, 10 Oct 2019 21:17:03 +0530
Subject: [PATCH 06/12] simple decoding of numpy arrays

---
 weld-python/weld/encoders/numpy.py | 89 ++++++++++++++++++++++++++++--
 1 file changed, 84 insertions(+), 5 deletions(-)

diff --git a/weld-python/weld/encoders/numpy.py b/weld-python/weld/encoders/numpy.py
index 809b3bbd1..6997f30f7 100644
--- a/weld-python/weld/encoders/numpy.py
+++ b/weld-python/weld/encoders/numpy.py
@@ -22,13 +22,43 @@
         'uint16': U16(),
         'uint32': U32(),
         'uint64': U64(),
-        'float': F32(),
         'float32': F32(),
+        'float': F64(),
         'double': F64(),
         'float64': F64(),
         'bool': Bool()
         }
 
+# Reverse of the above.
+_known_types_weld2dtype = {v: k for k, v in _known_types.items()}
+
+def weld_type_to_dtype(ty):
+    """Converts a Weld type to a NumPy dtype.
+
+    Examples
+    --------
+    >>> weld_type_to_dtype(I32())
+    dtype('int32')
+    >>> weld_type_to_dtype(F32())
+    dtype('float32')
+    >>> weld_type_to_dtype(F64())
+    dtype('float64')
+
+    Parameters
+    ----------
+    ty: WeldType
+        The type to convert
+
+    Returns
+    -------
+    dtype
+    
+    """
+    if ty in _known_types_weld2dtype:
+        return np.dtype(_known_types_weld2dtype[ty])
+
+    raise NotImplementedError("String Weld -> dtype not supported")
+
 def dtype_to_weld_type(ty):
     """Converts a NumPy data type to a Weld type.
 
@@ -86,8 +116,8 @@ def _convert_1d_array(array, check_type=None):
         --------
         >>> arr = np.array([1, 2, 3])
         >>> encoded = NumPyWeldEncoder._convert_1d_array(arr)
-        >>> encoded.length
-        c_long(3)
+        >>> encoded.size
+        3
         >>> encoded.data.contents
         c_long(1)
 
@@ -116,7 +146,7 @@ def _convert_1d_array(array, check_type=None):
 
         vec = vec_type.ctype_class()
         vec.data = data
-        vec.length = length
+        vec.size = length
         return vec
 
     def encode(self, obj, ty):
@@ -125,6 +155,55 @@ def encode(self, obj, ty):
                 return NumPyWeldEncoder._convert_1d_array(obj, check_type=ty)
             else:
                 raise NotImplementedError
+        else:
+            raise TypeError("Unexpected type {} in NumPy encoder".format(type(obj)))
 
 class NumPyWeldDecoder(WeldDecoder):
-    pass
+    """ Decodes an encoded Weld array into a NumPy array.
+
+    >>> arr = np.array([1,2,3], dtype='int32')
+    >>> encoded = NumPyWeldEncoder().encode(arr, WeldVec(I32()))
+    >>> NumPyWeldDecoder().decode(encoded, WeldVec(I32()))
+    array([1, 2, 3], dtype=int32)
+
+    """
+
+    @staticmethod
+    def _memory_buffer(c_pointer, length, dtype):
+        """Creates a Python memory buffer from the pointer.
+
+        Parameters
+        ----------
+
+        c_pointer : ctypes pointer
+            the pointer the buffer points to
+        length : int
+            the array length
+        dtype : NumPy dtype
+            the type of the elements in the buffer.
+
+        Returns
+        -------
+        memory
+
+        """
+        arr_size = dtype.itemsize * length
+        buf_from_mem = ctypes.pythonapi.PyMemoryView_FromMemory
+        buf_from_mem.restype = ctypes.py_object
+        buf_from_mem.argtypes = (ctypes.c_void_p, ctypes.c_int, ctypes.c_int)
+        return buf_from_mem(c_pointer, arr_size, 0x100)
+
+
+    def decode(self, obj, restype):
+        # A 1D NumPy array
+        if isinstance(restype, WeldVec) and\
+                not isinstance(restype.elem_type, WeldVec):
+            elem_type = restype.elem_type
+            dtype = weld_type_to_dtype(elem_type)
+            pointer = obj.data
+            size = obj.size
+            array = np.frombuffer(NumPyWeldDecoder._memory_buffer(pointer, size, dtype),
+                    dtype=dtype, count=size)
+            return array
+        else:
+            raise TypeError("Unsupported type {} in NumPy decoder".format(type(obj)))

From 684d83d312d020ea5c0e3364b2b0847ee0e7d8b1 Mon Sep 17 00:00:00 2001
From: Shoumik Palkar <shoumik@cs.stanford.edu>
Date: Fri, 11 Oct 2019 17:11:45 +0530
Subject: [PATCH 07/12] Add 1D Numpy encoding tests

---
 weld-python/tests/encoders/__init__.py        |  0
 weld-python/tests/encoders/helpers.py         | 60 ++++++++++++++++
 weld-python/tests/encoders/test_numpy.py      | 71 +++++++++++++++++++
 weld-python/tests/encoders/test_primitives.py | 37 +---------
 weld-python/weld/encoders/numpy.py            |  3 +-
 5 files changed, 135 insertions(+), 36 deletions(-)
 create mode 100644 weld-python/tests/encoders/__init__.py
 create mode 100644 weld-python/tests/encoders/helpers.py
 create mode 100644 weld-python/tests/encoders/test_numpy.py

diff --git a/weld-python/tests/encoders/__init__.py b/weld-python/tests/encoders/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/weld-python/tests/encoders/helpers.py b/weld-python/tests/encoders/helpers.py
new file mode 100644
index 000000000..87c92fa4d
--- /dev/null
+++ b/weld-python/tests/encoders/helpers.py
@@ -0,0 +1,60 @@
+
+import ctypes
+
+def encdec_factory(encoder, decoder, eq=None):
+    """ Returns a function that encodes and decodes a value.
+
+    Parameters
+    ----------
+    encoder : WeldEncoder
+        the encoder class to use.
+    decoder : WeldDecoder
+        the decoder class to use.
+    eq : function (T, T) => bool, optional (default=None)
+        the equality function to use. If this is `None`, the `==` operator is
+        used.
+
+    Returns
+    -------
+    function
+
+    """
+    def encdec(value, ty, assert_equal=True, err=False):
+        """ Helper function that encodes a value and decodes it.
+
+        The function asserts that the original value and the decoded value are
+        equal.
+
+        Parameters
+        ----------
+        value : any
+            The value to encode and decode
+        ty : WeldType
+            the WeldType of the value
+        assert_equal : bool (default True)
+            Checks whether the original value and decoded value are equal.
+        err :  bool (default False)
+            If True, expects an error.
+
+        """
+        enc = encoder()
+        dec = decoder()
+
+        try:
+            result = dec.decode(ctypes.pointer(enc.encode(value, ty)), ty)
+        except Exception as e:
+            if err:
+                return
+            else:
+                raise e
+
+        if err:
+            raise RuntimeError("Expected error during encode/decode")
+
+        if assert_equal:
+            if eq is not None:
+                assert eq(value, result)
+            else:
+                assert value == result
+
+    return encdec
diff --git a/weld-python/tests/encoders/test_numpy.py b/weld-python/tests/encoders/test_numpy.py
new file mode 100644
index 000000000..4b8300b80
--- /dev/null
+++ b/weld-python/tests/encoders/test_numpy.py
@@ -0,0 +1,71 @@
+"""
+Tests NumPy encoders and decoders.
+"""
+
+import ctypes
+import numpy as np
+
+from .helpers import encdec_factory
+from weld.encoders.numpy import NumPyWeldEncoder, NumPyWeldDecoder
+from weld.types import *
+
+encdec = encdec_factory(NumPyWeldEncoder, NumPyWeldDecoder, eq=np.allclose)
+
+def array(dtype, length=5):
+    """Creates a 1D NumPy array with the given data type.
+
+    The array is filled with data [0...length).
+
+    >>> array('int8')
+    array([0, 1, 2, 3, 4], dtype=int8)
+    >>> array('float32')
+    array([0., 1., 2., 3., 4.], dtype=float32)
+
+    Parameters
+    ----------
+    dtype: np.dtype
+        data type of array elements
+    length: int
+        elements in array
+
+    Returns
+    -------
+    np.ndarray
+
+    """
+    return np.arange(start=0, stop=length, dtype=dtype)
+ 
+def test_bool_vec():
+    # Booleans in NumPy, like in Weld, are represented as bytes.
+    encdec(np.array([True, True, False, False, True], dtype='bool'),
+            WeldVec(Bool()))
+
+def test_i8_vec():
+    encdec(array('int8'), WeldVec(I8()))
+
+def test_i16_vec():
+    encdec(array('int16'), WeldVec(I16()))
+
+def test_i32_vec():
+    encdec(array('int32'), WeldVec(I32()))
+
+def test_i64_vec():
+    encdec(array('int64'), WeldVec(I64()))
+
+def test_u8_vec():
+    encdec(array('uint8'), WeldVec(U8()))
+
+def test_u16_vec():
+    encdec(array('uint16'), WeldVec(U16()))
+
+def test_u32_vec():
+    encdec(array('uint32'), WeldVec(U32()))
+
+def test_u64_vec():
+    encdec(array('uint64'), WeldVec(U64()))
+
+def test_float32_vec():
+    encdec(array('float32'), WeldVec(F32()))
+
+def test_float64_vec():
+    encdec(array('float64'), WeldVec(F64()))
diff --git a/weld-python/tests/encoders/test_primitives.py b/weld-python/tests/encoders/test_primitives.py
index 686f8537b..09e551075 100644
--- a/weld-python/tests/encoders/test_primitives.py
+++ b/weld-python/tests/encoders/test_primitives.py
@@ -2,46 +2,13 @@
 Tests primitive encoders and decoders.
 """
 
-import copy
 import ctypes
 
+from .helpers import encdec_factory
 from weld.encoders import PrimitiveWeldEncoder, PrimitiveWeldDecoder
 from weld.types import *
 
-def encdec(value, ty, assert_equal=True, err=False):
-    """ Helper function that encodes a value and decodes it.
-
-    The function asserts that the original value and the decoded value are
-    equal.
-
-    Parameters
-    ----------
-    value : any
-        The value to encode and decode
-    ty : WeldType
-        the WeldType of the value
-    assert_equal : bool (default True)
-        Checks whether the original value and decoded value are equal.
-    err :  bool (default False)
-        If True, expects an error.
-
-    """
-    enc = PrimitiveWeldEncoder()
-    dec = PrimitiveWeldDecoder()
-
-    try:
-        result = dec.decode(ctypes.pointer(enc.encode(value, ty)), ty)
-    except Exception as e:
-        if err:
-            return
-        else:
-            raise e
-
-    if err:
-        raise RuntimeError("Expected error during encode/decode")
-
-    if assert_equal:
-        assert value == result
+encdec = encdec_factory(PrimitiveWeldEncoder, PrimitiveWeldDecoder)
 
 def test_i8_encode():
     encdec(-1, I8())
diff --git a/weld-python/weld/encoders/numpy.py b/weld-python/weld/encoders/numpy.py
index 6997f30f7..f61501f93 100644
--- a/weld-python/weld/encoders/numpy.py
+++ b/weld-python/weld/encoders/numpy.py
@@ -163,7 +163,7 @@ class NumPyWeldDecoder(WeldDecoder):
 
     >>> arr = np.array([1,2,3], dtype='int32')
     >>> encoded = NumPyWeldEncoder().encode(arr, WeldVec(I32()))
-    >>> NumPyWeldDecoder().decode(encoded, WeldVec(I32()))
+    >>> NumPyWeldDecoder().decode(ctypes.pointer(encoded), WeldVec(I32()))
     array([1, 2, 3], dtype=int32)
 
     """
@@ -196,6 +196,7 @@ def _memory_buffer(c_pointer, length, dtype):
 
     def decode(self, obj, restype):
         # A 1D NumPy array
+        obj = obj.contents
         if isinstance(restype, WeldVec) and\
                 not isinstance(restype.elem_type, WeldVec):
             elem_type = restype.elem_type

From e89b1bcee71f84768d08c2e90197c6f6407e3a66 Mon Sep 17 00:00:00 2001
From: Shoumik Palkar <shoumik@cs.stanford.edu>
Date: Fri, 11 Oct 2019 17:54:15 +0530
Subject: [PATCH 08/12] more numpy decoding

---
 weld-python/weld/compile.py               |  4 +-
 weld-python/weld/encoders/encoder_base.py |  8 ++-
 weld-python/weld/encoders/numpy.py        | 78 +++++++++++++++++++----
 weld-python/weld/encoders/primitives.py   |  2 +-
 4 files changed, 74 insertions(+), 18 deletions(-)

diff --git a/weld-python/weld/compile.py b/weld-python/weld/compile.py
index fa68562c9..b5bb436ba 100644
--- a/weld-python/weld/compile.py
+++ b/weld-python/weld/compile.py
@@ -167,9 +167,9 @@ def func(*args, context=None):
         data = ctypes.cast(result.data(), pointer_type)
 
         if decoder is not None:
-            result = decoder.decode(data, restype)
+            result = decoder.decode(data, restype, context)
         else:
-            result = primitive_decoder.decode(data, restype)
+            result = primitive_decoder.decode(data, restype, context)
         return (result, context)
 
     return func
diff --git a/weld-python/weld/encoders/encoder_base.py b/weld-python/weld/encoders/encoder_base.py
index b1d99e17d..f640d0e49 100644
--- a/weld-python/weld/encoders/encoder_base.py
+++ b/weld-python/weld/encoders/encoder_base.py
@@ -38,7 +38,7 @@ class WeldDecoder(ABC):
     """
 
     @abstractmethod
-    def decode(obj, restype):
+    def decode(self, obj, restype, context):
         """
         Decodes the object, assuming object has the WeldType restype.
 
@@ -51,11 +51,15 @@ def decode(obj, restype):
             An object encoded in the Weld ABI.
         restype : WeldType
             The WeldType of the object that is being decoded.
+        context : WeldContext or None
+            The context backing `obj` if this value was constructed in Weld.
 
         Returns
         -------
         any
-            The decoder can return any Python value.
+            The decoder can return any Python value. If the data is not copied
+            and context is not `None`, the returned object should hold a
+            reference to the context to prevent use-after-free bugs.
 
         """
         pass
diff --git a/weld-python/weld/encoders/numpy.py b/weld-python/weld/encoders/numpy.py
index f61501f93..43c1e6943 100644
--- a/weld-python/weld/encoders/numpy.py
+++ b/weld-python/weld/encoders/numpy.py
@@ -1,9 +1,17 @@
 """
 Implements encoders for NumPy values.
 
+The Weld package includes native convertors for NumPy arrays because NumPy is
+the standard way for interacting with C-like array data.
+
+The encoder in this package accepts ndarray or its subclasses. The decoder in
+this module returns a subclass of ndarray called `weldbasearray`, which may
+hold a reference to a `WeldContext`. This prevents arrays backed by memory
+allocated in Weld from being freed before the array's reference count drops to
+0.
+
 Zero-copy conversions (in particular, to 1D arrays) are implemented here
-directly since they only involve a pointer copy. Conversions of types that
-currently require copies are implemented in Rust.
+directly since they only involve a pointer copy.
 """
 
 import ctypes
@@ -12,6 +20,10 @@
 from .encoder_base import *
 from ..types import *
 
+class weldbasearray(np.ndarray):
+    # Not implemented yet.
+    pass
+
 # Maps a string dtype representation to a Weld scalar type.
 _known_types = {
         'int8': I8(),
@@ -51,14 +63,13 @@ def weld_type_to_dtype(ty):
 
     Returns
     -------
-    dtype
+    dtype or None
+        Returns None if the type is not recognized.
     
     """
     if ty in _known_types_weld2dtype:
         return np.dtype(_known_types_weld2dtype[ty])
 
-    raise NotImplementedError("String Weld -> dtype not supported")
-
 def dtype_to_weld_type(ty):
     """Converts a NumPy data type to a Weld type.
 
@@ -158,6 +169,7 @@ def encode(self, obj, ty):
         else:
             raise TypeError("Unexpected type {} in NumPy encoder".format(type(obj)))
 
+
 class NumPyWeldDecoder(WeldDecoder):
     """ Decodes an encoded Weld array into a NumPy array.
 
@@ -194,17 +206,57 @@ def _memory_buffer(c_pointer, length, dtype):
         return buf_from_mem(c_pointer, arr_size, 0x100)
 
 
-    def decode(self, obj, restype):
+    def _numpy_type(weld_type):
+        """Infers the ndarray dimensions and dtype from a WeldVec type.
+
+        Throws a TypeError if the weld_type cannot be represented as an
+        ndarray of some scalar type.
+
+        Parameters
+        ----------
+        weld_type : WeldType
+            The type to check
+
+        Returns
+        -------
+        (int, dtype) tuple
+            The first element is the number of dimensions and the second
+            element is the dtype.
+
+        >>> NumPyWeldDecoder._numpy_type(WeldVec(I8()))
+        (1, dtype('int8'))
+        >>> NumPyWeldDecoder._numpy_type(WeldVec(WeldVec(F32())))
+        (2, dtype('float32'))
+        >>> NumPyWeldDecoder._numpy_type(I32())
+        Traceback (most recent call last):
+        ...
+        TypeError: type cannot be represented as ndarray
+
+        """
+        if not isinstance(weld_type, WeldVec):
+            raise TypeError("type cannot be represented as ndarray")
+
+        dimension = 1
+        elem_type = weld_type.elem_type
+        if isinstance(elem_type, WeldVec):
+            (inner_dims, inner_ty) = NumPyWeldDecoder._numpy_type(elem_type)
+            dimension += inner_dims
+        else:
+            try:
+                inner_ty = weld_type_to_dtype(elem_type)
+            except:
+                raise TypeError("unknown element type {}".format(elem_type))
+        return (dimension, inner_ty)
+
+
+    def decode(self, obj, restype, context=None):
         # A 1D NumPy array
         obj = obj.contents
-        if isinstance(restype, WeldVec) and\
-                not isinstance(restype.elem_type, WeldVec):
+        (dims, dtype) = NumPyWeldDecoder._numpy_type(restype)
+        if dims == 1:
             elem_type = restype.elem_type
-            dtype = weld_type_to_dtype(elem_type)
-            pointer = obj.data
-            size = obj.size
-            array = np.frombuffer(NumPyWeldDecoder._memory_buffer(pointer, size, dtype),
-                    dtype=dtype, count=size)
+            buf = NumPyWeldDecoder._memory_buffer(obj.data, obj.size, dtype)
+            array = np.frombuffer(buf, dtype=dtype, count=obj.size)
             return array
         else:
             raise TypeError("Unsupported type {} in NumPy decoder".format(type(obj)))
diff --git a/weld-python/weld/encoders/primitives.py b/weld-python/weld/encoders/primitives.py
index d6a8178bf..8c7a6f36b 100644
--- a/weld-python/weld/encoders/primitives.py
+++ b/weld-python/weld/encoders/primitives.py
@@ -65,7 +65,7 @@ class PrimitiveWeldDecoder(WeldDecoder):
     >>> decoder.decode(ctypes.pointer(x), struct_type)
     (1, 1.0)
     """
-    def decode(self, obj, restype):
+    def decode(self, obj, restype, context=None):
         if isinstance(restype, Bool):
             return bool(obj.contents.value)
         elif isinstance(restype, WeldStruct):

From e153606820750c1e0759e134877d2d7631f7b965 Mon Sep 17 00:00:00 2001
From: Shoumik Palkar <shoumik@cs.stanford.edu>
Date: Fri, 11 Oct 2019 17:57:24 +0530
Subject: [PATCH 09/12] fix a comment

---
 weld-python/weld/encoders/numpy.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/weld-python/weld/encoders/numpy.py b/weld-python/weld/encoders/numpy.py
index 43c1e6943..51547eee9 100644
--- a/weld-python/weld/encoders/numpy.py
+++ b/weld-python/weld/encoders/numpy.py
@@ -207,10 +207,10 @@ def _memory_buffer(c_pointer, length, dtype):
 
 
     def _numpy_type(weld_type):
-        """Infers the ndarray dimensions and dtype from a WeldVec type.
+        """Infers the ndarray dimensions and dtype from a Weld type.
 
-        Throws a TypeError if the weld_type cannot be represented as an
-        ndarray of some scalar type.
+        Throws a TypeError if the weld_type cannot be represented as an ndarray
+        of some scalar type.
 
         Parameters
         ----------

From 1d45eaf6990b92dabd3a39b3df97a69905485d21 Mon Sep 17 00:00:00 2001
From: Shoumik Palkar <shoumik@cs.stanford.edu>
Date: Fri, 11 Oct 2019 19:16:20 +0530
Subject: [PATCH 10/12] fill in weldbasearray

---
 weld-python/Cargo.toml             |  1 -
 weld-python/weld/encoders/numpy.py | 58 +++++++++++++++++++++++++++---
 2 files changed, 54 insertions(+), 5 deletions(-)

diff --git a/weld-python/Cargo.toml b/weld-python/Cargo.toml
index 5772ed6c8..25fd9c22d 100644
--- a/weld-python/Cargo.toml
+++ b/weld-python/Cargo.toml
@@ -7,7 +7,6 @@ edition = "2018"
 [dependencies]
 libc = "0.2.0"
 weld = { path = "../weld" }
-numpy = "0.7.0"
 
 [dependencies.pyo3]
 version = "0.8.0"
diff --git a/weld-python/weld/encoders/numpy.py b/weld-python/weld/encoders/numpy.py
index 51547eee9..bef238e15 100644
--- a/weld-python/weld/encoders/numpy.py
+++ b/weld-python/weld/encoders/numpy.py
@@ -21,8 +21,58 @@
 from ..types import *
 
 class weldbasearray(np.ndarray):
-    # Not implemented yet.
-    pass
+    """ A NumPy array possibly backed by a `WeldContext`.
+
+    This class is a wrapper around the NumPy `ndarray` class, but it contains
+    an additional `weld_context` attribute. This attribute references the
+    memory that backs the array, if the array was returned by Weld (or created
+    from another array that was returned by Weld). It prevents memory owned by
+    the context from being freed before all references to the array are
+    deleted.
+
+    This class also contains an additional method, `copy2numpy`, which
+    deep-copies the data referenced by this array to a regular `ndarray`. The
+    resulting array does not hold a reference to the context or the original
+    array.
+
+    If the `weld_context` attribute is `None`, this class acts like a regular
+    `ndarray`, and the `copy2numpy` function simply copies this array.
+
+    """
+
+    def __new__(cls, input_array, weld_context=None):
+        """ Instance initializer.
+
+        Parameters
+        ----------
+        input_array : array-like
+            Data to wrap; viewed as this class via `np.asarray(...).view`.
+        weld_context : WeldContext or None
+            If not `None`, the context that owns `input_array`'s memory.
+        """
+        obj = np.asarray(input_array).view(cls)
+        obj.weld_context = weld_context
+        return obj
+
+    def __array_finalize__(self, obj):
+        """ Finalizes array. See the NumPy documentation. """
+        if obj is None:
+            return
+        self.weld_context = getattr(obj, 'weld_context', None)
+
+    def copy2numpy(self):
+        """ Copies this array's data into a new NumPy `ndarray`.
+
+        Examples
+        --------
+        >>> arr = weldbasearray([1, 2, 3])
+        >>> arr
+        weldbasearray([1, 2, 3])
+        >>> arr.copy2numpy()
+        array([1, 2, 3])
+
+        """
+        return np.array(self, copy=True).view(np.ndarray)
 
 # Maps a string dtype representation to a Weld scalar type.
 _known_types = {
@@ -176,7 +226,7 @@ class NumPyWeldDecoder(WeldDecoder):
     >>> arr = np.array([1,2,3], dtype='int32')
     >>> encoded = NumPyWeldEncoder().encode(arr, WeldVec(I32()))
     >>> NumPyWeldDecoder().decode(ctypes.pointer(encoded), WeldVec(I32()))
-    array([1, 2, 3], dtype=int32)
+    weldbasearray([1, 2, 3], dtype=int32)
 
     """
 
@@ -257,6 +307,6 @@ def decode(self, obj, restype, context=None):
             elem_type = restype.elem_type
             buf = NumPyWeldDecoder._memory_buffer(obj.data, obj.size, dtype)
             array = np.frombuffer(buf, dtype=dtype, count=obj.size)
-            return array
+            return weldbasearray(array, weld_context=context)
         else:
             raise TypeError("Unsupported type {} in NumPy decoder".format(type(obj)))

From 28516d398932ca7dd3d057f9913d803a6973968b Mon Sep 17 00:00:00 2001
From: Shoumik Palkar <shoumik@cs.stanford.edu>
Date: Fri, 11 Oct 2019 19:25:12 +0530
Subject: [PATCH 11/12] add some sanity tests for weldbasearray

---
 weld-python/tests/encoders/test_numpy.py | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/weld-python/tests/encoders/test_numpy.py b/weld-python/tests/encoders/test_numpy.py
index 4b8300b80..b7fe92637 100644
--- a/weld-python/tests/encoders/test_numpy.py
+++ b/weld-python/tests/encoders/test_numpy.py
@@ -6,7 +6,9 @@
 import numpy as np
 
 from .helpers import encdec_factory
-from weld.encoders.numpy import NumPyWeldEncoder, NumPyWeldDecoder
+
+from weld import WeldConf, WeldContext
+from weld.encoders.numpy import weldbasearray, NumPyWeldEncoder, NumPyWeldDecoder
 from weld.types import *
 
 encdec = encdec_factory(NumPyWeldEncoder, NumPyWeldDecoder, eq=np.allclose)
@@ -34,6 +36,25 @@ def array(dtype, length=5):
 
     """
     return np.arange(start=0, stop=length, dtype=dtype)
+
+
+# Tests for ensuring weldbasearrays propagate their contexts properly
+
+def test_baseweldarray_basics():
+    x = np.array([1, 2, 3, 4, 5], dtype="int8")
+
+    ctx = WeldContext(WeldConf())
+
+    welded = weldbasearray(x, weld_context=ctx)
+    assert welded.dtype == "int8"
+    assert welded.weld_context is ctx
+
+    sliced = welded[1:]
+    assert np.allclose(sliced, np.array([2,3,4,5]))
+    assert sliced.base is welded
+    assert sliced.weld_context is ctx
+
+# Tests for encoding and decoding 1D arrays
  
 def test_bool_vec():
     # Booleans in NumPy, like in Weld, are represented as bytes.

From 42eebff76f00ffff023d1f25a7a6d6a5fa6276d8 Mon Sep 17 00:00:00 2001
From: Shoumik Palkar <shoumik@cs.stanford.edu>
Date: Fri, 11 Oct 2019 19:32:21 +0530
Subject: [PATCH 12/12] copy2numpy test

---
 weld-python/tests/encoders/test_numpy.py | 8 ++++++++
 weld-python/weld/encoders/numpy.py       | 4 +++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/weld-python/tests/encoders/test_numpy.py b/weld-python/tests/encoders/test_numpy.py
index b7fe92637..70c04021c 100644
--- a/weld-python/tests/encoders/test_numpy.py
+++ b/weld-python/tests/encoders/test_numpy.py
@@ -54,6 +54,14 @@ def test_baseweldarray_basics():
     assert sliced.base is welded
     assert sliced.weld_context is ctx
 
+    copied = sliced.copy2numpy()  # plain ndarray copy, detached from `welded`
+    assert copied.base is None
+    try:
+        copied.weld_context  # must raise: a plain ndarray carries no context
+        assert False, "copy2numpy result still exposes weld_context"
+    except AttributeError:
+        pass
+
 # Tests for encoding and decoding 1D arrays
  
 def test_bool_vec():
diff --git a/weld-python/weld/encoders/numpy.py b/weld-python/weld/encoders/numpy.py
index bef238e15..64c5a2017 100644
--- a/weld-python/weld/encoders/numpy.py
+++ b/weld-python/weld/encoders/numpy.py
@@ -63,6 +63,8 @@ def __array_finalize__(self, obj):
     def copy2numpy(self):
         """ Copies this array's data into a new NumPy `ndarray`.
 
+        This is equivalent to `np.array(arr, copy=True)`.
+
         Examples
         --------
         >>> arr = weldbasearray([1, 2, 3])
@@ -72,7 +74,7 @@ def copy2numpy(self):
         array([1, 2, 3])
 
         """
-        return np.array(self, copy=True).view(np.ndarray)
+        return np.array(self, copy=True)
 
 # Maps a string dtype representation to a Weld scalar type.
 _known_types = {