From 64acef601736a0b7681c03679c36f35a50896cbb Mon Sep 17 00:00:00 2001 From: "yihao.dai" Date: Fri, 12 Apr 2024 15:09:15 +0800 Subject: [PATCH] fix: Fix import array via bulkwriter (#2035) (#2036) When importing array fields via bulkwriter, we should specify the element datatype explicitly; otherwise, it will default to double (for float32 arrays) and int64 (for int8, int16, and int32 arrays). issue: https://github.com/milvus-io/pymilvus/issues/2034, https://github.com/milvus-io/milvus/issues/31834 pr: https://github.com/milvus-io/pymilvus/pull/2035 Signed-off-by: bigsheeper --- pymilvus/bulk_writer/buffer.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pymilvus/bulk_writer/buffer.py b/pymilvus/bulk_writer/buffer.py index 12c971d4e..e77723777 100644 --- a/pymilvus/bulk_writer/buffer.py +++ b/pymilvus/bulk_writer/buffer.py @@ -212,6 +212,12 @@ def _persist_parquet(self, local_path: str, **kwargs): for val in self._buffer[k]: arr.append(np.array(val, dtype=np.dtype("uint8"))) data[k] = pd.Series(arr) + elif field_schema.dtype == DataType.ARRAY: + dt = NUMPY_TYPE_CREATOR[field_schema.element_type.name] + arr = [] + for val in self._buffer[k]: + arr.append(np.array(val, dtype=dt)) + data[k] = pd.Series(arr) elif field_schema.dtype.name in NUMPY_TYPE_CREATOR: dt = NUMPY_TYPE_CREATOR[field_schema.dtype.name] data[k] = pd.Series(self._buffer[k], dtype=dt)