From 125d444299a30c4773b488496184fae3cbf660db Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 30 Oct 2023 23:13:06 -0400 Subject: [PATCH 1/4] Implement Map type --- README.md | 2 +- src/field.ts | 7 +++++++ src/vector.ts | 30 ++++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 2183c68..dd39091 100644 --- a/README.md +++ b/README.md @@ -104,7 +104,7 @@ Most of the unsupported types should be pretty straightforward to implement; the - [x] Large List (Not implemented by Arrow JS but supported by downcasting to `List`.) - [x] Fixed-size List - [x] Struct -- [ ] Map +- [x] Map - [ ] Dense Union - [ ] Sparse Union - [ ] Dictionary-encoded arrays diff --git a/src/field.ts b/src/field.ts index c10d88f..314f62a 100644 --- a/src/field.ts +++ b/src/field.ts @@ -155,6 +155,13 @@ export function parseField(buffer: ArrayBuffer, ptr: number): arrow.Field { return new arrow.Field(name, type, flags.nullable, metadata); } + // Map + if (formatString === "+m") { + assert(childrenFields.length === 1); + const type = new arrow.Map_(childrenFields[0], flags.mapKeysSorted); + return new arrow.Field(name, type, flags.nullable, metadata); + } + throw new Error(`Unsupported format: ${formatString}`); } diff --git a/src/vector.ts b/src/vector.ts index 6a0e573..1a65b36 100644 --- a/src/vector.ts +++ b/src/vector.ts @@ -546,6 +546,36 @@ export function parseData( }); } + if (DataType.isMap(dataType)) { + assert(nChildren === 1); + const [validityPtr, offsetsPtr] = bufferPtrs; + const nullBitmap = parseNullBitmap( + dataView.buffer, + validityPtr, + length, + copy + ); + const valueOffsets = copy + ? new Int32Array( + copyBuffer( + dataView.buffer, + offsetsPtr, + (length + 1) * Int32Array.BYTES_PER_ELEMENT + ) + ) + : new Int32Array(dataView.buffer, offsetsPtr, length + 1); + + return arrow.makeData({ + type: dataType, + offset, + length, + nullCount, + nullBitmap, + valueOffsets, + child: children[0], + }); + } + // TODO: sparse union, dense union, dictionary throw new Error(`Unsupported type ${dataType}`); } From 2fb1a4835e9eab340c06e26883bd097db1ad7dd6 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 30 Oct 2023 23:19:32 -0400 Subject: [PATCH 2/4] add test --- tests/ffi.test.ts | 37 +++++++++++++++++++++++++++++++++ tests/pyarrow_generate_data.py | 10 +++++++++ tests/table.arrow | Bin 3426 -> 4098 bytes yarn.lock | 8 +++---- 4 files changed, 51 insertions(+), 4 deletions(-) diff --git a/tests/ffi.test.ts b/tests/ffi.test.ts index c3be397..ac42693 100644 --- a/tests/ffi.test.ts +++ b/tests/ffi.test.ts @@ -661,3 +661,40 @@ describe("nullable int", (t) => { it("copy=false", () => test(false)); it("copy=true", () => test(true)); }); + +describe("map array", (t) => { + function test(copy: boolean) { + let columnIndex = TEST_TABLE.schema.fields.findIndex( + (field) => field.name == "map_array" + ); + + const originalField = TEST_TABLE.schema.fields[columnIndex]; + // declare it's not null + const originalVector = TEST_TABLE.getChildAt(columnIndex) as arrow.Vector; + const fieldPtr = FFI_TABLE.schemaAddr(columnIndex); + const field = parseField(WASM_MEMORY.buffer, fieldPtr); + + expect(field.name).toStrictEqual(originalField.name); + expect(field.typeId).toStrictEqual(originalField.typeId); + expect(field.nullable).toStrictEqual(originalField.nullable); + + const arrayPtr = FFI_TABLE.arrayAddr(0, columnIndex); + const wasmVector = parseVector( + WASM_MEMORY.buffer, + arrayPtr, + field.type, + copy + ); + + console.log(originalVector); + console.log(wasmVector); + + // expect( + // validityEqual(originalVector, wasmVector), + // "validity should be equal" + // ).toBeTruthy(); + } + + it("copy=false", () => test(false)); + it("copy=true", () => test(true)); +}); diff --git a/tests/pyarrow_generate_data.py b/tests/pyarrow_generate_data.py index a5613f6..04654f3 100644 --- a/tests/pyarrow_generate_data.py +++ b/tests/pyarrow_generate_data.py @@ -124,6 +124,15 @@ def nullable_int() -> pa.Array: return arr +def map_array() -> pa.Array: + offsets = [0, 2, 3, 4] + keys = ["a", "b", "a", "b"] + items = [1, 2, 3, 4] + arr = pa.MapArray.from_arrays(offsets, keys, items) + assert isinstance(arr, pa.MapArray) + return arr + + class MyExtensionType(pa.ExtensionType): """ Refer to https://arrow.apache.org/docs/python/extending_types.html for @@ -170,6 +179,7 @@ def table() -> pa.Table: "date64": date64_array(), "timestamp": timestamp_array(), "nullable_int": nullable_int(), + "map": map_array(), } ) diff --git a/tests/table.arrow b/tests/table.arrow index ec0d119b13d9b433ce67c158e5a642483a96e375..fc488ca0e5728259b9db374e6baad14d51743fd2 100644 GIT binary patch delta 949 zcmc&zJxc>Y5S=rd+$kDPG#V`QurcXkrsuNBE}H>0wHiH7PetiSmaMg zVz0fWMM|+Tg}p_@_08p0Lqx2^2annLc)N46drwp6PJB6+Tici?Qh*(;#;5)IyHbLP z;$RVr#|Tjqyaczv4EQiWWPweC=mOjX1$Yk)Yik{)h>X#3tdn63X|h4cY2Pi=>YGwm zI2G;)*Ek@wGlYF<2V}7cdZnX-UGI=85WIA%rHS<@q9<(RJ&bfe*lGkER-U`uZ ziI(9f`RjV1{qST;_2qx^xjRNStk<2c^y#(l1l1G^OrmiU8#81pW~a5ToBsOZZ@#=U zU;Wx*mPbkbmTw}!=YoRU;koG(xG_%m#@xJ{XPs_Z%ejW3wLEFK^4UuJ4qqye36lja X?uO})pXJhT@A6-t3mZpgs!9|$~HCi2NSqySkg3=9msK)i&Rfk6U@-!L&S901}eK%4=@ zJV5Ng2owjZ*#pEAfY=6zLHafe@VsE2EW^sdl)*e%ja5lf0;muIxELfDcz}vH7`T7} zGLsWog(u%&6_`8$jCnRMV7": version: 5.2.2 - resolution: "typescript@patch:typescript@npm%3A5.2.2#~builtin::version=5.2.2&hash=f3b441" + resolution: "typescript@patch:typescript@npm%3A5.2.2#~builtin::version=5.2.2&hash=ad5954" bin: tsc: bin/tsc tsserver: bin/tsserver - checksum: 0f4da2f15e6f1245e49db15801dbee52f2bbfb267e1c39225afdab5afee1a72839cd86000e65ee9d7e4dfaff12239d28beaf5ee431357fcced15fb08583d72ca + checksum: 07106822b4305de3f22835cbba949a2b35451cad50888759b6818421290ff95d522b38ef7919e70fb381c5fe9c1c643d7dea22c8b31652a717ddbd57b7f4d554 languageName: node linkType: hard From 679ee7e5e02b86c71abab105d32473f286d69cb2 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 30 Jan 2024 23:40:28 -0500 Subject: [PATCH 3/4] readme note --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 82c3bd5..21eb10d 100644 --- a/README.md +++ b/README.md @@ -173,7 +173,7 @@ Most of the unsupported types should be pretty straightforward to implement; the - [x] Large List (Not implemented by Arrow JS but supported by downcasting to `List`.) - [x] Fixed-size List - [x] Struct -- [x] Map +- [x] Map (though not yet tested, see [#97](https://github.com/kylebarron/arrow-js-ffi/issues/97)) - [x] Dense Union - [x] Sparse Union - [x] Dictionary-encoded arrays From 9a2012997155af8fd8743b1452937090d83c3628 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Tue, 30 Jan 2024 23:47:11 -0500 Subject: [PATCH 4/4] fmt --- src/vector.ts | 94 +++++++++++++++++++++++++-------------------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/src/vector.ts b/src/vector.ts index 5a05682..403352e 100644 --- a/src/vector.ts +++ b/src/vector.ts @@ -24,7 +24,7 @@ export function parseVector( buffer: ArrayBuffer, ptr: number, dataType: T, - copy: boolean = true + copy: boolean = true, ): arrow.Vector { const data = parseData(buffer, ptr, dataType, copy); return arrow.makeVector(data); @@ -51,7 +51,7 @@ export function parseData( buffer: ArrayBuffer, ptr: number, dataType: T, - copy: boolean = true + copy: boolean = true, ): arrow.Data { const dataView = new DataView(buffer); @@ -77,7 +77,7 @@ export function parseData( buffer, dataView.getUint32(ptrToChildrenPtrs + i * 4, true), dataType.children[i].type, - copy + copy, ); } @@ -166,7 +166,7 @@ function parseDataContent({ dataView.buffer, validityPtr, length, - copy + copy, ); const byteLength = (length * dataType.bitWidth) / 8; const data = copy @@ -188,7 +188,7 @@ function parseDataContent({ dataView.buffer, validityPtr, length, - copy + copy, ); // bitwidth doesn't exist on float types I guess const byteLength = length * dataType.ArrayType.BYTES_PER_ELEMENT; @@ -211,7 +211,7 @@ function parseDataContent({ dataView.buffer, validityPtr, length, - copy + copy, ); // Boolean arrays are bit-packed. This means the byte length should be the number of elements, @@ -237,7 +237,7 @@ function parseDataContent({ dataView.buffer, validityPtr, length, - copy + copy, ); const byteLength = (length * dataType.bitWidth) / 8; const data = copy @@ -259,13 +259,13 @@ function parseDataContent({ dataView.buffer, validityPtr, length, - copy + copy, ); let byteWidth = getDateByteWidth(dataType); const data = copy ? new dataType.ArrayType( - copyBuffer(dataView.buffer, dataPtr, length * byteWidth) + copyBuffer(dataView.buffer, dataPtr, length * byteWidth), ) : new dataType.ArrayType(dataView.buffer, dataPtr, length); return arrow.makeData({ @@ -284,7 +284,7 @@ function parseDataContent({ dataView.buffer, validityPtr, length, - copy + copy, ); const byteLength = (length * dataType.bitWidth) / 8; const data = copy @@ -306,13 +306,13 @@ function parseDataContent({ dataView.buffer, validityPtr, length, - copy + copy, ); let byteWidth = getTimeByteWidth(dataType); const data = copy ? new dataType.ArrayType( - copyBuffer(dataView.buffer, dataPtr, length * byteWidth) + copyBuffer(dataView.buffer, dataPtr, length * byteWidth), ) : new dataType.ArrayType(dataView.buffer, dataPtr, length); return arrow.makeData({ @@ -331,13 +331,13 @@ function parseDataContent({ dataView.buffer, validityPtr, length, - copy + copy, ); let byteWidth = getTimeByteWidth(dataType); const data = copy ? new dataType.ArrayType( - copyBuffer(dataView.buffer, dataPtr, length * byteWidth) + copyBuffer(dataView.buffer, dataPtr, length * byteWidth), ) : new dataType.ArrayType(dataView.buffer, dataPtr, length); return arrow.makeData({ @@ -356,7 +356,7 @@ function parseDataContent({ dataView.buffer, validityPtr, length, - copy + copy, ); // What's the bitwidth here? @@ -382,7 +382,7 @@ function parseDataContent({ dataView.buffer, validityPtr, length, - copy + copy, ); const valueOffsets = copy @@ -390,8 +390,8 @@ function parseDataContent({ copyBuffer( dataView.buffer, offsetsPtr, - (length + 1) * Int32Array.BYTES_PER_ELEMENT - ) + (length + 1) * Int32Array.BYTES_PER_ELEMENT, + ), ) : new Int32Array(dataView.buffer, offsetsPtr, length + 1); @@ -419,14 +419,14 @@ function parseDataContent({ dataView.buffer, validityPtr, length, - copy + copy, ); // The original value offsets are an Int64Array, which Arrow JS does not yet support natively const originalValueOffsets = new BigInt64Array( dataView.buffer, offsetsPtr, - length + 1 + length + 1, ); // Copy the Int64Array to an Int32Array @@ -462,7 +462,7 @@ function parseDataContent({ dataView.buffer, validityPtr, length, - copy + copy, ); const valueOffsets = copy @@ -470,8 +470,8 @@ function parseDataContent({ copyBuffer( dataView.buffer, offsetsPtr, - (length + 1) * Int32Array.BYTES_PER_ELEMENT - ) + (length + 1) * Int32Array.BYTES_PER_ELEMENT, + ), ) : new Int32Array(dataView.buffer, offsetsPtr, length + 1); @@ -499,14 +499,14 @@ function parseDataContent({ dataView.buffer, validityPtr, length, - copy + copy, ); // The original value offsets are an Int64Array, which Arrow JS does not yet support natively const originalValueOffsets = new BigInt64Array( dataView.buffer, offsetsPtr, - length + 1 + length + 1, ); // Copy the Int64Array to an Int32Array @@ -542,16 +542,16 @@ function parseDataContent({ dataView.buffer, validityPtr, length, - copy + copy, ); const data = copy ? new dataType.ArrayType( - copyBuffer(dataView.buffer, dataPtr, length * dataType.byteWidth) + copyBuffer(dataView.buffer, dataPtr, length * dataType.byteWidth), ) : new dataType.ArrayType( dataView.buffer, dataPtr, - length * dataType.byteWidth + length * dataType.byteWidth, ); return arrow.makeData({ type: dataType, @@ -570,15 +570,15 @@ function parseDataContent({ dataView.buffer, validityPtr, length, - copy + copy, ); const valueOffsets = copy ? new Int32Array( copyBuffer( dataView.buffer, offsetsPtr, - (length + 1) * Int32Array.BYTES_PER_ELEMENT - ) + (length + 1) * Int32Array.BYTES_PER_ELEMENT, + ), ) : new Int32Array(dataView.buffer, offsetsPtr, length + 1); @@ -601,14 +601,14 @@ function parseDataContent({ dataView.buffer, validityPtr, length, - copy + copy, ); // The original value offsets are an Int64Array, which Arrow JS does not yet support natively const originalValueOffsets = new BigInt64Array( dataView.buffer, offsetsPtr, - length + 1 + length + 1, ); // Copy the Int64Array to an Int32Array @@ -637,7 +637,7 @@ function parseDataContent({ dataView.buffer, validityPtr, length, - copy + copy, ); return arrow.makeData({ @@ -656,7 +656,7 @@ function parseDataContent({ dataView.buffer, validityPtr, length, - copy + copy, ); return arrow.makeData({ @@ -676,15 +676,15 @@ function parseDataContent({ dataView.buffer, validityPtr, length, - copy + copy, ); const valueOffsets = copy ? new Int32Array( copyBuffer( dataView.buffer, offsetsPtr, - (length + 1) * Int32Array.BYTES_PER_ELEMENT - ) + (length + 1) * Int32Array.BYTES_PER_ELEMENT, + ), ) : new Int32Array(dataView.buffer, offsetsPtr, length + 1); @@ -707,8 +707,8 @@ function parseDataContent({ copyBuffer( dataView.buffer, offsetsPtr, - (length + 1) * Int32Array.BYTES_PER_ELEMENT - ) + (length + 1) * Int32Array.BYTES_PER_ELEMENT, + ), ) : new Int32Array(dataView.buffer, offsetsPtr, length + 1); @@ -717,8 +717,8 @@ function parseDataContent({ copyBuffer( dataView.buffer, typeIdsPtr, - (length + 1) * Int8Array.BYTES_PER_ELEMENT - ) + (length + 1) * Int8Array.BYTES_PER_ELEMENT, + ), ) : new Int8Array(dataView.buffer, typeIdsPtr, length + 1); @@ -741,8 +741,8 @@ function parseDataContent({ copyBuffer( dataView.buffer, typeIdsPtr, - (length + 1) * Int8Array.BYTES_PER_ELEMENT - ) + (length + 1) * Int8Array.BYTES_PER_ELEMENT, + ), ) : new Int8Array(dataView.buffer, typeIdsPtr, length + 1); @@ -770,7 +770,7 @@ function getDateByteWidth(type: arrow.Date_): number { } function getTimeByteWidth( - type: arrow.Time | arrow.Timestamp | arrow.Duration + type: arrow.Time | arrow.Timestamp | arrow.Duration, ): number { switch (type.unit) { case arrow.TimeUnit.SECOND: @@ -787,7 +787,7 @@ function parseNullBitmap( buffer: ArrayBuffer, validityPtr: number, length: number, - copy: boolean + copy: boolean, ): NullBitmap { if (validityPtr === 0) { return null; @@ -807,7 +807,7 @@ function parseNullBitmap( function copyBuffer( buffer: ArrayBuffer, ptr: number, - byteLength: number + byteLength: number, ): ArrayBuffer { const newBuffer = new ArrayBuffer(byteLength); const newBufferView = new Uint8Array(newBuffer);