diff --git a/README.md b/README.md index 32ce846..21eb10d 100644 --- a/README.md +++ b/README.md @@ -173,7 +173,7 @@ Most of the unsupported types should be pretty straightforward to implement; the - [x] Large List (Not implemented by Arrow JS but supported by downcasting to `List`.) - [x] Fixed-size List - [x] Struct -- [ ] Map +- [x] Map (though not yet tested, see [#97](https://github.com/kylebarron/arrow-js-ffi/issues/97)) - [x] Dense Union - [x] Sparse Union - [x] Dictionary-encoded arrays diff --git a/src/field.ts b/src/field.ts index 409a08a..1da6ffd 100644 --- a/src/field.ts +++ b/src/field.ts @@ -222,6 +222,13 @@ function parseFieldContent({ return new arrow.Field(name, type, flags.nullable, metadata); } + // Map + if (formatString === "+m") { + assert(childrenFields.length === 1); + const type = new arrow.Map_(childrenFields[0], flags.mapKeysSorted); + return new arrow.Field(name, type, flags.nullable, metadata); + } + // Dense union if (formatString.slice(0, 4) === "+ud:") { const typeIds = formatString.slice(4).split(",").map(Number); diff --git a/src/vector.ts b/src/vector.ts index 404afe3..403352e 100644 --- a/src/vector.ts +++ b/src/vector.ts @@ -669,6 +669,36 @@ function parseDataContent({ }); } + if (DataType.isMap(dataType)) { + assert(nChildren === 1); + const [validityPtr, offsetsPtr] = bufferPtrs; + const nullBitmap = parseNullBitmap( + dataView.buffer, + validityPtr, + length, + copy, + ); + const valueOffsets = copy + ? new Int32Array( + copyBuffer( + dataView.buffer, + offsetsPtr, + (length + 1) * Int32Array.BYTES_PER_ELEMENT, + ), + ) + : new Int32Array(dataView.buffer, offsetsPtr, length + 1); + + return arrow.makeData({ + type: dataType, + offset, + length, + nullCount, + nullBitmap, + valueOffsets, + child: children[0], + }); + } + if (DataType.isDenseUnion(dataType)) { const [typeIdsPtr, offsetsPtr] = bufferPtrs; diff --git a/tests/ffi.test.ts b/tests/ffi.test.ts index a934987..7c5edcd 100644 --- a/tests/ffi.test.ts +++ b/tests/ffi.test.ts @@ -683,6 +683,45 @@ describe("nullable int", (t) => { it("copy=true", () => test(true)); }); +// Skipping this test because of rust issues +// ref: https://github.com/kylebarron/arrow-js-ffi/issues/97 +describe.skip("map array", (t) => { + function test(copy: boolean) { + let columnIndex = TEST_TABLE.schema.fields.findIndex( + (field) => field.name == "map_array" + ); + + const originalField = TEST_TABLE.schema.fields[columnIndex]; + // declare it's not null + const originalVector = TEST_TABLE.getChildAt(columnIndex) as arrow.Vector; + const fieldPtr = FFI_TABLE.schemaAddr(columnIndex); + const field = parseField(WASM_MEMORY.buffer, fieldPtr); + + expect(field.name).toStrictEqual(originalField.name); + expect(field.typeId).toStrictEqual(originalField.typeId); + expect(field.nullable).toStrictEqual(originalField.nullable); + + const arrayPtr = FFI_TABLE.arrayAddr(0, columnIndex); + const wasmVector = parseVector( + WASM_MEMORY.buffer, + arrayPtr, + field.type, + copy + ); + + console.log(originalVector); + console.log(wasmVector); + + // expect( + // validityEqual(originalVector, wasmVector), + // "validity should be equal" + // ).toBeTruthy(); + } + + it("copy=false", () => test(false)); + it("copy=true", () => test(true)); +}); + describe("dictionary encoded string", (t) => { function test(copy: boolean) { let columnIndex = TEST_TABLE.schema.fields.findIndex( diff --git a/tests/pyarrow_generate_data.py b/tests/pyarrow_generate_data.py index b853c75..e6210ac 100644 --- a/tests/pyarrow_generate_data.py +++ b/tests/pyarrow_generate_data.py @@ -140,6 +140,15 @@ def nullable_int() -> pa.Array: return arr +def map_array() -> pa.Array: + offsets = [0, 2, 3, 4] + keys = ["a", "b", "a", "b"] + items = [1, 2, 3, 4] + arr = pa.MapArray.from_arrays(offsets, keys, items) + assert isinstance(arr, pa.MapArray) + return arr + + def sparse_union_array() -> pa.Array: """Generate a sparse union array @@ -251,6 +260,9 @@ def table() -> pa.Table: "date64": date64_array(), "timestamp": timestamp_array(), "nullable_int": nullable_int(), + # We don't yet include "map" in tests because of issues on the Rust side. + # ref: https://github.com/kylebarron/arrow-js-ffi/issues/97 + # "map": map_array(), "sparse_union": sparse_union_array(), "dense_union": dense_union_array(), "duration": duration_array(), diff --git a/tests/table.arrow b/tests/table.arrow index 4fb305b..241797e 100644 Binary files a/tests/table.arrow and b/tests/table.arrow differ diff --git a/yarn.lock b/yarn.lock index 2a7314e..7db2283 100644 --- a/yarn.lock +++ b/yarn.lock @@ -338,10 +338,10 @@ __metadata: linkType: hard "@rollup/plugin-typescript@npm:^11.1.2": - version: 11.1.6 - resolution: "@rollup/plugin-typescript@npm:11.1.6" + version: 11.1.5 + resolution: "@rollup/plugin-typescript@npm:11.1.5" dependencies: - "@rollup/pluginutils": "npm:^5.1.0" + "@rollup/pluginutils": "npm:^5.0.1" resolve: "npm:^1.22.1" peerDependencies: rollup: ^2.14.0||^3.0.0||^4.0.0 @@ -352,23 +352,23 @@ __metadata: optional: true tslib: optional: true - checksum: 4ae4d6cfc929393171288df2f18b5eb837fa53d8689118d9661b3064567341f6f6cf8389af55f1d5f015e3682abf30a64ab609fdf75ecb5a84224505e407eb69 + checksum: 3048c7837bcaae3770e6977aa22b34b7da3862f14f25a22249c83867be1a3f78138c0a62b28efd22037890dfd6933e960007da6667a945dcef45f585a581f1aa languageName: node linkType: hard -"@rollup/pluginutils@npm:^5.1.0": - version: 5.1.0 - resolution: "@rollup/pluginutils@npm:5.1.0" +"@rollup/pluginutils@npm:^5.0.1": + version: 5.0.2 + resolution: "@rollup/pluginutils@npm:5.0.2" dependencies: "@types/estree": "npm:^1.0.0" estree-walker: "npm:^2.0.2" picomatch: "npm:^2.3.1" peerDependencies: - rollup: ^1.20.0||^2.0.0||^3.0.0||^4.0.0 + rollup: ^1.20.0||^2.0.0||^3.0.0 peerDependenciesMeta: rollup: optional: true - checksum: abb15eaec5b36f159ec351b48578401bedcefdfa371d24a914cfdbb1e27d0ebfbf895299ec18ccc343d247e71f2502cba21202bc1362d7ef27d5ded699e5c2b2 + checksum: 7aebf04d5d25d7d2e9514cc8f81a49b11f093b29eae2862da29022532b66e3de4681f537cc785fdcf438bcdefa3af4453470e7951ca91d6ebea2f41d6aea42d3 languageName: node linkType: hard @@ -2597,8 +2597,8 @@ __metadata: linkType: hard "ts-node@npm:^10.9.1": - version: 10.9.2 - resolution: "ts-node@npm:10.9.2" + version: 10.9.1 + resolution: "ts-node@npm:10.9.1" dependencies: "@cspotcode/source-map-support": "npm:^0.8.0" "@tsconfig/node10": "npm:^1.0.7" @@ -2630,7 +2630,7 @@ __metadata: ts-node-script: dist/bin-script.js ts-node-transpile-only: dist/bin-transpile.js ts-script: dist/bin-script-deprecated.js - checksum: a91a15b3c9f76ac462f006fa88b6bfa528130dcfb849dd7ef7f9d640832ab681e235b8a2bc58ecde42f72851cc1d5d4e22c901b0c11aa51001ea1d395074b794 + checksum: bee56d4dc96ccbafc99dfab7b73fbabc62abab2562af53cdea91c874a301b9d11e42bc33c0a032a6ed6d813dbdc9295ec73dde7b73ea4ebde02b0e22006f7e04 languageName: node linkType: hard @@ -2649,22 +2649,22 @@ __metadata: linkType: hard "typescript@npm:^5.2.2": - version: 5.3.3 - resolution: "typescript@npm:5.3.3" + version: 5.2.2 + resolution: "typescript@npm:5.2.2" bin: tsc: bin/tsc tsserver: bin/tsserver - checksum: 6e4e6a14a50c222b3d14d4ea2f729e79f972fa536ac1522b91202a9a65af3605c2928c4a790a4a50aa13694d461c479ba92cedaeb1e7b190aadaa4e4b96b8e18 + checksum: d65e50eb849bd21ff8677e5b9447f9c6e74777e346afd67754934264dcbf4bd59e7d2473f6062d9a015d66bd573311166357e3eb07fea0b52859cf9bb2b58555 languageName: node linkType: hard "typescript@patch:typescript@npm%3A^5.2.2#optional!builtin": - version: 5.3.3 - resolution: "typescript@patch:typescript@npm%3A5.3.3#optional!builtin::version=5.3.3&hash=e012d7" + version: 5.2.2 + resolution: "typescript@patch:typescript@npm%3A5.2.2#optional!builtin::version=5.2.2&hash=f3b441" bin: tsc: bin/tsc tsserver: bin/tsserver - checksum: c93786fcc9a70718ba1e3819bab56064ead5817004d1b8186f8ca66165f3a2d0100fee91fa64c840dcd45f994ca5d615d8e1f566d39a7470fc1e014dbb4cf15d + checksum: f79cc2ba802c94c2b78dbb00d767a10adb67368ae764709737dc277273ec148aa4558033a03ce901406b35fddf4eac46dabc94a1e1d12d2587e2b9cfe5707b4a languageName: node linkType: hard