Skip to content

Commit

Permalink
Merge branch 'main' into kyle/map-type
Browse files Browse the repository at this point in the history
  • Loading branch information
kylebarron committed Jan 31, 2024
2 parents 2fb1a48 + 83aa1ba commit 1131edb
Show file tree
Hide file tree
Showing 18 changed files with 1,651 additions and 920 deletions.
20 changes: 20 additions & 0 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
name: Publish to NPM

on:
push:
tags:
- "v*"

jobs:
publish:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-node@v3
with:
node-version: "20"
- run: npm ci
- run: npm test
- uses: JS-DevTools/npm-publish@v3
with:
token: ${{ secrets.NPM_TOKEN }}
29 changes: 25 additions & 4 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,30 @@ name: Build and Test
on:
push:
branches:
- master
- main
pull_request:

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
node-test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

- name: Set up Volta
uses: volta-cli/action@v4

- uses: actions/cache@v4
id: yarn-cache # use this to check for `cache-hit` (`steps.yarn-cache.outputs.cache-hit != 'true'`)
with:
path: ".yarn/cache"
key: ${{ runner.os }}-yarn-${{ hashFiles('**/yarn.lock') }}
restore-keys: |
${{ runner.os }}-yarn-
- name: Install Rust
uses: actions-rs/toolchain@v1
with:
Expand All @@ -31,8 +46,14 @@ jobs:
- name: Build Rust wasm test helper
run: cd tests/rust-arrow-ffi && yarn build && cd ../../

- name: Install dev dependencies
run: yarn
- name: Install
run: yarn install

- name: Prettier check
run: yarn fmt:check

- name: Type check
run: yarn typecheck

- name: Run Node tests
- name: Test
run: yarn test
4 changes: 4 additions & 0 deletions .yarnrc.yml
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
compressionLevel: mixed

enableGlobalCache: false

nodeLinker: node-modules
99 changes: 84 additions & 15 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,20 +24,52 @@ const WASM_MEMORY: WebAssembly.Memory = ...
const field = parseField(WASM_MEMORY.buffer, fieldPtr);
```

### `parseSchema`

Parse an [`ArrowSchema`](https://arrow.apache.org/docs/format/CDataInterface.html#the-arrowschema-structure) C FFI struct into an `arrow.Schema` instance. Note that the underlying field **must** be a `Struct` type. In essence a `Struct` field is used to mimic a `Schema` while only being one field.

- `buffer` (`ArrayBuffer`): The [`WebAssembly.Memory`](https://developer.mozilla.org/en-US/docs/WebAssembly/JavaScript_interface/Memory) instance to read from.
- `ptr` (`number`): The numeric pointer in `buffer` where the C struct is located.

```ts
const WASM_MEMORY: WebAssembly.Memory = ...
const schema = parseSchema(WASM_MEMORY.buffer, fieldPtr);
```

### `parseData`

Parse an [`ArrowArray`](https://arrow.apache.org/docs/format/CDataInterface.html#the-arrowarray-structure) C FFI struct into an [`arrow.Data`](https://arrow.apache.org/docs/js/classes/Arrow_dom.Data.html) instance. Multiple `Data` instances can be joined to make an [`arrow.Vector`](https://arrow.apache.org/docs/js/classes/Arrow_dom.Vector.html).

- `buffer` (`ArrayBuffer`): The [`WebAssembly.Memory`](https://developer.mozilla.org/en-US/docs/WebAssembly/JavaScript_interface/Memory) instance to read from.
- `ptr` (`number`): The numeric pointer in `buffer` where the C struct is located.
- `dataType` (`arrow.DataType`): The type of the vector to parse. This is retrieved from `field.type` on the result of `parseField`.
- `copy` (`boolean`, default: `true`): If `true`, will _copy_ data across the Wasm boundary, allowing you to delete the copy on the Wasm side. If `false`, the resulting `arrow.Data` objects will be _views_ on Wasm memory. This requires careful usage as the arrays will become invalid if the memory region in Wasm changes.

#### Example

```ts
const WASM_MEMORY: WebAssembly.Memory = ...
const copiedData = parseData(WASM_MEMORY.buffer, arrayPtr, field.type);
// Make zero-copy views instead of copying array contents
const viewedData = parseData(WASM_MEMORY.buffer, arrayPtr, field.type, false);
```

### `parseVector`

Parse an [`ArrowArray`](https://arrow.apache.org/docs/format/CDataInterface.html#the-arrowarray-structure) C FFI struct into an [`arrow.Vector`](https://arrow.apache.org/docs/js/classes/Arrow_dom.Vector.html) instance. Multiple `Vector` instances can be joined to make an [`arrow.Table`](https://arrow.apache.org/docs/js/classes/Arrow_dom.Table.html).

- `buffer` (`ArrayBuffer`): The [`WebAssembly.Memory`](https://developer.mozilla.org/en-US/docs/WebAssembly/JavaScript_interface/Memory) instance to read from.
- `ptr` (`number`): The numeric pointer in `buffer` where the C struct is located.
- `dataType` (`arrow.DataType`): The type of the vector to parse. This is retrieved from `field.type` on the result of `parseField`.
- `copy` (`boolean`): If `true`, will _copy_ data across the Wasm boundary, allowing you to delete the copy on the Wasm side. If `false`, the resulting `arrow.Vector` objects will be _views_ on Wasm memory. This requires careful usage as the arrays will become invalid if the memory region in Wasm changes.
- `copy` (`boolean`, default: `true`): If `true`, will _copy_ data across the Wasm boundary, allowing you to delete the copy on the Wasm side. If `false`, the resulting `arrow.Vector` objects will be _views_ on Wasm memory. This requires careful usage as the arrays will become invalid if the memory region in Wasm changes.

#### Example

```ts
const WASM_MEMORY: WebAssembly.Memory = ...
const wasmVector = parseVector(WASM_MEMORY.buffer, arrayPtr, field.type);
// Copy arrays into JS instead of creating views
const wasmVector = parseVector(WASM_MEMORY.buffer, arrayPtr, field.type, true);
const copiedVector = parseVector(WASM_MEMORY.buffer, arrayPtr, field.type);
// Make zero-copy views instead of copying array contents
const viewedVector = parseVector(WASM_MEMORY.buffer, arrayPtr, field.type, false);
```

### `parseRecordBatch`
Expand All @@ -47,12 +79,47 @@ Parse an [`ArrowArray`](https://arrow.apache.org/docs/format/CDataInterface.html
- `buffer` (`ArrayBuffer`): The [`WebAssembly.Memory`](https://developer.mozilla.org/en-US/docs/WebAssembly/JavaScript_interface/Memory) instance to read from.
- `arrayPtr` (`number`): The numeric pointer in `buffer` where the _array_ C struct is located.
- `schemaPtr` (`number`): The numeric pointer in `buffer` where the _field_ C struct is located.
- `copy` (`boolean`): If `true`, will _copy_ data across the Wasm boundary, allowing you to delete the copy on the Wasm side. If `false`, the resulting `arrow.Vector` objects will be _views_ on Wasm memory. This requires careful usage as the arrays will become invalid if the memory region in Wasm changes.
- `copy` (`boolean`, default: `true`): If `true`, will _copy_ data across the Wasm boundary, allowing you to delete the copy on the Wasm side. If `false`, the resulting `arrow.Vector` objects will be _views_ on Wasm memory. This requires careful usage as the arrays will become invalid if the memory region in Wasm changes.

#### Example

```ts
const WASM_MEMORY: WebAssembly.Memory = ...
// Pass `true` to copy arrays across the boundary instead of creating views.
const recordBatch = parseRecordBatch(WASM_MEMORY.buffer, arrayPtr, fieldPtr, true);
const copiedRecordBatch = parseRecordBatch(
WASM_MEMORY.buffer,
arrayPtr,
fieldPtr
);
// Pass `false` to view arrays across the boundary instead of creating copies.
const viewedRecordBatch = parseRecordBatch(
WASM_MEMORY.buffer,
arrayPtr,
fieldPtr,
false
);
```

### `parseTable`

Parse an Arrow Table object from WebAssembly memory to an Arrow JS `Table`.

This expects an array of [`ArrowArray`](https://arrow.apache.org/docs/format/CDataInterface.html#the-arrowarray-structure) C FFI structs _plus_ an [`ArrowSchema`](https://arrow.apache.org/docs/format/CDataInterface.html#the-arrowschema-structure) C FFI struct. Note that the underlying array and field pointers **must** be a `Struct` type. In essence a `Struct` array is used to mimic each `RecordBatch` while only being one array.

- `buffer` (`ArrayBuffer`): The [`WebAssembly.Memory`](https://developer.mozilla.org/en-US/docs/WebAssembly/JavaScript_interface/Memory) instance to read from.
- `arrayPtrs` (`number[]`): An array of numeric pointers, each giving the location in `buffer` of the _array_ C struct that represents one record batch.
- `schemaPtr` (`number`): The numeric pointer in `buffer` where the _field_ C struct is located.
- `copy` (`boolean`, default: `true`): If `true`, will _copy_ data across the Wasm boundary, allowing you to delete the copy on the Wasm side. If `false`, the resulting `arrow.Vector` objects will be _views_ on Wasm memory. This requires careful usage as the arrays will become invalid if the memory region in Wasm changes.

#### Example

```ts
const WASM_MEMORY: WebAssembly.Memory = ...
const table = parseTable(
WASM_MEMORY.buffer,
arrayPtrs,
schemaPtr,
true
);
```

## Type Support
Expand All @@ -78,15 +145,17 @@ Most of the unsupported types should be pretty straightforward to implement; the
### Binary & String

- [x] Binary
- [x] Large Binary (Not implemented by Arrow JS but supported by downcasting to `Binary`.)
- [x] Large Binary (Supported natively by Arrow JS as of v15)
- [x] String
- [x] Large String (Not implemented by Arrow JS but supported by downcasting to `String`.)
- [x] Large String (Supported natively by Arrow JS as of v15)
- [x] Fixed-width Binary

### Decimal

- [ ] Decimal128 (failing a test)
- [ ] Decimal256 (failing a test)
- [ ] Decimal128 (failing a test, this may be [#37920])
- [ ] Decimal256 (failing a test, this may be [#37920])

[#37920]: https://github.com/apache/arrow/issues/37920

### Temporal Types

Expand All @@ -95,7 +164,7 @@ Most of the unsupported types should be pretty straightforward to implement; the
- [x] Time32
- [x] Time64
- [x] Timestamp (with timezone)
- [ ] Duration
- [x] Duration
- [ ] Interval

### Nested Types
Expand All @@ -105,9 +174,9 @@ Most of the unsupported types should be pretty straightforward to implement; the
- [x] Fixed-size List
- [x] Struct
- [x] Map
- [ ] Dense Union
- [ ] Sparse Union
- [ ] Dictionary-encoded arrays
- [x] Dense Union
- [x] Sparse Union
- [x] Dictionary-encoded arrays

### Extension Types

Expand Down
19 changes: 12 additions & 7 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,28 +21,33 @@
"type": "module",
"scripts": {
"build": "rollup -c rollup.config.js",
"watch": "tsc --watch --declaration",
"test": "vitest run"
"fmt:check": "prettier './src/**/*.ts' --check",
"fmt": "prettier './src/**/*.ts' --write",
"test": "vitest run",
"typecheck": "tsc --build",
"watch": "tsc --watch --declaration"
},
"files": [
"dist/",
"src/"
],
"peerDependencies": {
"apache-arrow": ">=13"
"apache-arrow": ">=15"
},
"devDependencies": {
"@rollup/plugin-terser": "^0.4.3",
"@rollup/plugin-typescript": "^11.1.2",
"apache-arrow": "^13",
"apache-arrow": "^15",
"prettier": "^3.1.0",
"rollup": "^4.1.5",
"rollup-plugin-dts": "^6.1.0",
"rust-arrow-ffi": "./tests/rust-arrow-ffi/pkg/",
"rust-arrow-ffi": "link:./tests/rust-arrow-ffi/pkg/",
"ts-node": "^10.9.1",
"typescript": "^5.2.2",
"vitest": "^0.34.6"
"vitest": "^1.2.2"
},
"volta": {
"node": "20.9.0"
"node": "20.9.0",
"yarn": "4.0.2"
}
}
Loading

0 comments on commit 1131edb

Please sign in to comment.