Skip to content

Commit

Permalink
Add duration support (#73)
Browse files Browse the repository at this point in the history
  • Loading branch information
kylebarron committed Jan 31, 2024
1 parent 3760085 commit c7063dd
Show file tree
Hide file tree
Showing 7 changed files with 104 additions and 2 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ Most of the unsupported types should be pretty straightforward to implement; the
- [x] Time32
- [x] Time64
- [x] Timestamp (with timezone)
- [ ] Duration
- [x] Duration
- [ ] Interval

### Nested Types
Expand Down
25 changes: 25 additions & 0 deletions src/field.ts
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,31 @@ export function parseField(buffer: ArrayBuffer, ptr: number): arrow.Field {
return new arrow.Field(name, type, flags.nullable, metadata);
}

// duration
if (formatString.slice(0, 2) === "tD") {
let timeUnit: arrow.TimeUnit | null = null;
switch (formatString[2]) {
case "s":
timeUnit = arrow.TimeUnit.SECOND;
break;
case "m":
timeUnit = arrow.TimeUnit.MILLISECOND;
break;
case "u":
timeUnit = arrow.TimeUnit.MICROSECOND;
break;
case "n":
timeUnit = arrow.TimeUnit.NANOSECOND;
break;

default:
throw new Error(`invalid timestamp ${formatString}`);
}

const type = new arrow.Duration(timeUnit);
return new arrow.Field(name, type, flags.nullable, metadata);
}

// struct
if (formatString === "+s") {
const type = new arrow.Struct(childrenFields);
Expand Down
1 change: 1 addition & 0 deletions src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ export enum Type {
FixedSizeBinary = 15 /** Fixed-size binary. Each value occupies the same number of bytes */,
FixedSizeList = 16 /** Fixed-size list. Each value occupies the same number of bytes */,
Map = 17 /** Map of named logical types */,
Duration = 18 /** Measure of elapsed time in either seconds, milliseconds, microseconds or nanoseconds. */,

// These 3 are not included in the upstream enum
LargeList = 30,
Expand Down
29 changes: 28 additions & 1 deletion src/vector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,31 @@ export function parseData<T extends DataType>(
});
}

if (DataType.isDuration(dataType)) {
const [validityPtr, dataPtr] = bufferPtrs;
const nullBitmap = parseNullBitmap(
dataView.buffer,
validityPtr,
length,
copy,
);

let byteWidth = getTimeByteWidth(dataType);
const data = copy
? new dataType.ArrayType(
copyBuffer(dataView.buffer, dataPtr, length * byteWidth),
)
: new dataType.ArrayType(dataView.buffer, dataPtr, length);
return arrow.makeData({
type: dataType,
offset,
length,
nullCount,
data,
nullBitmap,
});
}

if (DataType.isInterval(dataType)) {
const [validityPtr, dataPtr] = bufferPtrs;
const nullBitmap = parseNullBitmap(
Expand Down Expand Up @@ -642,7 +667,9 @@ function getDateByteWidth(type: arrow.Date_): number {
assertUnreachable();
}

function getTimeByteWidth(type: arrow.Time | arrow.Timestamp): number {
function getTimeByteWidth(
type: arrow.Time | arrow.Timestamp | arrow.Duration,
): number {
switch (type.unit) {
case arrow.TimeUnit.SECOND:
case arrow.TimeUnit.MILLISECOND:
Expand Down
33 changes: 33 additions & 0 deletions tests/ffi.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,39 @@ describe("date32", (t) => {
// }
// });

describe("duration", (t) => {
function test(copy: boolean) {
let columnIndex = TEST_TABLE.schema.fields.findIndex(
(field) => field.name == "duration"
);

const originalField = TEST_TABLE.schema.fields[columnIndex];
// declare it's not null
const originalVector = TEST_TABLE.getChildAt(columnIndex) as arrow.Vector;
const fieldPtr = FFI_TABLE.schemaAddr(columnIndex);
const field = parseField(WASM_MEMORY.buffer, fieldPtr);

expect(field.name).toStrictEqual(originalField.name);
expect(field.typeId).toStrictEqual(originalField.typeId);
expect(field.nullable).toStrictEqual(originalField.nullable);

const arrayPtr = FFI_TABLE.arrayAddr(0, columnIndex);
const wasmVector = parseVector(
WASM_MEMORY.buffer,
arrayPtr,
field.type,
copy
);

for (let i = 0; i < 3; i++) {
expect(originalVector.get(i), wasmVector.get(i));
}
}

it("copy=false", () => test(false));
it("copy=true", () => test(true));
});

describe("nullable int", (t) => {
function test(copy: boolean) {
let columnIndex = TEST_TABLE.schema.fields.findIndex(
Expand Down
16 changes: 16 additions & 0 deletions tests/pyarrow_generate_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from decimal import Decimal

import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.feather as feather

Expand Down Expand Up @@ -115,6 +116,20 @@ def timestamp_array() -> pa.Array:
return arr


def duration_array() -> pa.Array:
arr = pa.DurationArray.from_pandas(
[
pd.Timedelta("2d"),
pd.Timedelta("1d"),
pd.Timedelta("1w"),
]
)

assert isinstance(arr, pa.DurationArray)
assert arr.type.unit == "us"
return arr


def nullable_int() -> pa.Array:
# True means null
mask = [True, False, True]
Expand Down Expand Up @@ -227,6 +242,7 @@ def table() -> pa.Table:
"nullable_int": nullable_int(),
"sparse_union": sparse_union_array(),
"dense_union": dense_union_array(),
"duration": duration_array(),
}
)

Expand Down
Binary file modified tests/table.arrow
Binary file not shown.

0 comments on commit c7063dd

Please sign in to comment.