Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(arrow): Triangulate on worker #2789

Merged
merged 14 commits into from
Dec 5, 2023
45 changes: 26 additions & 19 deletions modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ type BinaryGeometryContent = {
geomOffset: Int32Array;
/** Array of geometry indicies: the start index of each geometry */
geometryIndicies: Uint16Array;
/** (Optional) indices of triangels returned from polygon tessellation (Polygon only) */
/** (Optional) indices of triangles returned from polygon triangulation (Polygon only) */
triangles?: Uint32Array;
/** (Optional) array of mean center of each geometry */
meanCenters?: Float64Array;
Expand All @@ -54,17 +54,21 @@ type BinaryGeometryContent = {
/**
* binary geometry template, see deck.gl BinaryGeometry
*/
export const BINARY_GEOMETRY_TEMPLATE = {
globalFeatureIds: {value: new Uint32Array(0), size: 1},
positions: {value: new Float32Array(0), size: 2},
properties: [],
numericProps: {},
featureIds: {value: new Uint32Array(0), size: 1}
};
/**
 * Build a fresh, empty binary geometry template (see deck.gl BinaryGeometry).
 *
 * Every call allocates new typed arrays and new container objects, so the
 * returned template can be spread into a result and mutated without affecting
 * templates handed out by other calls.
 *
 * @returns an object holding empty `globalFeatureIds`, `positions`,
 * `properties`, `numericProps` and `featureIds` entries
 */
export function getBinaryGeometryTemplate() {
  // Zero-length id accessor with one component per entry; invoked once per field
  // so that `globalFeatureIds` and `featureIds` never share a typed array.
  const emptyIdAccessor = () => ({value: new Uint32Array(0), size: 1});
  // Zero-length position accessor with two components per vertex
  const emptyPositions = {value: new Float32Array(0), size: 2};

  return {
    globalFeatureIds: emptyIdAccessor(),
    positions: emptyPositions,
    properties: [],
    numericProps: {},
    featureIds: emptyIdAccessor()
  };
}

export type BinaryGeometriesFromArrowOptions = {
/** option to specify which chunk to get binary geometries from, for progressive rendering */
chunkIndex?: number;
/** The offset (beginning index of rows) of input chunk. Used for reconstructing globalFeatureIds in web workers */
chunkOffset?: number;
/** option to get mean centers from geometries, for polygon filtering */
calculateMeanCenters?: boolean;
/** option to compute the triangle indices by tessellating polygons */
Expand All @@ -90,9 +94,12 @@ export function getBinaryGeometriesFromArrow(
line: geoEncoding === 'geoarrow.multilinestring' || geoEncoding === 'geoarrow.linestring'
};

const chunks = options?.chunkIndex ? [geoColumn.data[options?.chunkIndex]] : geoColumn.data;
const chunks =
options?.chunkIndex !== undefined && options?.chunkIndex >= 0
? [geoColumn.data[options?.chunkIndex]]
: geoColumn.data;
let bounds: [number, number, number, number] = [Infinity, Infinity, -Infinity, -Infinity];
let globalFeatureIdOffset = 0;
let globalFeatureIdOffset = options?.chunkOffset || 0;
const binaryGeometries: BinaryFeatures[] = [];

chunks.forEach((chunk) => {
Expand All @@ -111,6 +118,7 @@ export function getBinaryGeometriesFromArrow(
size: nDim
},
featureIds: {value: featureIds, size: 1},
// eslint-disable-next-line no-loop-func
properties: [...Array(chunk.length).keys()].map((i) => ({
index: i + globalFeatureIdOffset
}))
Expand All @@ -123,18 +131,18 @@ export function getBinaryGeometriesFromArrow(
shape: 'binary-feature-collection',
points: {
type: 'Point',
...BINARY_GEOMETRY_TEMPLATE,
...getBinaryGeometryTemplate(),
...(featureTypes.point ? binaryContent : {})
},
lines: {
type: 'LineString',
...BINARY_GEOMETRY_TEMPLATE,
...getBinaryGeometryTemplate(),
...(featureTypes.line ? binaryContent : {}),
pathIndices: {value: featureTypes.line ? geomOffset : new Uint16Array(0), size: 1}
},
polygons: {
type: 'Polygon',
...BINARY_GEOMETRY_TEMPLATE,
...getBinaryGeometryTemplate(),
...(featureTypes.polygon ? binaryContent : {}),
polygonIndices: {
// use geomOffset as polygonIndices same as primitivePolygonIndices since we are using earcut to get triangule indices
Expand Down Expand Up @@ -281,7 +289,7 @@ function getBinaryGeometriesFromChunk(
* @param primitivePolygonIndices Indices within positions of the start of each primitive Polygon/ring
* @param flatCoordinateArray Array of x, y or x, y, z positions
* @param nDim - number of dimensions per position
* @returns
* @returns triangle indices or null if invalid polygon and earcut fails
*/
export function getTriangleIndices(
polygonIndices: Uint16Array,
Expand All @@ -306,13 +314,14 @@ export function getTriangleIndices(
}
primitiveIndex++;
}
// TODO check if each ring is closed
const triangleIndices = earcut(
slicedFlatCoords,
holeIndices.length > 0 ? holeIndices : undefined,
nDim
);
if (triangleIndices.length === 0) {
throw Error('can not tesselate invalid polygon');
throw Error('earcut failed e.g. invalid polygon');
lixun910 marked this conversation as resolved.
Show resolved Hide resolved
}
for (let j = 0; j < triangleIndices.length; j++) {
triangles.push(triangleIndices[j] + startIdx);
Expand All @@ -325,9 +334,7 @@ export function getTriangleIndices(
}
return trianglesUint32;
} catch (error) {
// TODO - add logging
// there is an expection when tesselating invalid polygon, e.g. polygon with self-intersection
// return null to skip tesselating
// if earcut fails, return null
return null;
}
}
Expand Down Expand Up @@ -379,8 +386,8 @@ function getBinaryPolygonsFromChunk(

return {
featureIds,
flatCoordinateArray,
nDim,
flatCoordinateArray,
geomOffset,
geometryIndicies,
...(options?.triangulate && triangles ? {triangles} : {})
Expand Down
10 changes: 8 additions & 2 deletions modules/arrow/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ export type {
BinaryGeometriesFromArrowOptions
} from './geoarrow/convert-geoarrow-to-binary-geometry';
export {
BINARY_GEOMETRY_TEMPLATE,
getBinaryGeometryTemplate,
getBinaryGeometriesFromArrow,
getTriangleIndices,
getMeanCentersFromBinaryGeometries
Expand All @@ -65,5 +65,11 @@ export {parseGeometryFromArrow} from './geoarrow/convert-geoarrow-to-geojson-geo
export {convertArrowToGeoJSONTable} from './tables/convert-arrow-to-geojson-table';

// EXPERIMENTAL WORKER
export {hardClone} from './workers/hard-clone';

export {TriangulationWorker, triangulateOnWorker} from './triangulate-on-worker';
export type {ParseGeoArrowInput, ParseGeoArrowResult} from './triangulate-on-worker';
export {
TriangulationWorker,
triangulateOnWorker,
parseGeoArrowOnWorker
} from './triangulate-on-worker';
57 changes: 50 additions & 7 deletions modules/arrow/src/triangulate-on-worker.ts
Original file line number Diff line number Diff line change
@@ -1,19 +1,52 @@
// loaders.gl, MIT license
// Copyright (c) vis.gl contributors

import * as arrow from 'apache-arrow';
import type {WorkerOptions} from '@loaders.gl/worker-utils';
import {processOnWorker} from '@loaders.gl/worker-utils';
import {BinaryDataFromGeoArrow, GeoArrowEncoding} from '@loaders.gl/arrow';

// __VERSION__ is injected by babel-plugin-version-inline
// @ts-ignore TS2304: Cannot find name '__VERSION__'.
const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';

export type TriangulationWorkerInput = TriangulateInput | {operation: 'test'; data: any};
export type TriangulationWorkerOutput = TriangulateResult | {operation: 'test'; data: any};
/**
 * Union of the request payloads that can be sent to the triangulation worker.
 * Every member carries an `operation` tag ('triangulate', 'parse-geoarrow' or
 * 'test') that identifies which kind of request it is.
 */
export type TriangulationWorkerInput =
| ({operation: 'triangulate'} & TriangulateInput)
| ParseGeoArrowInput
| {operation: 'test'; data: any};

/**
 * Union of the response payloads of the triangulation worker. Each member is
 * tagged with the same `operation` literal as the request kind it corresponds
 * to ('triangulate', 'parse-geoarrow' or 'test').
 */
export type TriangulationWorkerOutput =
| ({operation: 'triangulate'} & TriangulateResult)
| ({operation: 'parse-geoarrow'} & ParseGeoArrowResult)
| {operation: 'test'; data: any};

/**
 * Plain-object description of one chunk of a GeoArrow geometry column.
 * The fields correspond one-to-one to the arguments of the `arrow.Data`
 * constructor (type, offset, length, nullCount, buffers, children, dictionary).
 * NOTE(review): presumably used because an `arrow.Data` instance cannot be
 * posted to a worker directly and has to be rebuilt there - confirm against
 * the worker implementation.
 */
type GeoArrowChunkData = {
/** Arrow data type of the chunk */
type: arrow.DataType;
/** Offset of the chunk within its buffers */
offset: number;
/** Number of rows in the chunk */
length: number;
/** Number of null entries in the chunk */
nullCount: number;
/** Arrow buffers of the chunk; untyped here (`any`) */
buffers: any;
/** Child data, for nested types */
children: arrow.Data[];
/** Dictionary vector, when present */
dictionary?: arrow.Vector;
};

/** Input data for operation: 'parse-geoarrow' */
export type ParseGeoArrowInput = {
/** Tag identifying this request as a GeoArrow parse request */
operation: 'parse-geoarrow';
/** The geometry column chunk to parse, in plain-object form */
chunkData: GeoArrowChunkData;
/** Index of the chunk; NOTE(review): presumably echoed back as `chunkIndex` in the result - confirm */
chunkIndex: number;
/** NOTE(review): presumably the row offset of this chunk, used to reconstruct global feature ids - confirm */
chunkOffset: number;
/** GeoArrow encoding of the geometry column */
geometryEncoding: GeoArrowEncoding;
/** NOTE(review): presumably whether mean centers of the geometries should be computed - confirm */
calculateMeanCenters: boolean;
/** NOTE(review): presumably whether polygons should be triangulated - confirm against the worker */
triangle: boolean;
};

/** Result of operation: 'parse-geoarrow' */
export type ParseGeoArrowResult = {
/** Index of the chunk this result belongs to */
chunkIndex: number;
/** Binary geometries for the chunk; NOTE(review): `null` presumably signals that parsing failed - confirm */
binaryDataFromGeoArrow: BinaryDataFromGeoArrow | null;
};

/** Input data for operation: 'triangulate' */
export type TriangulateInput = {
operation: 'triangulate';
polygonIndices: Uint16Array;
primitivePolygonIndices: Int32Array;
flatCoordinateArray: Float64Array;
Expand All @@ -37,11 +70,21 @@ export const TriangulationWorker = {
};

/**
* Provide type safety
* Triangulate a set of polygons on worker, type safe API
*/
export function triangulateOnWorker(
data: TriangulationWorkerInput,
data: TriangulateInput,
options: WorkerOptions = {}
): Promise<TriangulateResult> {
return processOnWorker(TriangulationWorker, {...data, operation: 'triangulate'}, options);
}

/**
* Parse GeoArrow geometry column on worker, type safe API
*/
export function parseGeoArrowOnWorker(
data: ParseGeoArrowInput,
options: WorkerOptions = {}
): Promise<TriangulationWorkerOutput> {
return processOnWorker(TriangulationWorker, data, options);
): Promise<ParseGeoArrowResult> {
return processOnWorker(TriangulationWorker, {...data, operation: 'parse-geoarrow'}, options);
}
162 changes: 162 additions & 0 deletions modules/arrow/src/workers/hard-clone.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
import * as arrow from 'apache-arrow';
import type {Buffers} from 'apache-arrow/data';

type TypedArray =
| Uint8Array
| Uint8ClampedArray
| Uint16Array
| Uint32Array
| Int8Array
| Int16Array
| Int32Array
| Float32Array
| Float64Array;

/**
 * Produce a copy of an Arrow JS `Data` or `Vector` whose internal buffers are
 * not views onto a larger, shared `ArrayBuffer`.
 *
 * This makes it possible to transfer the result (e.g. to a web worker)
 * without neutering buffers that other data still relies on.
 *
 * A buffer counts as a slice of a larger `ArrayBuffer` when its `byteOffset`
 * is not `0` or its `byteLength` differs from `buffer.byteLength`; such
 * buffers are copied into a new `ArrayBuffer`. Children and the dictionary
 * (when present) are cloned recursively with the same rule.
 *
 * @param data the `Data` or `Vector` to clone
 * @param force when `true`, every internal buffer is copied, even one that
 * is not a slice. When `false` (the default), buffers that are not slices are
 * reused as-is.
 * @returns a new `Data` or `Vector`, matching the kind of the input
 */
export function hardClone<T extends arrow.DataType>(
  input: arrow.Data<T>,
  force?: boolean
): arrow.Data<T>;
export function hardClone<T extends arrow.DataType>(
  input: arrow.Vector<T>,
  force?: boolean
): arrow.Vector<T>;

export function hardClone<T extends arrow.DataType>(
  data: arrow.Data<T> | arrow.Vector<T>,
  force: boolean = false
): arrow.Data<T> | arrow.Vector<T> {
  // Only a Vector exposes a `data` array (its chunks): clone it chunk by chunk
  if ('data' in data) {
    const clonedChunks = data.data.map((chunk) => hardClone(chunk, force));
    return new arrow.Vector(clonedChunks);
  }

  // Nested children are cloned recursively
  const clonedChildren: arrow.Data[] = data.children.map((child) => hardClone(child, force));

  // The dictionary is optional; clone it only when there is one
  const clonedDictionary: arrow.Vector | undefined =
    data.dictionary === undefined ? undefined : hardClone(data.dictionary, force);

  // There are up to four buffers, any of which may be `undefined` depending on
  // the array type:
  //
  // - OFFSET: value offsets for variable size list types
  // - DATA: the underlying data
  // - VALIDITY: the null buffer; may be empty or undefined when every element
  //   is valid
  // - TYPE: type ids for a union type
  const {buffers} = data;
  const clonedBuffers: Buffers<T> = {
    [arrow.BufferType.OFFSET]: cloneBuffer(buffers[arrow.BufferType.OFFSET], force),
    [arrow.BufferType.DATA]: cloneBuffer(buffers[arrow.BufferType.DATA], force),
    [arrow.BufferType.VALIDITY]: cloneBuffer(buffers[arrow.BufferType.VALIDITY], force),
    [arrow.BufferType.TYPE]: cloneBuffer(buffers[arrow.BufferType.TYPE], force)
  };

  // `data.offset` is forwarded unchanged so a sliced Data stays sliced. As a
  // consequence the whole backing buffer of the original is cloned, not just
  // the window the slice covers.
  return new arrow.Data(
    data.type,
    data.offset,
    data.length,
    // @ts-expect-error _nullCount is protected. It is read here to mimic
    // `Data.clone`
    data._nullCount,
    clonedBuffers,
    clonedChildren,
    clonedDictionary
  );
}

/**
 * Report whether any internal buffer of an Arrow `Data` or `Vector` is a
 * partial view onto a larger `ArrayBuffer`.
 *
 * Chunks of a `Vector`, child data and the dictionary are inspected
 * recursively; the first sliced buffer found makes the result `true`.
 *
 * @param data the `Data` or `Vector` to inspect
 * @returns `true` if at least one buffer is a slice of a larger `ArrayBuffer`
 */
export function isShared<T extends arrow.DataType>(data: arrow.Data<T> | arrow.Vector<T>): boolean {
  // A Vector is shared as soon as one of its chunks is
  if ('data' in data) {
    for (const chunk of data.data) {
      if (isShared(chunk)) {
        return true;
      }
    }
    return false;
  }

  // Nested child data
  if (data.children.some((child) => isShared(child))) {
    return true;
  }

  // Optional dictionary
  if (data.dictionary !== undefined && isShared(data.dictionary)) {
    return true;
  }

  // Finally the buffers owned by this Data itself; not every buffer type is
  // defined for every Arrow type, so undefined entries are skipped
  const bufferTypes = [
    arrow.BufferType.OFFSET,
    arrow.BufferType.DATA,
    arrow.BufferType.VALIDITY,
    arrow.BufferType.TYPE
  ];
  for (const bufferType of bufferTypes) {
    const buffer = data.buffers[bufferType];
    if (buffer !== undefined && isTypedArraySliced(buffer)) {
      return true;
    }
  }

  return false;
}

/**
 * Tell whether a typed array is a partial view onto its backing `ArrayBuffer`.
 *
 * @param arr the typed array to inspect
 * @returns `true` when the view does not start at byte 0 or does not span the
 * whole backing buffer, `false` when it covers the buffer exactly
 */
function isTypedArraySliced(arr: TypedArray): boolean {
  const startsPastBeginning = arr.byteOffset !== 0;
  const coversPartOfBuffer = arr.byteLength !== arr.buffer.byteLength;
  return startsPastBeginning || coversPartOfBuffer;
}

/**
 * Return a typed array that is safe to detach from any shared `ArrayBuffer`.
 *
 * - `undefined` is passed through untouched.
 * - A typed array that is a slice of a larger `ArrayBuffer` is copied into a
 *   fresh `ArrayBuffer`.
 * - A typed array that is not a slice is returned as-is, unless `force` is
 *   `true`, in which case it is copied as well.
 *
 * @param arr the buffer to clone, or `undefined`
 * @param force copy the array even when it is not a slice
 * @returns the original array, a copy of it, or `undefined`
 */
function cloneBuffer<A extends TypedArray | undefined>(arr: A, force: boolean): A {
  // Some buffer types do not exist for a given Arrow type, e.g.
  // `arrow.BufferType.TYPE` is only present for the Union type.
  if (arr === undefined) {
    return arr;
  }

  // A copy is needed when requested explicitly or when the array is only a
  // window onto a larger ArrayBuffer; otherwise the array is reused.
  const needsCopy = force || isTypedArraySliced(arr);
  if (!needsCopy) {
    return arr;
  }

  // Note: TypedArray.slice() **copies** into a new ArrayBuffer

  // @ts-expect-error 'Uint8Array' is assignable to the constraint of type 'A',
  // but 'A' could be instantiated with a different subtype of constraint
  // 'TypedArray'
  // arr.slice() always returns the same typed array kind as `arr`
  return arr.slice();
}
Loading
Loading