From 331c4b7fa00ed244074f2a46ea93cd49a3a68e54 Mon Sep 17 00:00:00 2001 From: Ib Green Date: Fri, 17 Nov 2023 05:57:42 -0500 Subject: [PATCH 01/14] feat(arrow): Triangulate on worker --- .../convert-geoarrow-to-binary-geometry.ts | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts b/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts index d495947bae..9bf6ce9f4f 100644 --- a/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts +++ b/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts @@ -7,6 +7,7 @@ import {BinaryFeatureCollection as BinaryFeatures} from '@loaders.gl/schema'; import {GeoArrowEncoding} from '@loaders.gl/gis'; import {updateBoundsFromGeoArrowSamples} from './get-arrow-bounds'; import {TypedArray} from '@loaders.gl/loader-utils'; +import {triangulateOnWorker} from '../triangulate-on-worker'; /** * Binary geometry type @@ -45,7 +46,7 @@ type BinaryGeometryContent = { geomOffset: Int32Array; /** Array of geometry indicies: the start index of each geometry */ geometryIndicies: Uint16Array; - /** (Optional) indices of triangels returned from polygon tessellation (Polygon only) */ + /** (Optional) indices of triangels returned from polygon triangulation (Polygon only) */ triangles?: Uint32Array; /** (Optional) array of mean center of each geometry */ meanCenters?: Float64Array; @@ -79,11 +80,11 @@ export type BinaryGeometriesFromArrowOptions = { * @param options options for getting binary geometries {meanCenter: boolean} * @returns BinaryDataFromGeoArrow */ -export function getBinaryGeometriesFromArrow( +export async function getBinaryGeometriesFromArrow( geoColumn: arrow.Vector, geoEncoding: GeoArrowEncoding, options?: BinaryGeometriesFromArrowOptions -): BinaryDataFromGeoArrow { +): Promise { const featureTypes = { polygon: geoEncoding === 'geoarrow.multipolygon' || geoEncoding === 'geoarrow.polygon', point: geoEncoding === 'geoarrow.multipoint' || geoEncoding === 'geoarrow.point', @@ -95,7 +96,7 @@ export function getBinaryGeometriesFromArrow( let globalFeatureIdOffset = 0; const binaryGeometries: BinaryFeatures[] = []; - chunks.forEach((chunk) => { + for (const chunk of chunks) { const {featureIds, flatCoordinateArray, nDim, geomOffset, triangles} = getBinaryGeometriesFromChunk(chunk, geoEncoding, options); @@ -111,6 +112,7 @@ export function getBinaryGeometriesFromArrow( size: nDim }, featureIds: {value: featureIds, size: 1}, + // eslint-disable-next-line no-loop-func properties: [...Array(chunk.length).keys()].map((i) => ({ index: i + globalFeatureIdOffset })) @@ -150,7 +152,7 @@ export function getBinaryGeometriesFromArrow( }); bounds = updateBoundsFromGeoArrowSamples(flatCoordinateArray, nDim, bounds); - }); + } return { binaryGeometries, @@ -255,7 +257,7 @@ function getMeanCentersFromGeometry( * @param options options for getting binary geometries * @returns BinaryGeometryContent */ -function getBinaryGeometriesFromChunk( +async function getBinaryGeometriesFromChunk( chunk: arrow.Data, geoEncoding: GeoArrowEncoding, options?: BinaryGeometriesFromArrowOptions From dbacbe1681b699000f6e669887bdf4a2e2ceeb5b Mon Sep 17 00:00:00 2001 From: Ib Green Date: Fri, 17 Nov 2023 14:52:29 -0500 Subject: [PATCH 02/14] wip --- .../arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts b/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts index 9bf6ce9f4f..a7a5fe5d4d 100644 --- a/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts +++ b/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts @@ -7,7 +7,7 @@ import {BinaryFeatureCollection as BinaryFeatures} from '@loaders.gl/schema'; import {GeoArrowEncoding} from '@loaders.gl/gis'; import {updateBoundsFromGeoArrowSamples} from './get-arrow-bounds'; import {TypedArray} from '@loaders.gl/loader-utils'; -import {triangulateOnWorker} from '../triangulate-on-worker'; +import {TriangulateResult, triangulateOnWorker} from '../triangulate-on-worker'; /** * Binary geometry type From 01f2c6010cb98a3670a18ab3cd8304ecbe5b3477 Mon Sep 17 00:00:00 2001 From: Ib Green Date: Fri, 17 Nov 2023 15:10:36 -0500 Subject: [PATCH 03/14] wip --- .../convert-geoarrow-to-binary-geometry.ts | 44 ++++++++++++++++++- modules/arrow/src/triangulate-on-worker.ts | 16 ++++--- .../arrow/test/triangulate-on-worker.spec.ts | 5 ++- 3 files changed, 57 insertions(+), 8 deletions(-) diff --git a/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts b/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts index a7a5fe5d4d..a5eaed179b 100644 --- a/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts +++ b/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts @@ -277,6 +277,47 @@ async function getBinaryGeometriesFromChunk( } } +/** + * get binary polygons from geoarrow polygon column + * @param chunk one chunk of geoarrow polygon column + * @param geoEncoding the geo encoding of the geoarrow polygon column + * @returns BinaryGeometryContent + */ +function getBinaryPolygonsFromChunk(chunk: arrow.Data, geoEncoding: string): BinaryGeometryContent { + const binaryGeometry = getUntriangulatedBinaryPolygonsFromChunk(chunk, geoEncoding); + const {geometryIndicies, geomOffset, flatCoordinateArray, nDim} = binaryGeometry; + const triangles = getTriangleIndices(geometryIndicies, geomOffset, flatCoordinateArray, nDim); + return { + ...binaryGeometry, + triangles + }; +} + +/** + * get binary polygons from geoarrow polygon column + * @param chunk one chunk of geoarrow polygon column + * @param geoEncoding the geo encoding of the geoarrow polygon column + * @returns BinaryGeometryContent + */ +export async function getBinaryPolygonsFromChunkAsync( + chunk: arrow.Data, + geoEncoding: string +): Promise { + const binaryGeometry = getUntriangulatedBinaryPolygonsFromChunk(chunk, geoEncoding); + const {geometryIndicies, geomOffset, flatCoordinateArray, nDim} = binaryGeometry; + const triangulationOutput = await triangulateOnWorker({ + polygonIndices: geometryIndicies, + primitivePolygonIndices: geomOffset, + flatCoordinateArray, + nDim + }); + + return { + ...binaryGeometry, + triangles: triangulationOutput.triangleIndices + }; +} + /** * get triangle indices. Allows deck.gl to skip performing costly triangulation on main thread. * @param polygonIndices Indices within positions of the start of each simple Polygon @@ -381,8 +422,9 @@ function getBinaryPolygonsFromChunk( return { featureIds, - flatCoordinateArray, nDim, + flatCoordinateArray, + geomOffset, geometryIndicies, ...(options?.triangulate && triangles ? {triangles} : {}) diff --git a/modules/arrow/src/triangulate-on-worker.ts b/modules/arrow/src/triangulate-on-worker.ts index 3d8ce90850..0588232dca 100644 --- a/modules/arrow/src/triangulate-on-worker.ts +++ b/modules/arrow/src/triangulate-on-worker.ts @@ -8,12 +8,16 @@ import {processOnWorker} from '@loaders.gl/worker-utils'; // @ts-ignore TS2304: Cannot find name '__VERSION__'. const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest'; -export type TriangulationWorkerInput = TriangulateInput | {operation: 'test'; data: any}; -export type TriangulationWorkerOutput = TriangulateResult | {operation: 'test'; data: any}; +export type TriangulationWorkerInput = + | ({operation: 'triangulate'} & TriangulateInput) + | {operation: 'test'; data: any}; + +export type TriangulationWorkerOutput = + | ({operation: 'triangulate'} & TriangulateResult) + | {operation: 'test'; data: any}; /** Input data for operation: 'triangulate' */ export type TriangulateInput = { - operation: 'triangulate'; polygonIndices: Uint16Array; primitivePolygonIndices: Int32Array; flatCoordinateArray: Float64Array; @@ -40,8 +44,8 @@ export const TriangulationWorker = { * Provide type safety */ export function triangulateOnWorker( - data: TriangulationWorkerInput, + data: TriangulateInput, options: WorkerOptions = {} -): Promise { - return processOnWorker(TriangulationWorker, data, options); +): Promise { + return processOnWorker(TriangulationWorker, {...data, operation: 'triangulate'}, options); } diff --git a/modules/arrow/test/triangulate-on-worker.spec.ts b/modules/arrow/test/triangulate-on-worker.spec.ts index 188601110b..a65286126f 100644 --- a/modules/arrow/test/triangulate-on-worker.spec.ts +++ b/modules/arrow/test/triangulate-on-worker.spec.ts @@ -38,7 +38,9 @@ test('TriangulationWorker#plumbing', async (t) => { t.end(); }); -test('triangulateOnWorker#plumbing', async (t) => { +test.skip('triangulateOnWorker#plumbing', async (t) => { + t.ok(triangulateOnWorker, 'triangulateOnWorker defined'); + /* const triangulatedData = await triangulateOnWorker( { operation: 'test', @@ -58,6 +60,7 @@ test('triangulateOnWorker#plumbing', async (t) => { // operation: 'error', // _workerType: 'test' // }), 'Triangulation worker throws on incorrect operation'); + */ if (!isBrowser) { const workerFarm = WorkerFarm.getWorkerFarm({}); From f273038ac3df3a3944a87cdc8c696e88599d4f77 Mon Sep 17 00:00:00 2001 From: Ib Green Date: Fri, 17 Nov 2023 15:12:58 -0500 Subject: [PATCH 04/14] wip --- .../arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts b/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts index a5eaed179b..94971df7a2 100644 --- a/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts +++ b/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts @@ -7,7 +7,7 @@ import {BinaryFeatureCollection as BinaryFeatures} from '@loaders.gl/schema'; import {GeoArrowEncoding} from '@loaders.gl/gis'; import {updateBoundsFromGeoArrowSamples} from './get-arrow-bounds'; import {TypedArray} from '@loaders.gl/loader-utils'; -import {TriangulateResult, triangulateOnWorker} from '../triangulate-on-worker'; +import {triangulateOnWorker} from '../triangulate-on-worker'; /** * Binary geometry type From b9b99dd258b663e23d385b4e54b17942b3813d83 Mon Sep 17 00:00:00 2001 From: Xun Li Date: Tue, 21 Nov 2023 16:27:50 -0700 Subject: [PATCH 05/14] add parseGeoArrow job Signed-off-by: Xun Li --- .../convert-geoarrow-to-binary-geometry.ts | 68 +++---------------- modules/arrow/src/triangulate-on-worker.ts | 18 +++++ .../arrow/src/workers/triangulation-worker.ts | 42 +++++++++++- .../arrow/test/triangulate-on-worker.spec.ts | 27 ++++++++ 4 files changed, 94 insertions(+), 61 deletions(-) diff --git a/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts b/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts index 94971df7a2..7dfe7f9312 100644 --- a/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts +++ b/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts @@ -7,7 +7,6 @@ import {BinaryFeatureCollection as BinaryFeatures} from '@loaders.gl/schema'; import {GeoArrowEncoding} from '@loaders.gl/gis'; import {updateBoundsFromGeoArrowSamples} from './get-arrow-bounds'; import {TypedArray} from '@loaders.gl/loader-utils'; -import {triangulateOnWorker} from '../triangulate-on-worker'; /** * Binary geometry type @@ -80,11 +79,11 @@ export type BinaryGeometriesFromArrowOptions = { * @param options options for getting binary geometries {meanCenter: boolean} * @returns BinaryDataFromGeoArrow */ -export async function getBinaryGeometriesFromArrow( +export function getBinaryGeometriesFromArrow( geoColumn: arrow.Vector, geoEncoding: GeoArrowEncoding, options?: BinaryGeometriesFromArrowOptions -): Promise { +): BinaryDataFromGeoArrow { const featureTypes = { polygon: geoEncoding === 'geoarrow.multipolygon' || geoEncoding === 'geoarrow.polygon', point: geoEncoding === 'geoarrow.multipoint' || geoEncoding === 'geoarrow.point', @@ -96,7 +95,7 @@ export async function getBinaryGeometriesFromArrow( let globalFeatureIdOffset = 0; const binaryGeometries: BinaryFeatures[] = []; - for (const chunk of chunks) { + chunks.forEach((chunk) => { const {featureIds, flatCoordinateArray, nDim, geomOffset, triangles} = getBinaryGeometriesFromChunk(chunk, geoEncoding, options); @@ -152,7 +151,7 @@ export async function getBinaryGeometriesFromArrow( }); bounds = updateBoundsFromGeoArrowSamples(flatCoordinateArray, nDim, bounds); - } + }); return { binaryGeometries, @@ -257,7 +256,7 @@ function getMeanCentersFromGeometry( * @param options options for getting binary geometries * @returns BinaryGeometryContent */ -async function getBinaryGeometriesFromChunk( +function getBinaryGeometriesFromChunk( chunk: arrow.Data, geoEncoding: GeoArrowEncoding, options?: BinaryGeometriesFromArrowOptions @@ -277,54 +276,13 @@ async function getBinaryGeometriesFromChunk( } } -/** - * get binary polygons from geoarrow polygon column - * @param chunk one chunk of geoarrow polygon column - * @param geoEncoding the geo encoding of the geoarrow polygon column - * @returns BinaryGeometryContent - */ -function getBinaryPolygonsFromChunk(chunk: arrow.Data, geoEncoding: string): BinaryGeometryContent { - const binaryGeometry = getUntriangulatedBinaryPolygonsFromChunk(chunk, geoEncoding); - const {geometryIndicies, geomOffset, flatCoordinateArray, nDim} = binaryGeometry; - const triangles = getTriangleIndices(geometryIndicies, geomOffset, flatCoordinateArray, nDim); - return { - ...binaryGeometry, - triangles - }; -} - -/** - * get binary polygons from geoarrow polygon column - * @param chunk one chunk of geoarrow polygon column - * @param geoEncoding the geo encoding of the geoarrow polygon column - * @returns BinaryGeometryContent - */ -export async function getBinaryPolygonsFromChunkAsync( - chunk: arrow.Data, - geoEncoding: string -): Promise { - const binaryGeometry = getUntriangulatedBinaryPolygonsFromChunk(chunk, geoEncoding); - const {geometryIndicies, geomOffset, flatCoordinateArray, nDim} = binaryGeometry; - const triangulationOutput = await triangulateOnWorker({ - polygonIndices: geometryIndicies, - primitivePolygonIndices: geomOffset, - flatCoordinateArray, - nDim - }); - - return { - ...binaryGeometry, - triangles: triangulationOutput.triangleIndices - }; -} - /** * get triangle indices. Allows deck.gl to skip performing costly triangulation on main thread. * @param polygonIndices Indices within positions of the start of each simple Polygon * @param primitivePolygonIndices Indices within positions of the start of each primitive Polygon/ring * @param flatCoordinateArray Array of x, y or x, y, z positions * @param nDim - number of dimensions per position - * @returns + * @returns triangle indices or null if invalid polygon and earcut fails */ export function getTriangleIndices( polygonIndices: Uint16Array, @@ -355,7 +313,7 @@ export function getTriangleIndices( nDim ); if (triangleIndices.length === 0) { - throw Error('can not tesselate invalid polygon'); + throw Error('earcut failed e.g. invalid polygon'); } for (let j = 0; j < triangleIndices.length; j++) { triangles.push(triangleIndices[j] + startIdx); @@ -368,9 +326,7 @@ export function getTriangleIndices( } return trianglesUint32; } catch (error) { - // TODO - add logging - // there is an expection when tesselating invalid polygon, e.g. polygon with self-intersection - // return null to skip tesselating + // if earcut fails, return null return null; } } @@ -379,14 +335,9 @@ export function getTriangleIndices( * get binary polygons from geoarrow polygon column * @param chunk one chunk of geoarrow polygon column * @param geoEncoding the geo encoding of the geoarrow polygon column - * @param options options for getting binary geometries * @returns BinaryGeometryContent */ -function getBinaryPolygonsFromChunk( - chunk: arrow.Data, - geoEncoding: string, - options?: BinaryGeometriesFromArrowOptions -): BinaryGeometryContent { +function getBinaryPolygonsFromChunk(chunk: arrow.Data, geoEncoding: string): BinaryGeometryContent { const isMultiPolygon = geoEncoding === 'geoarrow.multipolygon'; const polygonData = isMultiPolygon ? chunk.children[0] : chunk; @@ -424,7 +375,6 @@ function getBinaryPolygonsFromChunk( featureIds, nDim, flatCoordinateArray, - geomOffset, geometryIndicies, ...(options?.triangulate && triangles ? {triangles} : {}) diff --git a/modules/arrow/src/triangulate-on-worker.ts b/modules/arrow/src/triangulate-on-worker.ts index 0588232dca..5a8232952c 100644 --- a/modules/arrow/src/triangulate-on-worker.ts +++ b/modules/arrow/src/triangulate-on-worker.ts @@ -3,6 +3,7 @@ import type {WorkerOptions} from '@loaders.gl/worker-utils'; import {processOnWorker} from '@loaders.gl/worker-utils'; +import {BinaryDataFromGeoArrow, GeoArrowEncoding} from '@loaders.gl/arrow'; // __VERSION__ is injected by babel-plugin-version-inline // @ts-ignore TS2304: Cannot find name '__VERSION__'. @@ -10,12 +11,29 @@ const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest'; export type TriangulationWorkerInput = | ({operation: 'triangulate'} & TriangulateInput) + | ParseGeoArrowInput | {operation: 'test'; data: any}; export type TriangulationWorkerOutput = | ({operation: 'triangulate'} & TriangulateResult) + | ({operation: 'parseGeoArrow'} & ParseGeoArrowResult) | {operation: 'test'; data: any}; +export type ParseGeoArrowInput = { + operation: 'parseGeoArrow'; + arrowData: ArrayBuffer; + chunkIndex: number; + geometryColumnName: string; + geometryEncoding: GeoArrowEncoding; + meanCenter: boolean; + triangle: boolean; +}; + +export type ParseGeoArrowResult = { + chunkIndex: number; + binaryGeometries: BinaryDataFromGeoArrow | null; +}; + /** Input data for operation: 'triangulate' */ export type TriangulateInput = { polygonIndices: Uint16Array; diff --git a/modules/arrow/src/workers/triangulation-worker.ts b/modules/arrow/src/workers/triangulation-worker.ts index 8e5972bbe6..27c450cb7a 100644 --- a/modules/arrow/src/workers/triangulation-worker.ts +++ b/modules/arrow/src/workers/triangulation-worker.ts @@ -1,12 +1,19 @@ // loaders.gl, MIT license // Copyright (c) vis.gl contributors +import * as arrow from 'apache-arrow'; import {createWorker} from '@loaders.gl/worker-utils'; -import {getTriangleIndices} from '../geoarrow/convert-geoarrow-to-binary-geometry'; +import { + getTriangleIndices, + getBinaryGeometriesFromArrow, + BinaryDataFromGeoArrow +} from '../geoarrow/convert-geoarrow-to-binary-geometry'; import type { TriangulationWorkerInput, TriangulateInput, - TriangulateResult + TriangulateResult, + ParseGeoArrowInput, + ParseGeoArrowResult } from '../triangulate-on-worker'; createWorker(async (data, options = {}) => { @@ -17,6 +24,8 @@ createWorker(async (data, options = {}) => { return input; case 'triangulate': return triangulateBatch(data); + case 'parseGeoArrow': + return parseGeoArrowBatch(data); default: throw new Error( `TriangulationWorker: Unsupported operation ${operation}. Expected 'triangulate'` @@ -37,3 +46,32 @@ function triangulateBatch(data: TriangulateInput): TriangulateResult { ); return {...data, ...(triangleIndices ? {triangleIndices} : {})}; } + +/** + * Reading the arrow file into memory is very fast. Parsing the geoarrow column is slow, and blocking the main thread. + * To address this issue, we can move the parsing job from main thread to parallel web workers. + * Each web worker will parse the geoarrow column using one chunk/batch of arrow data, and return binary geometries to main thread. + * The app on the main thread will render the binary geometries and the parsing will not block the main thread. + * + * @param data + * @returns + */ +function parseGeoArrowBatch(data: ParseGeoArrowInput): ParseGeoArrowResult { + let binaryGeometries: BinaryDataFromGeoArrow | null = null; + const {arrowData, chunkIndex, geometryColumnName, geometryEncoding, meanCenter, triangle} = data; + const arrowTable = arrow.tableFromIPC(arrowData); + const geometryColumn = arrowTable.getChild(geometryColumnName); + if (geometryColumn) { + const options = {meanCenter, triangle, chunkIndex}; + binaryGeometries = getBinaryGeometriesFromArrow(geometryColumn, geometryEncoding, options); + // NOTE: here binaryGeometry will be copied to main thread + return { + binaryGeometries, + chunkIndex: data.chunkIndex + }; + } + return { + binaryGeometries, + chunkIndex: data.chunkIndex + }; +} diff --git a/modules/arrow/test/triangulate-on-worker.spec.ts b/modules/arrow/test/triangulate-on-worker.spec.ts index a65286126f..47ed824718 100644 --- a/modules/arrow/test/triangulate-on-worker.spec.ts +++ b/modules/arrow/test/triangulate-on-worker.spec.ts @@ -3,7 +3,34 @@ import test from 'tape-promise/tape'; import {triangulateOnWorker, TriangulationWorker} from '@loaders.gl/arrow'; +import {fetchFile} from '@loaders.gl/core'; import {processOnWorker, isBrowser, WorkerFarm} from '@loaders.gl/worker-utils'; +import {POINT_ARROW_FILE} from './geoarrow/convert-geoarrow-to-geojson.spec'; + +test.only('TriangulationWorker#plumbing', async (t) => { + const arrowFile = await fetchFile(POINT_ARROW_FILE); + const arrowContent = await arrowFile.arrayBuffer(); + + // simulate parsing 1st batch/chunk of the arrow data in web worker from e.g. kepler + const sourceData = { + operation: 'parseGeoArrow', + arrowData: arrowContent, + chunkIndex: 0, + geometryColumnName: 'geometry', + geometryEncoding: 'geoarrow.point', + meanCenter: true, + triangle: false + }; + const parsedGeoArrowData = await processOnWorker(TriangulationWorker, sourceData, { + _workerType: 'test' + }); + + // kepler should await for the result from web worker and render the binary geometries + console.log(parsedGeoArrowData); + + t.ok(parsedGeoArrowData, 'ParseGeoArrow worker echoed input data'); + t.end(); +}); // WORKER TESTS test('TriangulationWorker#plumbing', async (t) => { From b4d4f3fabfc0e80c8e02ec89687b82ba425b3e11 Mon Sep 17 00:00:00 2001 From: Xun Li Date: Tue, 21 Nov 2023 16:29:12 -0700 Subject: [PATCH 06/14] remove test.only Signed-off-by: Xun Li --- modules/arrow/test/triangulate-on-worker.spec.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/arrow/test/triangulate-on-worker.spec.ts b/modules/arrow/test/triangulate-on-worker.spec.ts index 47ed824718..f870119c93 100644 --- a/modules/arrow/test/triangulate-on-worker.spec.ts +++ b/modules/arrow/test/triangulate-on-worker.spec.ts @@ -7,7 +7,7 @@ import {fetchFile} from '@loaders.gl/core'; import {processOnWorker, isBrowser, WorkerFarm} from '@loaders.gl/worker-utils'; import {POINT_ARROW_FILE} from './geoarrow/convert-geoarrow-to-geojson.spec'; -test.only('TriangulationWorker#plumbing', async (t) => { +test('TriangulationWorker#plumbing', async (t) => { const arrowFile = await fetchFile(POINT_ARROW_FILE); const arrowContent = await arrowFile.arrayBuffer(); From b57efd8b064ab36ff199d85d3336a440fb464064 Mon Sep 17 00:00:00 2001 From: Ib Green Date: Wed, 22 Nov 2023 10:24:12 -0500 Subject: [PATCH 07/14] parseGeoArrowOnWorker --- modules/arrow/src/index.ts | 6 +- modules/arrow/src/triangulate-on-worker.ts | 16 ++++- .../arrow/src/workers/triangulation-worker.ts | 2 +- .../arrow/test/triangulate-on-worker.spec.ts | 62 ++++++++++--------- 4 files changed, 51 insertions(+), 35 deletions(-) diff --git a/modules/arrow/src/index.ts b/modules/arrow/src/index.ts index f30799d123..8d61b91e5c 100644 --- a/modules/arrow/src/index.ts +++ b/modules/arrow/src/index.ts @@ -66,4 +66,8 @@ export {convertArrowToGeoJSONTable} from './tables/convert-arrow-to-geojson-tabl // EXPERIMENTAL WORKER -export {TriangulationWorker, triangulateOnWorker} from './triangulate-on-worker'; +export { + TriangulationWorker, + triangulateOnWorker, + parseGeoArrowOnWorker +} from './triangulate-on-worker'; diff --git a/modules/arrow/src/triangulate-on-worker.ts b/modules/arrow/src/triangulate-on-worker.ts index 5a8232952c..68c06cab41 100644 --- a/modules/arrow/src/triangulate-on-worker.ts +++ b/modules/arrow/src/triangulate-on-worker.ts @@ -16,11 +16,11 @@ export type TriangulationWorkerInput = export type TriangulationWorkerOutput = | ({operation: 'triangulate'} & TriangulateResult) - | ({operation: 'parseGeoArrow'} & ParseGeoArrowResult) + | ({operation: 'parse-geoarrow'} & ParseGeoArrowResult) | {operation: 'test'; data: any}; export type ParseGeoArrowInput = { - operation: 'parseGeoArrow'; + operation: 'parse-geoarrow'; arrowData: ArrayBuffer; chunkIndex: number; geometryColumnName: string; @@ -59,7 +59,7 @@ export const TriangulationWorker = { }; /** - * Provide type safety + * Triangulate a set of polygons on worker, type safe API */ export function triangulateOnWorker( data: TriangulateInput, @@ -67,3 +67,13 @@ export function triangulateOnWorker( ): Promise { return processOnWorker(TriangulationWorker, {...data, operation: 'triangulate'}, options); } + +/** + * Parse GeoArrow geometry colum on worker, type safe API + */ +export function parseGeoArrowOnWorker( + data: ParseGeoArrowInput, + options: WorkerOptions = {} +): Promise { + return processOnWorker(TriangulationWorker, {...data, operation: 'parse-geoarrow'}, options); +} diff --git a/modules/arrow/src/workers/triangulation-worker.ts b/modules/arrow/src/workers/triangulation-worker.ts index 27c450cb7a..41bd265793 100644 --- a/modules/arrow/src/workers/triangulation-worker.ts +++ b/modules/arrow/src/workers/triangulation-worker.ts @@ -24,7 +24,7 @@ createWorker(async (data, options = {}) => { return input; case 'triangulate': return triangulateBatch(data); - case 'parseGeoArrow': + case 'parse-geoarrow': return parseGeoArrowBatch(data); default: throw new Error( diff --git a/modules/arrow/test/triangulate-on-worker.spec.ts b/modules/arrow/test/triangulate-on-worker.spec.ts index f870119c93..00488dbc50 100644 --- a/modules/arrow/test/triangulate-on-worker.spec.ts +++ b/modules/arrow/test/triangulate-on-worker.spec.ts @@ -2,36 +2,11 @@ // Copyright (c) vis.gl contributors import test from 'tape-promise/tape'; -import {triangulateOnWorker, TriangulationWorker} from '@loaders.gl/arrow'; +import {triangulateOnWorker, parseGeoArrowOnWorker, TriangulationWorker} from '@loaders.gl/arrow'; import {fetchFile} from '@loaders.gl/core'; import {processOnWorker, isBrowser, WorkerFarm} from '@loaders.gl/worker-utils'; import {POINT_ARROW_FILE} from './geoarrow/convert-geoarrow-to-geojson.spec'; -test('TriangulationWorker#plumbing', async (t) => { - const arrowFile = await fetchFile(POINT_ARROW_FILE); - const arrowContent = await arrowFile.arrayBuffer(); - - // simulate parsing 1st batch/chunk of the arrow data in web worker from e.g. kepler - const sourceData = { - operation: 'parseGeoArrow', - arrowData: arrowContent, - chunkIndex: 0, - geometryColumnName: 'geometry', - geometryEncoding: 'geoarrow.point', - meanCenter: true, - triangle: false - }; - const parsedGeoArrowData = await processOnWorker(TriangulationWorker, sourceData, { - _workerType: 'test' - }); - - // kepler should await for the result from web worker and render the binary geometries - console.log(parsedGeoArrowData); - - t.ok(parsedGeoArrowData, 'ParseGeoArrow worker echoed input data'); - t.end(); -}); - // WORKER TESTS test('TriangulationWorker#plumbing', async (t) => { const sourceData = { @@ -65,12 +40,11 @@ test('TriangulationWorker#plumbing', async (t) => { t.end(); }); -test.skip('triangulateOnWorker#plumbing', async (t) => { - t.ok(triangulateOnWorker, 'triangulateOnWorker defined'); +test.skip('triangulateOnWorker', async (t) => { + t.ok(triangulateOnWorker, 'triangulateOnWorker imported ok'); /* const triangulatedData = await triangulateOnWorker( { - operation: 'test', data: new ArrayBuffer(100) }, { @@ -87,12 +61,40 @@ test.skip('triangulateOnWorker#plumbing', async (t) => { // operation: 'error', // _workerType: 'test' // }), 'Triangulation worker throws on incorrect operation'); - */ if (!isBrowser) { const workerFarm = WorkerFarm.getWorkerFarm({}); workerFarm.destroy(); } + */ + t.end(); +}); + +test('parseGeoArrowOnWorker', async (t) => { + const arrowFile = await fetchFile(POINT_ARROW_FILE); + const arrowContent = await arrowFile.arrayBuffer(); + // simulate parsing 1st batch/chunk of the arrow data in web worker from e.g. kepler + const parsedGeoArrowData = await parseGeoArrowOnWorker( + { + operation: 'parse-geoarrow', + arrowData: arrowContent, + chunkIndex: 0, + geometryColumnName: 'geometry', + geometryEncoding: 'geoarrow.point', + meanCenter: true, + triangle: false + }, + { + _workerType: 'test' + } + ); + + // kepler should await for the result from web worker and render the binary geometries + const {binaryGeometries, bounds, featureTypes, meanCenters} = parsedGeoArrowData.binaryGeometries!; + t.ok(binaryGeometries, 'ParseGeoArrow worker returned binaryGeometries'); + t.ok(bounds, 'ParseGeoArrow worker returned binaryGeometries'); + t.ok(featureTypes, 'ParseGeoArrow worker returned featureTypes'); + t.ok(meanCenters, 'ParseGeoArrow worker returned meanCenters'); t.end(); }); From 77141f259ab5f433634ed6148845f0b22bf76b4f Mon Sep 17 00:00:00 2001 From: Ib Green Date: Wed, 22 Nov 2023 11:07:14 -0500 Subject: [PATCH 08/14] wip --- modules/arrow/test/triangulate-on-worker.spec.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/arrow/test/triangulate-on-worker.spec.ts b/modules/arrow/test/triangulate-on-worker.spec.ts index 00488dbc50..b1650c1c6e 100644 --- a/modules/arrow/test/triangulate-on-worker.spec.ts +++ b/modules/arrow/test/triangulate-on-worker.spec.ts @@ -5,7 +5,8 @@ import test from 'tape-promise/tape'; import {triangulateOnWorker, parseGeoArrowOnWorker, TriangulationWorker} from '@loaders.gl/arrow'; import {fetchFile} from '@loaders.gl/core'; import {processOnWorker, isBrowser, WorkerFarm} from '@loaders.gl/worker-utils'; -import {POINT_ARROW_FILE} from './geoarrow/convert-geoarrow-to-geojson.spec'; + +export const POINT_ARROW_FILE = '@loaders.gl/arrow/test/data/point.arrow'; // WORKER TESTS test('TriangulationWorker#plumbing', async (t) => { From 5b908a3cdfccf6b8c6ef3a5e7e874d92fe965363 Mon Sep 17 00:00:00 2001 From: Xun Li Date: Wed, 22 Nov 2023 10:40:06 -0700 Subject: [PATCH 09/14] postmessage one batch of geometries --- .../src/parsers/parse-arrow-in-batches.ts | 1 + modules/arrow/src/triangulate-on-worker.ts | 4 +- .../arrow/src/workers/triangulation-worker.ts | 31 ++++++++++++---- .../arrow/test/triangulate-on-worker.spec.ts | 37 +++++++++++++++++++ 4 files changed, 64 insertions(+), 9 deletions(-) diff --git a/modules/arrow/src/parsers/parse-arrow-in-batches.ts b/modules/arrow/src/parsers/parse-arrow-in-batches.ts index f75960a7b8..b412528b24 100644 --- a/modules/arrow/src/parsers/parse-arrow-in-batches.ts +++ b/modules/arrow/src/parsers/parse-arrow-in-batches.ts @@ -37,6 +37,7 @@ export function parseArrowInBatches( shape: 'arrow-table', batchType: 'data', data: new arrow.Table([recordBatch]), + rawArrayBuffer: asyncIterator, length: recordBatch.data.length }; // processBatch(recordBatch); diff --git a/modules/arrow/src/triangulate-on-worker.ts b/modules/arrow/src/triangulate-on-worker.ts index 68c06cab41..89dbf8e1d0 100644 --- a/modules/arrow/src/triangulate-on-worker.ts +++ b/modules/arrow/src/triangulate-on-worker.ts @@ -21,7 +21,7 @@ export type TriangulationWorkerOutput = export type ParseGeoArrowInput = { operation: 'parse-geoarrow'; - arrowData: ArrayBuffer; + arrowData: any; chunkIndex: number; geometryColumnName: string; geometryEncoding: GeoArrowEncoding; @@ -31,7 +31,7 @@ export type ParseGeoArrowInput = { export type ParseGeoArrowResult = { chunkIndex: number; - binaryGeometries: BinaryDataFromGeoArrow | null; + binaryDataFromGeoArrow: BinaryDataFromGeoArrow | null; }; /** Input data for operation: 'triangulate' */ diff --git a/modules/arrow/src/workers/triangulation-worker.ts b/modules/arrow/src/workers/triangulation-worker.ts index 41bd265793..e2a9fd03c0 100644 --- a/modules/arrow/src/workers/triangulation-worker.ts +++ b/modules/arrow/src/workers/triangulation-worker.ts @@ -57,21 +57,38 @@ function triangulateBatch(data: TriangulateInput): TriangulateResult { * @returns */ function parseGeoArrowBatch(data: ParseGeoArrowInput): ParseGeoArrowResult { - let binaryGeometries: BinaryDataFromGeoArrow | null = null; - const {arrowData, chunkIndex, geometryColumnName, geometryEncoding, meanCenter, triangle} = data; - const arrowTable = arrow.tableFromIPC(arrowData); - const geometryColumn = arrowTable.getChild(geometryColumnName); + let binaryDataFromGeoArrow: BinaryDataFromGeoArrow | null = null; + const {arrowData, chunkIndex, geometryEncoding, meanCenter, triangle} = data; + // const batches = arrow.RecordBatchReader.from(arrowData); + console.log(arrowData, typeof arrowData); + const newdata = new arrow.Data( + arrowData.type, + arrowData.offset, + arrowData.length, + arrowData.nullCount, + arrowData.buffers, + arrowData.children, + arrowData.dictionary + ); + const geometryColumn = arrow.makeVector(newdata); + console.log('geometryColumn', geometryColumn.data); + // const arrowTable = new arrow.Table([arrowData.batches]); + // const geometryColumn = arrowTable.getChild(geometryColumnName); if (geometryColumn) { const options = {meanCenter, triangle, chunkIndex}; - binaryGeometries = getBinaryGeometriesFromArrow(geometryColumn, geometryEncoding, options); + binaryDataFromGeoArrow = getBinaryGeometriesFromArrow( + geometryColumn, + geometryEncoding, + options + ); // NOTE: here binaryGeometry will be copied to main thread return { - binaryGeometries, + binaryDataFromGeoArrow, chunkIndex: data.chunkIndex }; } return { - binaryGeometries, + binaryDataFromGeoArrow, chunkIndex: data.chunkIndex }; } diff --git a/modules/arrow/test/triangulate-on-worker.spec.ts b/modules/arrow/test/triangulate-on-worker.spec.ts index b1650c1c6e..8783705d40 100644 --- a/modules/arrow/test/triangulate-on-worker.spec.ts +++ b/modules/arrow/test/triangulate-on-worker.spec.ts @@ -1,12 +1,49 @@ // loaders.gl, MIT license // Copyright (c) vis.gl contributors +import * as arrow from 'apache-arrow'; import test from 'tape-promise/tape'; import {triangulateOnWorker, parseGeoArrowOnWorker, TriangulationWorker} from '@loaders.gl/arrow'; import {fetchFile} from '@loaders.gl/core'; import {processOnWorker, isBrowser, WorkerFarm} from '@loaders.gl/worker-utils'; export const POINT_ARROW_FILE = '@loaders.gl/arrow/test/data/point.arrow'; +test.only('TriangulationWorker#plumbing', async (t) => { + const arrowFile = await fetchFile(POINT_ARROW_FILE); + const arrowContent = await arrowFile.arrayBuffer(); + const arrowTable = arrow.tableFromIPC(arrowContent); + const geometryColumn = arrowTable.getChild('geometry'); + const geometryChunk = geometryColumn?.data[0]; + console.log('geometryChunk', geometryChunk?.type); + + // simulate parsing 1st batch/chunk of the arrow data in web worker from e.g. kepler + const sourceData = { + operation: 'parseGeoArrow', + arrowData: { + type: {typeId: geometryChunk?.typeId, listSize: geometryChunk?.type?.listSize}, + offset: geometryChunk?.offset, + length: geometryChunk?.length, + nullCount: geometryChunk?.nullCount, + buffers: geometryChunk?.buffers, + children: geometryChunk?.children, + dictionary: geometryChunk?.dictionary + }, + chunkIndex: 0, + geometryColumnName: 'geometry', + geometryEncoding: 'geoarrow.point', + meanCenter: true, + triangle: false + }; + const parsedGeoArrowData = await processOnWorker(TriangulationWorker, sourceData, { + _workerType: 'test' + }); + + // kepler should await for the result from web worker and render the binary geometries + console.log(parsedGeoArrowData); + + t.ok(parsedGeoArrowData, 'ParseGeoArrow worker echoed input data'); + t.end(); +}); // WORKER TESTS test('TriangulationWorker#plumbing', async (t) => { From 9ad14c9ecf3598c2e76719f47103209ef918683c Mon Sep 17 00:00:00 2001 From: Xun Li Date: Wed, 22 Nov 2023 21:30:32 -0700 Subject: [PATCH 10/14] post message with geometry batch to match arrow batch loader Signed-off-by: Xun Li --- modules/arrow/src/triangulate-on-worker.ts | 12 ++- .../arrow/src/workers/triangulation-worker.ts | 29 +++-- .../arrow/test/triangulate-on-worker.spec.ts | 101 ++++++++---------- 3 files changed, 65 insertions(+), 77 deletions(-) diff --git a/modules/arrow/src/triangulate-on-worker.ts b/modules/arrow/src/triangulate-on-worker.ts index 89dbf8e1d0..2e0517ce30 100644 --- a/modules/arrow/src/triangulate-on-worker.ts +++ b/modules/arrow/src/triangulate-on-worker.ts @@ -1,6 +1,7 @@ // loaders.gl, MIT license // Copyright (c) vis.gl contributors +import * as arrow from 'apache-arrow'; import type {WorkerOptions} from '@loaders.gl/worker-utils'; import {processOnWorker} from '@loaders.gl/worker-utils'; import {BinaryDataFromGeoArrow, GeoArrowEncoding} from '@loaders.gl/arrow'; @@ -21,9 +22,16 @@ export type TriangulationWorkerOutput = export type ParseGeoArrowInput = { operation: 'parse-geoarrow'; - arrowData: any; + chunkData: { + type: arrow.DataType; + offset: number; + length: number; + nullCount: number; + buffers: any; + children: arrow.Data[]; + dictionary?: arrow.Vector; + }; chunkIndex: number; - geometryColumnName: string; geometryEncoding: GeoArrowEncoding; meanCenter: boolean; triangle: boolean; diff --git a/modules/arrow/src/workers/triangulation-worker.ts b/modules/arrow/src/workers/triangulation-worker.ts index e2a9fd03c0..3b9da3b2f8 100644 --- a/modules/arrow/src/workers/triangulation-worker.ts +++ b/modules/arrow/src/workers/triangulation-worker.ts @@ -50,7 +50,7 @@ function triangulateBatch(data: TriangulateInput): TriangulateResult { /** * Reading the arrow file into memory is very fast. Parsing the geoarrow column is slow, and blocking the main thread. * To address this issue, we can move the parsing job from main thread to parallel web workers. - * Each web worker will parse the geoarrow column using one chunk/batch of arrow data, and return binary geometries to main thread. + * Each web worker will parse one chunk/batch of geoarrow column, and return binary geometries to main thread. * The app on the main thread will render the binary geometries and the parsing will not block the main thread. * * @param data @@ -58,22 +58,19 @@ function triangulateBatch(data: TriangulateInput): TriangulateResult { */ function parseGeoArrowBatch(data: ParseGeoArrowInput): ParseGeoArrowResult { let binaryDataFromGeoArrow: BinaryDataFromGeoArrow | null = null; - const {arrowData, chunkIndex, geometryEncoding, meanCenter, triangle} = data; - // const batches = arrow.RecordBatchReader.from(arrowData); - console.log(arrowData, typeof arrowData); - const newdata = new arrow.Data( - arrowData.type, - arrowData.offset, - arrowData.length, - arrowData.nullCount, - arrowData.buffers, - arrowData.children, - arrowData.dictionary + const {chunkData, chunkIndex, geometryEncoding, meanCenter, triangle} = data; + // rebuild chunkData + const arrowData = new arrow.Data( + chunkData.type, + chunkData.offset, + chunkData.length, + chunkData.nullCount, + chunkData.buffers, + chunkData.children, + chunkData.dictionary ); - const geometryColumn = arrow.makeVector(newdata); - console.log('geometryColumn', geometryColumn.data); - // const arrowTable = new arrow.Table([arrowData.batches]); - // const geometryColumn = arrowTable.getChild(geometryColumnName); + // rebuild geometry column with chunkData + const geometryColumn = arrow.makeVector(arrowData); if (geometryColumn) { const options = {meanCenter, triangle, chunkIndex}; binaryDataFromGeoArrow = getBinaryGeometriesFromArrow( diff --git a/modules/arrow/test/triangulate-on-worker.spec.ts b/modules/arrow/test/triangulate-on-worker.spec.ts index 8783705d40..25fe05ed98 100644 --- a/modules/arrow/test/triangulate-on-worker.spec.ts +++ b/modules/arrow/test/triangulate-on-worker.spec.ts @@ -6,44 +6,7 @@ import test from 'tape-promise/tape'; import {triangulateOnWorker, parseGeoArrowOnWorker, TriangulationWorker} from '@loaders.gl/arrow'; import {fetchFile} from '@loaders.gl/core'; import {processOnWorker, isBrowser, WorkerFarm} from '@loaders.gl/worker-utils'; - -export const POINT_ARROW_FILE = '@loaders.gl/arrow/test/data/point.arrow'; -test.only('TriangulationWorker#plumbing', async (t) => { - const arrowFile = await fetchFile(POINT_ARROW_FILE); - const arrowContent = await arrowFile.arrayBuffer(); - const arrowTable = arrow.tableFromIPC(arrowContent); - const geometryColumn = arrowTable.getChild('geometry'); - const geometryChunk = geometryColumn?.data[0]; - console.log('geometryChunk', geometryChunk?.type); - - // simulate parsing 1st batch/chunk of the arrow data in web worker from e.g. kepler - const sourceData = { - operation: 'parseGeoArrow', - arrowData: { - type: {typeId: geometryChunk?.typeId, listSize: geometryChunk?.type?.listSize}, - offset: geometryChunk?.offset, - length: geometryChunk?.length, - nullCount: geometryChunk?.nullCount, - buffers: geometryChunk?.buffers, - children: geometryChunk?.children, - dictionary: geometryChunk?.dictionary - }, - chunkIndex: 0, - geometryColumnName: 'geometry', - geometryEncoding: 'geoarrow.point', - meanCenter: true, - triangle: false - }; - const parsedGeoArrowData = await processOnWorker(TriangulationWorker, sourceData, { - _workerType: 'test' - }); - - // kepler should await for the result from web worker and render the binary geometries - console.log(parsedGeoArrowData); - - t.ok(parsedGeoArrowData, 'ParseGeoArrow worker echoed input data'); - t.end(); -}); +import {GEOARROW_POINT_FILE} from './data/geoarrow/test-cases'; // WORKER TESTS test('TriangulationWorker#plumbing', async (t) => { @@ -109,30 +72,50 @@ test.skip('triangulateOnWorker', async (t) => { }); test('parseGeoArrowOnWorker', async (t) => { - const arrowFile = await fetchFile(POINT_ARROW_FILE); + const arrowFile = await fetchFile(GEOARROW_POINT_FILE); const arrowContent = await arrowFile.arrayBuffer(); + const arrowTable = arrow.tableFromIPC(arrowContent); // simulate parsing 1st batch/chunk of the arrow data in web worker from e.g. kepler - const parsedGeoArrowData = await parseGeoArrowOnWorker( - { - operation: 'parse-geoarrow', - arrowData: arrowContent, - chunkIndex: 0, - geometryColumnName: 'geometry', - geometryEncoding: 'geoarrow.point', - meanCenter: true, - triangle: false - }, - { - _workerType: 'test' - } - ); + const geometryColumn = arrowTable.getChild('geometry'); + const geometryChunk = geometryColumn?.data[0]; - // kepler should await for the result from web worker and render the binary geometries - const {binaryGeometries, bounds, featureTypes, meanCenters} = parsedGeoArrowData.binaryGeometries!; - t.ok(binaryGeometries, 'ParseGeoArrow worker returned binaryGeometries'); - t.ok(bounds, 'ParseGeoArrow worker returned binaryGeometries'); - t.ok(featureTypes, 'ParseGeoArrow worker returned featureTypes'); - t.ok(meanCenters, 'ParseGeoArrow worker returned meanCenters'); + if (geometryChunk) { + const chunkData = { + type: { + ...geometryChunk?.type, + typeId: geometryChunk?.typeId, + listSize: geometryChunk?.type?.listSize + }, + offset: geometryChunk.offset, + length: geometryChunk.length, + nullCount: geometryChunk.nullCount, + buffers: geometryChunk.buffers, + children: geometryChunk.children, + dictionary: geometryChunk.dictionary + }; + + const parsedGeoArrowData = await parseGeoArrowOnWorker( + { + operation: 'parse-geoarrow', + chunkData, + chunkIndex: 0, + geometryEncoding: 'geoarrow.point', + meanCenter: true, + triangle: false + }, + { + _workerType: 'test' + } + ); + + // kepler should await for the result from web worker and render the binary geometries + const {binaryGeometries, bounds, featureTypes, meanCenters} = + parsedGeoArrowData.binaryDataFromGeoArrow!; + t.ok(binaryGeometries, 'ParseGeoArrow worker returned binaryGeometries'); + t.ok(bounds, 'ParseGeoArrow worker returned binaryGeometries'); + t.ok(featureTypes, 'ParseGeoArrow worker returned featureTypes'); + t.ok(meanCenters, 'ParseGeoArrow worker returned meanCenters'); + } t.end(); }); From 19edcf10903e7709dcd94b2530720e69a265dcd7 Mon Sep 17 00:00:00 2001 From: Xun Li Date: Tue, 28 Nov 2023 15:55:47 -0700 Subject: [PATCH 11/14] rebase Signed-off-by: Xun Li --- .../src/geoarrow/convert-geoarrow-to-binary-geometry.ts | 7 ++++++- modules/arrow/src/parsers/parse-arrow-in-batches.ts | 1 - 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts b/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts index 7dfe7f9312..9509cb5ea5 100644 --- a/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts +++ b/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts @@ -335,9 +335,14 @@ export function getTriangleIndices( * get binary polygons from geoarrow polygon column * @param chunk one chunk of geoarrow polygon column * @param geoEncoding the geo encoding of the geoarrow polygon column + * @param options options for getting binary geometries * @returns BinaryGeometryContent */ -function getBinaryPolygonsFromChunk(chunk: arrow.Data, geoEncoding: string): BinaryGeometryContent { +function getBinaryPolygonsFromChunk( + chunk: arrow.Data, + geoEncoding: string, + options?: BinaryGeometriesFromArrowOptions +): BinaryGeometryContent { const isMultiPolygon = geoEncoding === 'geoarrow.multipolygon'; const polygonData = isMultiPolygon ? chunk.children[0] : chunk; diff --git a/modules/arrow/src/parsers/parse-arrow-in-batches.ts b/modules/arrow/src/parsers/parse-arrow-in-batches.ts index b412528b24..f75960a7b8 100644 --- a/modules/arrow/src/parsers/parse-arrow-in-batches.ts +++ b/modules/arrow/src/parsers/parse-arrow-in-batches.ts @@ -37,7 +37,6 @@ export function parseArrowInBatches( shape: 'arrow-table', batchType: 'data', data: new arrow.Table([recordBatch]), - rawArrayBuffer: asyncIterator, length: recordBatch.data.length }; // processBatch(recordBatch); From b8fd5d9c2adacada20b5ad2119bf241c8b107359 Mon Sep 17 00:00:00 2001 From: Xun Li Date: Thu, 30 Nov 2023 23:42:33 -0700 Subject: [PATCH 12/14] add kyle hard-clone Signed-off-by: Xun Li --- .../convert-geoarrow-to-binary-geometry.ts | 6 +- modules/arrow/src/index.ts | 1 + modules/arrow/src/triangulate-on-worker.ts | 22 +-- modules/arrow/src/workers/hard-clone.ts | 162 ++++++++++++++++++ .../arrow/src/workers/triangulation-worker.ts | 4 +- .../arrow/test/triangulate-on-worker.spec.ts | 28 +-- 6 files changed, 199 insertions(+), 24 deletions(-) create mode 100644 modules/arrow/src/workers/hard-clone.ts diff --git a/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts b/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts index 9509cb5ea5..248beb6bd4 100644 --- a/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts +++ b/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts @@ -90,7 +90,10 @@ export function getBinaryGeometriesFromArrow( line: geoEncoding === 'geoarrow.multilinestring' || geoEncoding === 'geoarrow.linestring' }; - const chunks = options?.chunkIndex ? [geoColumn.data[options?.chunkIndex]] : geoColumn.data; + const chunks = + options?.chunkIndex !== undefined && options?.chunkIndex >= 0 + ? [geoColumn.data[options?.chunkIndex]] + : geoColumn.data; let bounds: [number, number, number, number] = [Infinity, Infinity, -Infinity, -Infinity]; let globalFeatureIdOffset = 0; const binaryGeometries: BinaryFeatures[] = []; @@ -307,6 +310,7 @@ export function getTriangleIndices( } primitiveIndex++; } + // TODO check if each ring is closed const triangleIndices = earcut( slicedFlatCoords, holeIndices.length > 0 ? holeIndices : undefined, diff --git a/modules/arrow/src/index.ts b/modules/arrow/src/index.ts index 8d61b91e5c..e030054718 100644 --- a/modules/arrow/src/index.ts +++ b/modules/arrow/src/index.ts @@ -65,6 +65,7 @@ export {parseGeometryFromArrow} from './geoarrow/convert-geoarrow-to-geojson-geo export {convertArrowToGeoJSONTable} from './tables/convert-arrow-to-geojson-table'; // EXPERIMENTAL WORKER +export {hardClone} from './workers/hard-clone'; export { TriangulationWorker, diff --git a/modules/arrow/src/triangulate-on-worker.ts b/modules/arrow/src/triangulate-on-worker.ts index 2e0517ce30..e06032100f 100644 --- a/modules/arrow/src/triangulate-on-worker.ts +++ b/modules/arrow/src/triangulate-on-worker.ts @@ -20,20 +20,22 @@ export type TriangulationWorkerOutput = | ({operation: 'parse-geoarrow'} & ParseGeoArrowResult) | {operation: 'test'; data: any}; +type GeoArrowChunkData = { + type: arrow.DataType; + offset: number; + length: number; + nullCount: number; + buffers: any; + children: arrow.Data[]; + dictionary?: arrow.Vector; +}; + export type ParseGeoArrowInput = { operation: 'parse-geoarrow'; - chunkData: { - type: arrow.DataType; - offset: number; - length: number; - nullCount: number; - buffers: any; - children: arrow.Data[]; - dictionary?: arrow.Vector; - }; + chunkData: GeoArrowChunkData; chunkIndex: number; geometryEncoding: GeoArrowEncoding; - meanCenter: boolean; + calculateMeanCenters: boolean; triangle: boolean; }; diff --git a/modules/arrow/src/workers/hard-clone.ts b/modules/arrow/src/workers/hard-clone.ts new file mode 100644 index 0000000000..2ef7c48786 --- /dev/null +++ b/modules/arrow/src/workers/hard-clone.ts @@ -0,0 +1,162 @@ +import * as arrow from 'apache-arrow'; +import type {Buffers} from 'apache-arrow/data'; + +type TypedArray = + | Uint8Array + | Uint8ClampedArray + | Uint16Array + | Uint32Array + | Int8Array + | Int16Array + | Int32Array + | Float32Array + | Float64Array; + +/** + * Clone an Arrow JS Data or Vector, detaching from an existing ArrayBuffer if + * it is shared with other. + * + * The purpose of this function is to enable transferring a `Data` instance, + * e.g. to a web worker, without neutering any other data. + * + * Any internal buffers that are a slice of a larger `ArrayBuffer` (i.e. where + * the typed array's `byteOffset` is not `0` and where its `byteLength` does not + * match its `array.buffer.byteLength`) are copied into new `ArrayBuffers`. + * + * If `force` is `true`, always clone internal buffers, even if not shared. If + * the default, `false`, any internal buffers that are **not** a slice of a + * larger `ArrayBuffer` will not be copied. + */ +export function hardClone( + input: arrow.Data, + force?: boolean +): arrow.Data; +export function hardClone( + input: arrow.Vector, + force?: boolean +): arrow.Vector; + +export function hardClone( + data: arrow.Data | arrow.Vector, + force: boolean = false +): arrow.Data | arrow.Vector { + // Check if `data` is an arrow.Vector + if ('data' in data) { + return new arrow.Vector(data.data.map((data) => hardClone(data, force))); + } + + // Clone each of the children, recursively + const clonedChildren: arrow.Data[] = []; + for (const childData of data.children) { + clonedChildren.push(hardClone(childData, force)); + } + + // Clone the dictionary if there is one + let clonedDictionary: arrow.Vector | undefined; + if (data.dictionary !== undefined) { + clonedDictionary = hardClone(data.dictionary, force); + } + + // Buffers can have up to four entries. Each of these can be `undefined` for + // one or more array types. + // + // - OFFSET: value offsets for variable size list types + // - DATA: the underlying data + // - VALIDITY: the null buffer. This may be empty or undefined if all elements + // are non-null/valid. + // - TYPE: type ids for a union type. + const clonedBuffers: Buffers = { + [arrow.BufferType.OFFSET]: cloneBuffer(data.buffers[arrow.BufferType.OFFSET], force), + [arrow.BufferType.DATA]: cloneBuffer(data.buffers[arrow.BufferType.DATA], force), + [arrow.BufferType.VALIDITY]: cloneBuffer(data.buffers[arrow.BufferType.VALIDITY], force), + [arrow.BufferType.TYPE]: cloneBuffer(data.buffers[arrow.BufferType.TYPE], force) + }; + + // Note: the data.offset is passed on so that a sliced Data instance will not + // be "un-sliced". However keep in mind that this means we're cloning the + // _original backing buffer_, not only the portion of the Data that was + // sliced. + return new arrow.Data( + data.type, + data.offset, + data.length, + // @ts-expect-error _nullCount is protected. We're using it here to mimic + // `Data.clone` + data._nullCount, + clonedBuffers, + clonedChildren, + clonedDictionary + ); +} + +/** + * Test whether an arrow.Data instance is a slice of a larger `ArrayBuffer`. + */ +export function isShared(data: arrow.Data | arrow.Vector): boolean { + // Loop over arrow.Vector + if ('data' in data) { + return data.data.some((data) => isShared(data)); + } + + // Check child data + for (const childData of data.children) { + if (isShared(childData)) { + return true; + } + } + + // Check dictionary + if (data.dictionary !== undefined) { + if (isShared(data.dictionary)) { + return true; + } + } + + const bufferTypes = [ + arrow.BufferType.OFFSET, + arrow.BufferType.DATA, + arrow.BufferType.VALIDITY, + arrow.BufferType.TYPE + ]; + for (const bufferType of bufferTypes) { + if (data.buffers[bufferType] !== undefined && isTypedArraySliced(data.buffers[bufferType])) { + return true; + } + } + + return false; +} + +/** + * Returns true if the current typed array is a partial slice on a larger + * ArrayBuffer + */ +function isTypedArraySliced(arr: TypedArray): boolean { + return !(arr.byteOffset === 0 && arr.byteLength === arr.buffer.byteLength); +} + +/** + * If a slice of a larger ArrayBuffer, clone to a fresh `ArrayBuffer`. + * + * If `force` is `true`, always clone the array, even if not shared. + */ +function cloneBuffer(arr: A, force: boolean): A { + // Not all buffer types are defined for every type of Arrow array. E.g. + // `arrow.BufferType.TYPE` is only defined for the Union type. + if (arr === undefined) { + return arr; + } + + // The current array is not a part of a larger ArrayBuffer, don't clone it + if (!force && !isTypedArraySliced(arr)) { + return arr; + } + + // Note: TypedArray.slice() **copies** into a new ArrayBuffer + + // @ts-expect-error 'Uint8Array' is assignable to the constraint of type 'A', + // but 'A' could be instantiated with a different subtype of constraint + // 'TypedArray' + // We know from arr.slice that it will always return the same + return arr.slice(); +} diff --git a/modules/arrow/src/workers/triangulation-worker.ts b/modules/arrow/src/workers/triangulation-worker.ts index 3b9da3b2f8..b76bc8e43e 100644 --- a/modules/arrow/src/workers/triangulation-worker.ts +++ b/modules/arrow/src/workers/triangulation-worker.ts @@ -58,7 +58,7 @@ function triangulateBatch(data: TriangulateInput): TriangulateResult { */ function parseGeoArrowBatch(data: ParseGeoArrowInput): ParseGeoArrowResult { let binaryDataFromGeoArrow: BinaryDataFromGeoArrow | null = null; - const {chunkData, chunkIndex, geometryEncoding, meanCenter, triangle} = data; + const {chunkData, chunkIndex, geometryEncoding, calculateMeanCenters, triangle} = data; // rebuild chunkData const arrowData = new arrow.Data( chunkData.type, @@ -72,7 +72,7 @@ function parseGeoArrowBatch(data: ParseGeoArrowInput): ParseGeoArrowResult { // rebuild geometry column with chunkData const geometryColumn = arrow.makeVector(arrowData); if (geometryColumn) { - const options = {meanCenter, triangle, chunkIndex}; + const options = {calculateMeanCenters, triangle, chunkIndex}; binaryDataFromGeoArrow = getBinaryGeometriesFromArrow( geometryColumn, geometryEncoding, diff --git a/modules/arrow/test/triangulate-on-worker.spec.ts b/modules/arrow/test/triangulate-on-worker.spec.ts index 25fe05ed98..d3ef687eab 100644 --- a/modules/arrow/test/triangulate-on-worker.spec.ts +++ b/modules/arrow/test/triangulate-on-worker.spec.ts @@ -3,7 +3,12 @@ import * as arrow from 'apache-arrow'; import test from 'tape-promise/tape'; -import {triangulateOnWorker, parseGeoArrowOnWorker, TriangulationWorker} from '@loaders.gl/arrow'; +import { + triangulateOnWorker, + parseGeoArrowOnWorker, + TriangulationWorker, + hardClone +} from '@loaders.gl/arrow'; import {fetchFile} from '@loaders.gl/core'; import {processOnWorker, isBrowser, WorkerFarm} from '@loaders.gl/worker-utils'; import {GEOARROW_POINT_FILE} from './data/geoarrow/test-cases'; @@ -81,18 +86,19 @@ test('parseGeoArrowOnWorker', async (t) => { const geometryChunk = geometryColumn?.data[0]; if (geometryChunk) { + const chunkCopy = hardClone(geometryChunk, true); const chunkData = { type: { - ...geometryChunk?.type, - typeId: geometryChunk?.typeId, - listSize: geometryChunk?.type?.listSize + ...chunkCopy?.type, + typeId: chunkCopy?.typeId, + listSize: chunkCopy?.type?.listSize }, - offset: geometryChunk.offset, - length: geometryChunk.length, - nullCount: geometryChunk.nullCount, - buffers: geometryChunk.buffers, - children: geometryChunk.children, - dictionary: geometryChunk.dictionary + offset: chunkCopy.offset, + length: chunkCopy.length, + nullCount: chunkCopy.nullCount, + buffers: chunkCopy.buffers, + children: chunkCopy.children, + dictionary: chunkCopy.dictionary }; const parsedGeoArrowData = await parseGeoArrowOnWorker( @@ -101,7 +107,7 @@ test('parseGeoArrowOnWorker', async (t) => { chunkData, chunkIndex: 0, geometryEncoding: 'geoarrow.point', - meanCenter: true, + calculateMeanCenters: true, triangle: false }, { From a3f801ac78481d9892fef4195866204173bc901e Mon Sep 17 00:00:00 2001 From: Xun Li Date: Sat, 2 Dec 2023 16:40:11 -0700 Subject: [PATCH 13/14] support multiple web workers Signed-off-by: Xun Li --- .../convert-geoarrow-to-binary-geometry.ts | 22 +++++----- modules/arrow/src/index.ts | 3 +- .../arrow/src/workers/triangulation-worker.ts | 7 +-- ...onvert-geoarrow-to-binary-geometry.spec.ts | 44 +++++++++---------- 4 files changed, 40 insertions(+), 36 deletions(-) diff --git a/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts b/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts index 248beb6bd4..85679b1472 100644 --- a/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts +++ b/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts @@ -54,13 +54,15 @@ type BinaryGeometryContent = { /** * binary geometry template, see deck.gl BinaryGeometry */ -export const BINARY_GEOMETRY_TEMPLATE = { - globalFeatureIds: {value: new Uint32Array(0), size: 1}, - positions: {value: new Float32Array(0), size: 2}, - properties: [], - numericProps: {}, - featureIds: {value: new Uint32Array(0), size: 1} -}; +export function getBinaryGeometryTemplate() { + return { + globalFeatureIds: {value: new Uint32Array(0), size: 1}, + positions: {value: new Float32Array(0), size: 2}, + properties: [], + numericProps: {}, + featureIds: {value: new Uint32Array(0), size: 1} + }; +} export type BinaryGeometriesFromArrowOptions = { /** option to specify which chunk to get binary geometries from, for progressive rendering */ @@ -127,18 +129,18 @@ export function getBinaryGeometriesFromArrow( shape: 'binary-feature-collection', points: { type: 'Point', - ...BINARY_GEOMETRY_TEMPLATE, + ...getBinaryGeometryTemplate(), ...(featureTypes.point ? binaryContent : {}) }, lines: { type: 'LineString', - ...BINARY_GEOMETRY_TEMPLATE, + ...getBinaryGeometryTemplate(), ...(featureTypes.line ? binaryContent : {}), pathIndices: {value: featureTypes.line ? geomOffset : new Uint16Array(0), size: 1} }, polygons: { type: 'Polygon', - ...BINARY_GEOMETRY_TEMPLATE, + ...getBinaryGeometryTemplate(), ...(featureTypes.polygon ? binaryContent : {}), polygonIndices: { // use geomOffset as polygonIndices same as primitivePolygonIndices since we are using earcut to get triangule indices diff --git a/modules/arrow/src/index.ts b/modules/arrow/src/index.ts index e030054718..c9d0229169 100644 --- a/modules/arrow/src/index.ts +++ b/modules/arrow/src/index.ts @@ -52,7 +52,7 @@ export type { BinaryGeometriesFromArrowOptions } from './geoarrow/convert-geoarrow-to-binary-geometry'; export { - BINARY_GEOMETRY_TEMPLATE, + getBinaryGeometryTemplate, getBinaryGeometriesFromArrow, getTriangleIndices, getMeanCentersFromBinaryGeometries @@ -67,6 +67,7 @@ export {convertArrowToGeoJSONTable} from './tables/convert-arrow-to-geojson-tabl // EXPERIMENTAL WORKER export {hardClone} from './workers/hard-clone'; +export type {ParseGeoArrowResult} from './triangulate-on-worker'; export { TriangulationWorker, triangulateOnWorker, diff --git a/modules/arrow/src/workers/triangulation-worker.ts b/modules/arrow/src/workers/triangulation-worker.ts index b76bc8e43e..1c07c68be8 100644 --- a/modules/arrow/src/workers/triangulation-worker.ts +++ b/modules/arrow/src/workers/triangulation-worker.ts @@ -72,7 +72,8 @@ function parseGeoArrowBatch(data: ParseGeoArrowInput): ParseGeoArrowResult { // rebuild geometry column with chunkData const geometryColumn = arrow.makeVector(arrowData); if (geometryColumn) { - const options = {calculateMeanCenters, triangle, chunkIndex}; + // NOTE: for a rebuild arrow.Vector, there is only one chunk, so chunkIndex is always 0 + const options = {calculateMeanCenters, triangle, chunkIndex: 0}; binaryDataFromGeoArrow = getBinaryGeometriesFromArrow( geometryColumn, geometryEncoding, @@ -81,11 +82,11 @@ function parseGeoArrowBatch(data: ParseGeoArrowInput): ParseGeoArrowResult { // NOTE: here binaryGeometry will be copied to main thread return { binaryDataFromGeoArrow, - chunkIndex: data.chunkIndex + chunkIndex }; } return { binaryDataFromGeoArrow, - chunkIndex: data.chunkIndex + chunkIndex }; } diff --git a/modules/arrow/test/geoarrow/convert-geoarrow-to-binary-geometry.spec.ts b/modules/arrow/test/geoarrow/convert-geoarrow-to-binary-geometry.spec.ts index 2c87dc2a93..7354f7de3b 100644 --- a/modules/arrow/test/geoarrow/convert-geoarrow-to-binary-geometry.spec.ts +++ b/modules/arrow/test/geoarrow/convert-geoarrow-to-binary-geometry.spec.ts @@ -6,7 +6,7 @@ import test, {Test} from 'tape-promise/tape'; import {getGeometryColumnsFromSchema} from '@loaders.gl/gis'; import {load} from '@loaders.gl/core'; import { - BINARY_GEOMETRY_TEMPLATE, + getBinaryGeometryTemplate, ArrowLoader, getBinaryGeometriesFromArrow, serializeArrowSchema @@ -27,7 +27,7 @@ const expectedPointBinaryGeometry = { { shape: 'binary-feature-collection', points: { - ...BINARY_GEOMETRY_TEMPLATE, + ...getBinaryGeometryTemplate(), type: 'Point', globalFeatureIds: {value: new Uint32Array([0, 1]), size: 1}, positions: {value: new Float64Array([1, 1, 2, 2]), size: 2}, @@ -35,12 +35,12 @@ const expectedPointBinaryGeometry = { featureIds: {value: new Uint32Array([0, 1]), size: 1} }, lines: { - ...BINARY_GEOMETRY_TEMPLATE, + ...getBinaryGeometryTemplate(), type: 'LineString', pathIndices: {value: new Uint16Array(0), size: 1} }, polygons: { - ...BINARY_GEOMETRY_TEMPLATE, + ...getBinaryGeometryTemplate(), type: 'Polygon', polygonIndices: {value: new Uint16Array(0), size: 1}, primitivePolygonIndices: {value: new Uint16Array(0), size: 1} @@ -60,7 +60,7 @@ const expectedMultiPointBinaryGeometry = { { shape: 'binary-feature-collection', points: { - ...BINARY_GEOMETRY_TEMPLATE, + ...getBinaryGeometryTemplate(), type: 'Point', globalFeatureIds: {value: new Uint32Array([0, 0, 1, 1]), size: 1}, positions: {value: new Float64Array([1, 1, 2, 2, 3, 3, 4, 4]), size: 2}, @@ -68,12 +68,12 @@ const expectedMultiPointBinaryGeometry = { featureIds: {value: new Uint32Array([0, 0, 1, 1]), size: 1} }, lines: { - ...BINARY_GEOMETRY_TEMPLATE, + ...getBinaryGeometryTemplate(), type: 'LineString', pathIndices: {value: new Uint16Array(0), size: 1} }, polygons: { - ...BINARY_GEOMETRY_TEMPLATE, + ...getBinaryGeometryTemplate(), type: 'Polygon', polygonIndices: {value: new Uint16Array(0), size: 1}, primitivePolygonIndices: {value: new Uint16Array(0), size: 1} @@ -93,11 +93,11 @@ const expectedLineBinaryGeometry = { { shape: 'binary-feature-collection', points: { - ...BINARY_GEOMETRY_TEMPLATE, + ...getBinaryGeometryTemplate(), type: 'Point' }, lines: { - ...BINARY_GEOMETRY_TEMPLATE, + ...getBinaryGeometryTemplate(), type: 'LineString', globalFeatureIds: {value: new Uint32Array([0, 0, 1, 1]), size: 1}, positions: {value: new Float64Array([0, 0, 1, 1, 2, 2, 3, 3]), size: 2}, @@ -106,7 +106,7 @@ const expectedLineBinaryGeometry = { pathIndices: {value: new Int32Array([0, 2, 4]), size: 1} }, polygons: { - ...BINARY_GEOMETRY_TEMPLATE, + ...getBinaryGeometryTemplate(), type: 'Polygon', polygonIndices: {value: new Uint16Array(0), size: 1}, primitivePolygonIndices: {value: new Uint16Array(0), size: 1} @@ -126,11 +126,11 @@ const expectedMultiLineBinaryGeometry = { { shape: 'binary-feature-collection', points: { - ...BINARY_GEOMETRY_TEMPLATE, + ...getBinaryGeometryTemplate(), type: 'Point' }, lines: { - ...BINARY_GEOMETRY_TEMPLATE, + ...getBinaryGeometryTemplate(), type: 'LineString', globalFeatureIds: {value: new Uint32Array([0, 0, 0, 0, 1, 1, 1, 1]), size: 1}, positions: { @@ -142,7 +142,7 @@ const expectedMultiLineBinaryGeometry = { pathIndices: {value: new Int32Array([0, 2, 4, 6, 8]), size: 1} }, polygons: { - ...BINARY_GEOMETRY_TEMPLATE, + ...getBinaryGeometryTemplate(), type: 'Polygon', polygonIndices: {value: new Uint16Array(0), size: 1}, primitivePolygonIndices: {value: new Uint16Array(0), size: 1} @@ -162,16 +162,16 @@ const expectedPolygonBinaryGeometry = { { shape: 'binary-feature-collection', points: { - ...BINARY_GEOMETRY_TEMPLATE, + ...getBinaryGeometryTemplate(), type: 'Point' }, lines: { - ...BINARY_GEOMETRY_TEMPLATE, + ...getBinaryGeometryTemplate(), type: 'LineString', pathIndices: {value: new Uint16Array(0), size: 1} }, polygons: { - ...BINARY_GEOMETRY_TEMPLATE, + ...getBinaryGeometryTemplate(), type: 'Polygon', globalFeatureIds: { value: new Uint32Array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]), @@ -204,16 +204,16 @@ const expectedMultiPolygonBinaryGeometry = { { shape: 'binary-feature-collection', points: { - ...BINARY_GEOMETRY_TEMPLATE, + ...getBinaryGeometryTemplate(), type: 'Point' }, lines: { - ...BINARY_GEOMETRY_TEMPLATE, + ...getBinaryGeometryTemplate(), type: 'LineString', pathIndices: {value: new Uint16Array(0), size: 1} }, polygons: { - ...BINARY_GEOMETRY_TEMPLATE, + ...getBinaryGeometryTemplate(), type: 'Polygon', globalFeatureIds: { value: new Uint32Array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), @@ -244,16 +244,16 @@ const expectedMultiPolygonHolesBinaryGeometry = { { shape: 'binary-feature-collection', points: { - ...BINARY_GEOMETRY_TEMPLATE, + ...getBinaryGeometryTemplate(), type: 'Point' }, lines: { - ...BINARY_GEOMETRY_TEMPLATE, + ...getBinaryGeometryTemplate(), type: 'LineString', pathIndices: {value: new Uint16Array(0), size: 1} }, polygons: { - ...BINARY_GEOMETRY_TEMPLATE, + ...getBinaryGeometryTemplate(), type: 'Polygon', globalFeatureIds: { value: new Uint32Array([ From 44652be5c69368a8fbfe4a399a01103e86a1d0c2 Mon Sep 17 00:00:00 2001 From: Xun Li Date: Mon, 4 Dec 2023 12:51:17 -0700 Subject: [PATCH 14/14] add chunkOffset for globalFeatureId in webworker Signed-off-by: Xun Li --- .../convert-geoarrow-to-binary-geometry.ts | 4 ++- modules/arrow/src/index.ts | 2 +- modules/arrow/src/triangulate-on-worker.ts | 1 + .../arrow/src/workers/triangulation-worker.ts | 7 +++-- .../arrow/test/triangulate-on-worker.spec.ts | 29 ++++++++++--------- 5 files changed, 24 insertions(+), 19 deletions(-) diff --git a/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts b/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts index 85679b1472..ad484595b3 100644 --- a/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts +++ b/modules/arrow/src/geoarrow/convert-geoarrow-to-binary-geometry.ts @@ -67,6 +67,8 @@ export function getBinaryGeometryTemplate() { export type BinaryGeometriesFromArrowOptions = { /** option to specify which chunk to get binary geometries from, for progressive rendering */ chunkIndex?: number; + /** The offset (beginning index of rows) of input chunk. Used for reconstructing globalFeatureIds in web workers */ + chunkOffset?: number; /** option to get mean centers from geometries, for polygon filtering */ calculateMeanCenters?: boolean; /** option to compute the triangle indices by tesselating polygons */ @@ -97,7 +99,7 @@ export function getBinaryGeometriesFromArrow( ? [geoColumn.data[options?.chunkIndex]] : geoColumn.data; let bounds: [number, number, number, number] = [Infinity, Infinity, -Infinity, -Infinity]; - let globalFeatureIdOffset = 0; + let globalFeatureIdOffset = options?.chunkOffset || 0; const binaryGeometries: BinaryFeatures[] = []; chunks.forEach((chunk) => { diff --git a/modules/arrow/src/index.ts b/modules/arrow/src/index.ts index c9d0229169..d3e01a0a7f 100644 --- a/modules/arrow/src/index.ts +++ b/modules/arrow/src/index.ts @@ -67,7 +67,7 @@ export {convertArrowToGeoJSONTable} from './tables/convert-arrow-to-geojson-tabl // EXPERIMENTAL WORKER export {hardClone} from './workers/hard-clone'; -export type {ParseGeoArrowResult} from './triangulate-on-worker'; +export type {ParseGeoArrowInput, ParseGeoArrowResult} from './triangulate-on-worker'; export { TriangulationWorker, triangulateOnWorker, diff --git a/modules/arrow/src/triangulate-on-worker.ts b/modules/arrow/src/triangulate-on-worker.ts index e06032100f..a44b9c4fb7 100644 --- a/modules/arrow/src/triangulate-on-worker.ts +++ b/modules/arrow/src/triangulate-on-worker.ts @@ -34,6 +34,7 @@ export type ParseGeoArrowInput = { operation: 'parse-geoarrow'; chunkData: GeoArrowChunkData; chunkIndex: number; + chunkOffset: number; geometryEncoding: GeoArrowEncoding; calculateMeanCenters: boolean; triangle: boolean; diff --git a/modules/arrow/src/workers/triangulation-worker.ts b/modules/arrow/src/workers/triangulation-worker.ts index 1c07c68be8..d6c784e11f 100644 --- a/modules/arrow/src/workers/triangulation-worker.ts +++ b/modules/arrow/src/workers/triangulation-worker.ts @@ -58,8 +58,9 @@ function triangulateBatch(data: TriangulateInput): TriangulateResult { */ function parseGeoArrowBatch(data: ParseGeoArrowInput): ParseGeoArrowResult { let binaryDataFromGeoArrow: BinaryDataFromGeoArrow | null = null; - const {chunkData, chunkIndex, geometryEncoding, calculateMeanCenters, triangle} = data; - // rebuild chunkData + const {chunkData, chunkIndex, chunkOffset, geometryEncoding, calculateMeanCenters, triangle} = + data; + // rebuild chunkData that is only for geoarrow column const arrowData = new arrow.Data( chunkData.type, chunkData.offset, @@ -73,7 +74,7 @@ function parseGeoArrowBatch(data: ParseGeoArrowInput): ParseGeoArrowResult { const geometryColumn = arrow.makeVector(arrowData); if (geometryColumn) { // NOTE: for a rebuild arrow.Vector, there is only one chunk, so chunkIndex is always 0 - const options = {calculateMeanCenters, triangle, chunkIndex: 0}; + const options = {calculateMeanCenters, triangle, chunkIndex: 0, chunkOffset}; binaryDataFromGeoArrow = getBinaryGeometriesFromArrow( geometryColumn, geometryEncoding, diff --git a/modules/arrow/test/triangulate-on-worker.spec.ts b/modules/arrow/test/triangulate-on-worker.spec.ts index d3ef687eab..1d5e1e7266 100644 --- a/modules/arrow/test/triangulate-on-worker.spec.ts +++ b/modules/arrow/test/triangulate-on-worker.spec.ts @@ -7,7 +7,8 @@ import { triangulateOnWorker, parseGeoArrowOnWorker, TriangulationWorker, - hardClone + hardClone, + ParseGeoArrowInput } from '@loaders.gl/arrow'; import {fetchFile} from '@loaders.gl/core'; import {processOnWorker, isBrowser, WorkerFarm} from '@loaders.gl/worker-utils'; @@ -101,19 +102,19 @@ test('parseGeoArrowOnWorker', async (t) => { dictionary: chunkCopy.dictionary }; - const parsedGeoArrowData = await parseGeoArrowOnWorker( - { - operation: 'parse-geoarrow', - chunkData, - chunkIndex: 0, - geometryEncoding: 'geoarrow.point', - calculateMeanCenters: true, - triangle: false - }, - { - _workerType: 'test' - } - ); + const parseGeoArrowInput: ParseGeoArrowInput = { + operation: 'parse-geoarrow', + chunkData, + chunkIndex: 0, + chunkOffset: 0, + geometryEncoding: 'geoarrow.point', + calculateMeanCenters: true, + triangle: false + }; + + const parsedGeoArrowData = await parseGeoArrowOnWorker(parseGeoArrowInput, { + _workerType: 'test' + }); // kepler should await for the result from web worker and render the binary geometries const {binaryGeometries, bounds, featureTypes, meanCenters} =