Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
ibgreen committed Jan 30, 2024
1 parent 8caf8ce commit cc9b2e9
Show file tree
Hide file tree
Showing 9 changed files with 22 additions and 55 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@
// SPDX-License-Identifier: MIT
// Copyright (c) vis.gl contributors

// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)
// __VERSION__ is injected by babel-plugin-version-inline
// @ts-ignore TS2304: Cannot find name '__VERSION__'.
export const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';
export const PARQUET_WASM_URL = 'https://unpkg.com/parquet-wasm@<version>/esm/arrow1_bg.wasm';

/**
* Parquet File Magic String
*/
Expand Down
5 changes: 3 additions & 2 deletions modules/parquet/src/lib/wasm/encode-parquet-wasm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,11 @@ export async function encode(
const ipcStream = arrow.tableToIPC(arrowTable);

// Pass the IPC stream to the Parquet writer.
const wasmTable = wasm.Table.fromIPCStream(ipcStream);
// const wasmTable = wasm.Table.fromIPCStream(ipcStream);
const wasmProperties = new wasm.WriterPropertiesBuilder().build();
try {
const parquetBytes = wasm.writeParquet(wasmTable, wasmProperties);
const parquetBytes = wasm.writeParquet(ipcStream, wasmProperties);

Check failure on line 30 in modules/parquet/src/lib/wasm/encode-parquet-wasm.ts

View workflow job for this annotation

GitHub Actions / test (16)

Argument of type 'Uint8Array' is not assignable to parameter of type 'Table'.

Check failure on line 30 in modules/parquet/src/lib/wasm/encode-parquet-wasm.ts

View workflow job for this annotation

GitHub Actions / test (18)

Argument of type 'Uint8Array' is not assignable to parameter of type 'Table'.

Check failure on line 30 in modules/parquet/src/lib/wasm/encode-parquet-wasm.ts

View workflow job for this annotation

GitHub Actions / test (20)

Argument of type 'Uint8Array' is not assignable to parameter of type 'Table'.
// const parquetBytes = wasm.writeParquet(wasmTable, wasmProperties);
return parquetBytes.buffer.slice(
parquetBytes.byteOffset,
parquetBytes.byteLength + parquetBytes.byteOffset
Expand Down
19 changes: 0 additions & 19 deletions modules/parquet/src/lib/wasm/load-wasm-browser.ts

This file was deleted.

7 changes: 4 additions & 3 deletions modules/parquet/src/lib/wasm/load-wasm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,18 @@
// Copyright (c) vis.gl contributors

// eslint-disable-next-line import/default
import initWasm from 'parquet-wasm';
import initWasm from 'parquet-wasm/esm/arrow1';
import * as parquetWasm from 'parquet-wasm';
import {PARQUET_WASM_URL} from '../constants';

let initializePromise: any;

export async function loadWasm(wasmUrl?: string) {
export async function loadWasm(wasmUrl: string = PARQUET_WASM_URL) {
if (!initializePromise && typeof initWasm === 'function') {
if (!wasmUrl) {
throw new Error('ParquetLoader: No wasmUrl provided');
}
// @ts-expect-error
// @ts-ignore
initializePromise = initWasm(wasmUrl);
}
await initializePromise;
Expand Down
7 changes: 2 additions & 5 deletions modules/parquet/src/parquet-wasm-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,7 @@ import type {Loader, LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-u
import type {ArrowTable} from '@loaders.gl/arrow';

import {parseParquetWasm} from './lib/wasm/parse-parquet-wasm';

// __VERSION__ is injected by babel-plugin-version-inline
// @ts-ignore TS2304: Cannot find name '__VERSION__'.
const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';
import {VERSION, PARQUET_WASM_URL} from './lib/constants';

/** Parquet WASM loader options */
export type ParquetWasmLoaderOptions = LoaderOptions & {
Expand All @@ -34,7 +31,7 @@ export const ParquetWasmWorkerLoader: Loader<ArrowTable, never, ParquetWasmLoade
options: {
parquet: {
type: 'arrow-table',
wasmUrl: 'https://unpkg.com/parquet-wasm@<version>/esm/arrow1_bg.wasm'
wasmUrl: PARQUET_WASM_URL
}
}
};
Expand Down
6 changes: 2 additions & 4 deletions modules/parquet/src/parquet-wasm-writer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,7 @@ import type {ArrowTable} from '@loaders.gl/arrow';
import {encode} from './lib/wasm/encode-parquet-wasm';
import type {WriterOptions} from '@loaders.gl/loader-utils';

// __VERSION__ is injected by babel-plugin-version-inline
// @ts-ignore TS2304: Cannot find name '__VERSION__'.
const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';
import {VERSION, PARQUET_WASM_URL} from './lib/constants';

export type ParquetWriterOptions = WriterOptions & {
parquet?: {
Expand All @@ -28,7 +26,7 @@ export const ParquetWasmWriter: WriterWithEncoder<ArrowTable, never, ParquetWrit
binary: true,
options: {
parquet: {
wasmUrl: 'https://unpkg.com/parquet-wasm@<version>/esm2/arrow1_bg.wasm'
wasmUrl: PARQUET_WASM_URL
}
},
encode(arrowTable: ArrowTable, options?: ParquetWriterOptions) {
Expand Down
2 changes: 1 addition & 1 deletion modules/parquet/src/parquetjs/parser/decoders.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import {
Type
} from '../parquet-thrift';
import {decompress} from '../compression';
import {PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING} from '../../constants';
import {PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING} from '../../lib/constants';
import {decodePageHeader, getThriftEnum, getBitWidth} from '../utils/read-utils';

/**
Expand Down
2 changes: 1 addition & 1 deletion modules/parquet/src/parquetjs/parser/parquet-reader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import {ParquetSchema} from '../schema/schema';
import {decodeSchema} from './decoders';
import {materializeRows} from '../schema/shred';

import {PARQUET_MAGIC, PARQUET_MAGIC_ENCRYPTED} from '../../constants';
import {PARQUET_MAGIC, PARQUET_MAGIC_ENCRYPTED} from '../../lib/constants';
import {ColumnChunk, CompressionCodec, FileMetaData, RowGroup, Type} from '../parquet-thrift';
import {
ParquetRowGroup,
Expand Down
23 changes: 4 additions & 19 deletions modules/parquet/test/parquet-wasm-loader.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import * as arrow from 'apache-arrow';
import {WASM_SUPPORTED_FILES} from './data/files';

const PARQUET_DIR = '@loaders.gl/parquet/test/data';
const WASM_URL = 'node_modules/parquet-wasm/esm2/arrow1_bg.wasm';

setLoaderOptions({
_workerType: 'test'
Expand All @@ -21,13 +20,9 @@ test('ParquetLoader#loader objects', (t) => {
t.end();
});

test('ParquetWasmLoader#Load Parquet file', async (t) => {
test('Load Parquet file', async (t) => {
const url = `${PARQUET_DIR}/geoparquet/example.parquet`;
const table = await load(url, ParquetWasmLoader, {
parquet: {
wasmUrl: WASM_URL
}
});
const table = await load(url, ParquetWasmLoader, {});
const arrowTable = table.data;
t.equal(arrowTable.numRows, 5);
t.deepEqual(table.schema?.fields.map((f) => f.name), [
Expand All @@ -45,32 +40,22 @@ test('ParquetWasmLoader#load', async (t) => {
t.comment('SUPPORTED FILES');
for (const {title, path} of WASM_SUPPORTED_FILES) {
const url = `${PARQUET_DIR}/apache/${path}`;
const table = await load(url, ParquetWasmLoader, {
parquet: {
wasmUrl: WASM_URL
}
});
const table = await load(url, ParquetWasmLoader);
const arrowTable = table.data;
t.ok(arrowTable, `GOOD(${title})`);
}

t.end();
});

test('ParquetWasmWriter#writer/loader round trip', async (t) => {
test.only('ParquetWasmWriter#writer/loader round trip', async (t) => {
const table = createArrowTable();

const parquetBuffer = await encode(table, ParquetWasmWriter, {
worker: false,
parquet: {
wasmUrl: WASM_URL
}
});
const newTable = await load(parquetBuffer, ParquetWasmLoader, {
worker: false,
parquet: {
wasmUrl: WASM_URL
}
});

t.deepEqual(table.data.schema, newTable.data.schema);
Expand Down

0 comments on commit cc9b2e9

Please sign in to comment.