Skip to content

Commit

Permalink
refactor to parquet
Browse files Browse the repository at this point in the history
  • Loading branch information
nofurtherinformation committed Mar 29, 2024
1 parent b9dc531 commit 52a1994
Show file tree
Hide file tree
Showing 7 changed files with 311 additions and 221 deletions.
18 changes: 12 additions & 6 deletions components/Map/Map.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -111,19 +111,18 @@ const INITIAL_VIEW_STATE = {

const years = Array.from({ length: 25 }, (_, i) => 1997 + i)
export const Map = () => {
const { isReady, data, colorFunc, colors, ds, breaks, currentColumnSpec, currentDataSpec, currentFilter } =
const { isReady, data, testfn, colorFunc, colors, ds, breaks, currentColumnSpec, currentDataSpec, currentFilter } =
useDataService()
const getElementColor = (element: GeoJSON.Feature<GeoJSON.Polygon, GeoJSON.GeoJsonProperties>) => {
if (!isReady) {
return [120, 120, 120, 120]
}
const id = element?.properties?.GEOID
const d = data?.[id]
if (id === undefined || d === undefined) {
if (id === undefined) {
return [120, 120, 120, 120]
}
// @ts-ignore
return colorFunc(d)
return colorFunc(id)
}
const layers = [
new MVTLayer({
Expand Down Expand Up @@ -158,19 +157,26 @@ export const Map = () => {
const handleSetColumn = (col: string | number) => dispatch(setCurrentColumn(col))
const handleChangeData = (data: string) => dispatch(setCurrentData(data))
const handleSetFilter = (filter: string) => dispatch(setCurrentFilter(filter))

return (
<div style={{ width: "100vw", height: "100vh", position: "relative", top: 0, left: 0 }}>
<div style={{ position: "absolute", bottom: "2rem", right: "1rem", zIndex: 1000 }}>
<div className="ColorLegend">
<h3>{currentColumnSpec?.name}</h3>
{!!(colors.length && breaks.length) &&
{!!(colors?.length && breaks?.length) &&
colors.map((_, i) => <BreakText key={i} colors={colors} breaks={breaks} index={i} />)}
<p style={{ maxWidth: "35ch", fontSize: "0.75rem" }}>
<i>{currentDataSpec?.attribution}</i>
</p>
</div>
</div>
{/* <button style={{
position:'fixed',
top: 0,
left: 0,
zIndex: 500,
background:'red'
}} onClick={testfn}>TEST FN</button> */}
<div className="absolute left-4 top-4 z-50">
<DropdownMenuDemo>
<div className="p-4 max-w-[100vw]">
Expand Down
2 changes: 1 addition & 1 deletion components/Pages/Home/Renderer.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { TinaMarkdown } from "tinacms/dist/rich-text"

export const Renderer: React.FC<HomeProps> = ({ pageInfo }) => {
const sections = pageInfo.data.page.sections || []

return (
<article className="prose p-4">
<h1>Pages</h1>
Expand Down
1 change: 1 addition & 0 deletions components/Pages/Map/Renderer.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import React from "react"
// lazy load the map
import dynamic from "next/dynamic"
const Map = dynamic(() => import("components/Map/Map"), { ssr: false })

export function Renderer() {
return (
<>
Expand Down
229 changes: 161 additions & 68 deletions utils/data/service.ts
Original file line number Diff line number Diff line change
@@ -1,109 +1,202 @@
import defaultConfig from "./config";
import { DataConfig } from "./config.types";
import * as P from 'papaparse';
import type { AsyncDuckDB, AsyncDuckDBConnection } from "@duckdb/duckdb-wasm";
import {
getDuckDb,
runQuery,
loadParquet
} from 'utils/duckdb'
import defaultConfig from "./config"
import { DataConfig } from "./config.types"
import { DuckDBDataProtocol, type AsyncDuckDB, type AsyncDuckDBConnection } from "@duckdb/duckdb-wasm"
import { getDuckDb, runQuery } from "utils/duckdb"
import * as d3 from "d3"
import tinycolor from "tinycolor2"

export class DataService {
config: DataConfig[];
data: Record<string, Record<string, Record<string|number, number>>> = {};
complete: Array<string> = [];
eagerData: Array<string> = [];
completeCallback?: (s: string) => void;
hasRunWasm: boolean = false;
dbStatus: 'none' |'loading' | 'loaded' | 'error' = 'none';
db?: AsyncDuckDB;
baseURL: string = window.location.origin;
conn?: AsyncDuckDBConnection;
config: DataConfig[]
data: Record<string, Record<string, Record<string | number, number>>> = {}
complete: Array<string> = []
eagerData: Array<string> = []
completeCallback?: (s: string) => void
hasRunWasm: boolean = false
dbStatus: "none" | "loading" | "loaded" | "error" = "none"
db?: AsyncDuckDB
baseURL: string = window.location.origin
conn?: AsyncDuckDBConnection

constructor(completeCallback?: (s: string) => void, config: DataConfig[] = defaultConfig) {
this.config = config;
this.completeCallback = completeCallback;
this.config = config
this.completeCallback = completeCallback
}

initData(){
console.log('FETCHING DATA!!!')
const eagerData = this.config.filter(c => c.eager);
eagerData.forEach(c => this.fetchData(c));
initData() {
const eagerData = this.config.filter((c) => c.eager)
eagerData.forEach((c) => this.registerData(c))
}

async waitForDb(){
if (this.dbStatus === 'loaded') {
return;
async waitForDb() {
if (this.dbStatus === "loaded") {
return
}
while (this.dbStatus === 'loading') {
await new Promise((r) => setTimeout(r, 100));
while (this.dbStatus === "loading") {
await new Promise((r) => setTimeout(r, 100))
}
}
async initDb(){
console.log('RUNNING WASM!!!')
if (this.dbStatus === 'loaded') {
return;
} else if (this.dbStatus === 'loading') {
console.log('Waiting for db to load');
return this.waitForDb();
}
this.dbStatus = 'loading';
async initDb() {
if (this.dbStatus === "loaded") {
return
} else if (this.dbStatus === "loading") {
return this.waitForDb()
}
this.dbStatus = "loading"
this.db = await getDuckDb()
this.conn = await this.db.connect()
this.dbStatus = 'loaded';
this.dbStatus = "loaded"
}

backgroundDataLoad(){
backgroundDataLoad() {
if (this.complete.length === this.config.length) {
const remainingData = this.config.filter(c => !this.complete.includes(c.filename));
remainingData.forEach(c => this.fetchData(c));
const remainingData = this.config.filter((c) => !this.complete.includes(c.filename))
remainingData.forEach((c) => this.registerData(c))
}
}

/**
 * Registers one configured file with DuckDB so it can be queried by its bare filename.
 *
 * Does nothing when the filename is already listed in `this.complete`. Otherwise it waits
 * for the database to be initialised, registers `${baseURL}/${filename}` under the name
 * `filename` using the HTTP protocol, notifies `completeCallback` (if one is set) and
 * finally records the filename in `this.complete`.
 *
 * @param config - Data configuration entry; only `config.filename` is read here.
 */
async registerData(config: DataConfig) {
  const { filename } = config
  const alreadyRegistered = this.complete.includes(filename)
  if (alreadyRegistered) {
    return
  }

  await this.initDb()

  // The file is served from the same origin as the page.
  const fileUrl = `${this.baseURL}/${filename}`
  // NOTE(review): the trailing `false` is presumably duckdb-wasm's `directIO` flag — confirm against the API docs.
  await this.db!.registerFileURL(filename, fileUrl, DuckDBDataProtocol.HTTP, false)

  // The callback is invoked before the filename is appended to `complete`.
  this.completeCallback?.(filename)
  this.complete.push(filename)
}

/**
 * Builds the single-quoted source expression to place after `FROM` in a query.
 *
 * Files already listed in `this.complete` are referenced by their bare filename;
 * anything else is referenced by its full URL under `this.baseURL`.
 *
 * @param filename - File name as it appears in the data configuration.
 * @returns The filename or URL wrapped in single quotes.
 */
getFromQueryString(filename: string) {
  const isRegistered = this.complete.includes(filename)
  const source = isRegistered ? filename : `${this.baseURL}/${filename}`
  return `'${source}'`
}

/**
 * Runs a SQL query on this service's DuckDB connection.
 *
 * Waits for the database to be initialised first. Any error raised while running the
 * query is logged with `console.error` and an empty array is returned instead, so
 * callers never see a rejection caused by the query itself.
 *
 * @param query - SQL text to execute.
 * @returns Whatever the imported `runQuery` helper resolves to, or `[]` on failure.
 */
async runQuery(query: string) {
  await this.initDb()
  const request = { conn: this.conn!, query }
  try {
    const rows = await runQuery(request)
    return rows
  } catch (err) {
    console.error(err)
    return []
  }
}
ingestData(data: Array<any>, config: DataConfig, dataStore: any){
/**
 * Computes the interior break points that split `column` into `n` quantile bins.
 *
 * @param column - Column to compute quantiles over; interpolated as a double-quoted identifier.
 * @param table - File name of the data source, resolved through `getFromQueryString`.
 * @param n - Number of bins. `n` bins produce `n - 1` breaks.
 * @returns The `n - 1` break values, or `[]` when `n` is below 2 or the query
 *   yields no rows (`this.runQuery` resolves to `[]` on failure).
 */
async getQuantiles(column: string | number, table: string, n: number): Promise<Array<number>> {
  // Fewer than two bins means there are no interior breaks. Building the query anyway
  // would produce a SELECT with an empty column list, which is not valid SQL.
  // Written as a negated comparison so that NaN is rejected as well.
  if (!(n >= 2)) {
    return []
  }
  // breakpoints to use for quantile breaks
  // eg. n=5 - 0.2, 0.4, 0.6, 0.8 - 4 breaks
  // eg. n=4 - 0.25, 0.5, 0.75 - 3 breaks
  const quantileFractions = Array.from({ length: n - 1 }, (_, i) => (i + 1) / n)
  // Join explicitly rather than relying on Array.prototype.toString inside the template literal.
  const selectList = quantileFractions
    .map((f, i) => `approx_quantile("${column}", ${f}) as break${i}`)
    .join(", ")
  const query = `SELECT
${selectList}
FROM ${this.getFromQueryString(table)};
`
  const result = await this.runQuery(query)
  if (!result || result.length === 0) {
    console.error(`No results for quantile query: ${query}`)
    return []
  }
  // The single result row holds one field per break (break0, break1, ...), in select order.
  // @ts-ignore
  return Object.values(result[0]) as Array<number>
}
/**
 * Assigns every row of `table` a colour by binning `column` into `n` quantile classes.
 *
 * The palette is looked up as `d3[colorScheme][n]`, converted to `[r, g, b]` triples and
 * optionally reversed. The colour for each row is computed inside the database with a
 * `CASE` expression built from the quantile breaks.
 *
 * @param idColumn - Column whose value becomes the key of the returned `colorMap`.
 * @param colorScheme - Name of a property on the `d3` module that is indexed by bin count.
 * @param reversed - When true, the palette order is flipped.
 * @param column - Column whose values are binned.
 * @param table - File name of the data source, resolved through `getFromQueryString`.
 * @param n - Number of bins / colours.
 * @returns `colorMap` (id -> colour), `breaks` (the quantile break values) and `colors`
 *   (the RGB palette). All three are empty when the palette cannot be found; `colorMap`
 *   and `breaks` are empty when no quantile breaks could be computed.
 */
async getColorValues(
  idColumn: string,
  colorScheme: string,
  reversed: boolean,
  column: string | number,
  table: string,
  n: number
) {
  // @ts-ignore
  const d3Colors = d3[colorScheme]?.[n]
  if (!d3Colors) {
    console.error(`Color scheme ${colorScheme} with ${n} bins not found`)
    return {
      colorMap: {},
      breaks: [],
      colors: [],
    }
  }
  const rgbColors = d3Colors.map((c: any) => {
    const tc = tinycolor(c).toRgb()
    return [tc.r, tc.g, tc.b]
  })
  if (reversed) {
    // Reverses the freshly mapped array in place; the d3 palette itself is untouched.
    rgbColors.reverse()
  }
  const quantiles = await this.getQuantiles(column, table, n)
  if (quantiles.length === 0) {
    // Without breaks the CASE expression below would have no WHEN arm and the query
    // would be rejected. Return the result that failure path produced, without the
    // extra round trip.
    return {
      colorMap: {},
      breaks: quantiles,
      colors: rgbColors,
    }
  }
  // One WHEN arm per break, in ascending order; values at or above the last break take the final colour.
  // NOTE(review): a NULL value in the column compares as NULL against every break and so
  // also lands in the ELSE arm (the last colour) — confirm that is the intended rendering for missing data.
  const whenClauses = quantiles.map((q, i) => `WHEN "${column}" < ${q} THEN [${rgbColors[i]}]`).join("\n")
  const query = `
SELECT "${column}", "${idColumn}",
CASE
${whenClauses}
ELSE [${rgbColors[rgbColors.length - 1]}]
END as color
FROM ${this.getFromQueryString(table)};
`
  // @ts-ignore
  const colorValues = await this.runQuery(query)
  const colorMap = {}
  for (let i = 0; i < colorValues.length; i++) {
    // @ts-expect-error
    colorMap[colorValues[i][idColumn]] = colorValues[i].color.toJSON()
  }
  return {
    colorMap,
    breaks: quantiles,
    colors: rgbColors,
  }
}

ingestData(data: Array<any>, config: DataConfig, dataStore: any) {
console.log(config, data[0])
for (let i=0; i<data.length; i++) {
const row = data[i];
for (let i = 0; i < data.length; i++) {
const row = data[i]
if (!row?.[config.id]) {
console.error(`Row ${i} in ${config.filename} is missing a valid id`);
continue;
console.error(`Row ${i} in ${config.filename} is missing a valid id`)
continue
}
let id = `${row[config.id]}`
// if (id.length === 10) {
// id = `0${id}`
// }
dataStore[id] = {
...row,
id
};
id,
}
// @ts-ignore
}
console.log("All done!");
console.log("All done!")
if (this.completeCallback) {
this.completeCallback(config.filename);
this.completeCallback(config.filename)
}
this.complete.push(config.filename);
this.complete.push(config.filename)
}
async fetchData(config: DataConfig){
async fetchData(config: DataConfig) {
if (this.complete.includes(config.filename)) {
return;
return
}
await this.initDb();
await this.initDb()
const dataStore = this.data[config.filename]
if (this.data[config.filename]) {
// console.error(`Data store already exists for ${config.filename}`);
return;
return
}
this.data[config.filename] = {};
const r = await runQuery(
this.db!,
`SELECT * FROM '${this.baseURL}/${config.filename}'`
)
this.ingestData(r, config, this.data[config.filename]);

this.data[config.filename] = {}
}

setCompleteCallback(cb: (s: string) => void){
this.completeCallback = cb;
this.complete.forEach(cb);
setCompleteCallback(cb: (s: string) => void) {
this.completeCallback = cb
this.complete.forEach(cb)
}
}
}

export const ds = new DataService()
Loading

0 comments on commit 52a1994

Please sign in to comment.