From 461f6fac854f9fd767b39e6d9a5f432dab2c4969 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Mon, 10 Feb 2025 23:06:59 +0100 Subject: [PATCH 01/60] query memory buffer --- query.js | 65 ++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 52 insertions(+), 13 deletions(-) diff --git a/query.js b/query.js index 64c8ce4..e8c77d1 100644 --- a/query.js +++ b/query.js @@ -153,21 +153,60 @@ class QueryClient { } const files = await this.findRelevantFiles(parsed.type, parsed.timeRange); - if (!files.length) { - return []; - } - const connection = await this.db.connect(); + try { - const query = ` - SELECT ${parsed.columns} - FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}]) - ${parsed.timeRange ? `WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' - AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}'` : ''} - ${parsed.conditions} - ${parsed.orderBy} - ${parsed.limit} - `; + // Get buffered data for this type + const buffer = this.buffer?.buffers.get(parsed.type); + let query; + + if (buffer?.rows?.length) { + // Create temp table from buffer + await connection.query(` + CREATE TEMP TABLE buffer_data AS + SELECT * FROM ( + VALUES ${buffer.rows.map(row => `( + ${buffer.isLineProtocol ? + `'${row.timestamp.toISOString()}', '${row.tags}', ${Object.entries(row).filter(([k]) => !['timestamp', 'tags'].includes(k)).map(([,v]) => typeof v === 'string' ? `'${v}'` : v).join(', ')}` : + `'${new Date(row.create_date).toISOString()}', '${JSON.stringify(row.protocol_header)}', '${row.raw || ''}'`} + )`).join(', ')} + ) t(${buffer.isLineProtocol ? + `timestamp, tags, ${Object.keys(buffer.rows[0]).filter(k => !['timestamp', 'tags'].includes(k)).join(', ')}` : + 'timestamp, rcinfo, payload'}) + `); + + // Union buffer with parquet data + query = ` + WITH parquet_data AS ( + SELECT ${parsed.columns} + FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}]) + ${parsed.timeRange ? `WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' + AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}'` : ''} + ${parsed.conditions} + ) + SELECT * FROM ( + SELECT * FROM parquet_data + UNION ALL + SELECT ${parsed.columns} FROM buffer_data + WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' + AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' + ${parsed.conditions} + ) + ${parsed.orderBy} + ${parsed.limit} + `; + } else { + // No buffer data, just query parquet + query = ` + SELECT ${parsed.columns} + FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}]) + ${parsed.timeRange ? `WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' + AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}'` : ''} + ${parsed.conditions} + ${parsed.orderBy} + ${parsed.limit} + `; + } const reader = await connection.runAndReadAll(query); return reader.getRows().map(row => { From 06aedab1f50ba9872dc9b82993792a03378695e8 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Mon, 10 Feb 2025 23:28:18 +0100 Subject: [PATCH 02/60] query mem buffer --- hepop.js | 13 ++++++------- query.js | 3 ++- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/hepop.js b/hepop.js index 6bf1ddd..c00804d 100644 --- a/hepop.js +++ b/hepop.js @@ -966,19 +966,18 @@ class HEPServer { async initialize() { try { + // Initialize buffer manager this.buffer = new ParquetBufferManager(); await this.buffer.initialize(); - - this.compaction = new CompactionManager(this.buffer); - await this.compaction.initialize(); - // Initialize query client - this.queryClient = new QueryClient(this.buffer.baseDir); + // Initialize query client with buffer manager + this.queryClient = new QueryClient(this.buffer.baseDir, this.buffer); await this.queryClient.initialize(); - + + // Start servers await this.startServers(); } catch (error) { - console.error('Failed to initialize HEPServer:', error); + console.error('Failed to initialize HEP server:', error); throw error; } } diff --git a/query.js b/query.js index e8c77d1..cb01b4a 100644 --- a/query.js +++ b/query.js @@ -3,10 +3,11 @@ import path from 'path'; import fs from 'fs'; class QueryClient { - constructor(baseDir = './data') { + constructor(baseDir = './data', bufferManager = null) { this.baseDir = baseDir; this.db = null; this.defaultTimeRange = 10 * 60 * 1000000000; // 10 minutes in nanoseconds + this.buffer = bufferManager; // Store reference to buffer manager } async initialize() { From 1053555a564be0a08bb552bedfafdb4127f6997d Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Mon, 10 Feb 2025 23:29:27 +0100 Subject: [PATCH 03/60] query mem buffer --- query.js | 40 ++++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/query.js b/query.js index cb01b4a..c8f3e18 100644 --- a/query.js +++ b/query.js @@ -162,19 +162,35 @@ class QueryClient { let query; if (buffer?.rows?.length) { - // Create temp table from buffer - await connection.query(` - CREATE TEMP TABLE buffer_data AS - SELECT * FROM ( - VALUES ${buffer.rows.map(row => `( - ${buffer.isLineProtocol ? - `'${row.timestamp.toISOString()}', '${row.tags}', ${Object.entries(row).filter(([k]) => !['timestamp', 'tags'].includes(k)).map(([,v]) => typeof v === 'string' ? `'${v}'` : v).join(', ')}` : - `'${new Date(row.create_date).toISOString()}', '${JSON.stringify(row.protocol_header)}', '${row.raw || ''}'`} - )`).join(', ')} - ) t(${buffer.isLineProtocol ? - `timestamp, tags, ${Object.keys(buffer.rows[0]).filter(k => !['timestamp', 'tags'].includes(k)).join(', ')}` : + // Create temp table from buffer using VALUES + const valuesQuery = ` + CREATE TEMP TABLE IF NOT EXISTS buffer_data AS + SELECT * FROM (VALUES ${buffer.rows.map(row => { + if (buffer.isLineProtocol) { + return `( + TIMESTAMP '${row.timestamp.toISOString()}', + '${row.tags}', + ${Object.entries(row) + .filter(([k]) => !['timestamp', 'tags'].includes(k)) + .map(([,v]) => typeof v === 'string' ? `'${v}'` : v) + .join(', ')} + )`; + } else { + return `( + TIMESTAMP '${new Date(row.create_date).toISOString()}', + '${JSON.stringify(row.protocol_header)}', + '${row.raw || ''}' + )`; + } + }).join(', ')}) + AS t(${buffer.isLineProtocol ? + `timestamp, tags, ${Object.keys(buffer.rows[0]) + .filter(k => !['timestamp', 'tags'].includes(k)) + .join(', ')}` : 'timestamp, rcinfo, payload'}) - `); + `; + + await connection.runAndReadAll(valuesQuery); // Union buffer with parquet data query = ` From caccb659c07f8d4b6b33039512de48e8ffb663b3 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Mon, 10 Feb 2025 23:31:05 +0100 Subject: [PATCH 04/60] query mem buffer --- query.js | 51 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/query.js b/query.js index c8f3e18..c16584e 100644 --- a/query.js +++ b/query.js @@ -192,28 +192,40 @@ class QueryClient { await connection.runAndReadAll(valuesQuery); - // Union buffer with parquet data - query = ` - WITH parquet_data AS ( - SELECT ${parsed.columns} - FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}]) - ${parsed.timeRange ? `WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' - AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}'` : ''} - ${parsed.conditions} - ) - SELECT * FROM ( - SELECT * FROM parquet_data - UNION ALL + if (files.length > 0) { + // Union buffer with parquet data + query = ` + WITH parquet_data AS ( + SELECT ${parsed.columns} + FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}]) + ${parsed.timeRange ? `WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' + AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}'` : ''} + ${parsed.conditions} + ) + SELECT * FROM ( + SELECT * FROM parquet_data + UNION ALL + SELECT ${parsed.columns} FROM buffer_data + WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' + AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' + ${parsed.conditions} + ) + ${parsed.orderBy} + ${parsed.limit} + `; + } else { + // Only query buffer data + query = ` SELECT ${parsed.columns} FROM buffer_data WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' ${parsed.conditions} - ) - ${parsed.orderBy} - ${parsed.limit} - `; - } else { - // No buffer data, just query parquet + ${parsed.orderBy} + ${parsed.limit} + `; + } + } else if (files.length > 0) { + // Only query parquet files query = ` SELECT ${parsed.columns} FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}]) @@ -223,6 +235,9 @@ class QueryClient { ${parsed.orderBy} ${parsed.limit} `; + } else { + // No data available + return []; } const reader = await connection.runAndReadAll(query); From 58f24a2ea2f01c98d3a8d16abb10aaef2bf63a51 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Mon, 10 Feb 2025 23:32:33 +0100 Subject: [PATCH 05/60] query mem buffer --- query.js | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/query.js b/query.js index c16584e..a39fb1f 100644 --- a/query.js +++ b/query.js @@ -201,14 +201,18 @@ class QueryClient { ${parsed.timeRange ? `WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}'` : ''} ${parsed.conditions} + ), + buffer_data_selected AS ( + SELECT ${parsed.columns} + FROM buffer_data + WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' + AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' + ${parsed.conditions} ) SELECT * FROM ( SELECT * FROM parquet_data UNION ALL - SELECT ${parsed.columns} FROM buffer_data - WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' - AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' - ${parsed.conditions} + SELECT * FROM buffer_data_selected ) ${parsed.orderBy} ${parsed.limit} From 142b1e1be74f71175f9b4443e214b9630d996c9d Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Mon, 10 Feb 2025 23:33:43 +0100 Subject: [PATCH 06/60] query mem buffer --- query.js | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/query.js b/query.js index a39fb1f..f3c9d11 100644 --- a/query.js +++ b/query.js @@ -201,22 +201,23 @@ class QueryClient { ${parsed.timeRange ? `WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}'` : ''} ${parsed.conditions} - ), - buffer_data_selected AS ( - SELECT ${parsed.columns} - FROM buffer_data - WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' - AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' - ${parsed.conditions} ) SELECT * FROM ( - SELECT * FROM parquet_data + (SELECT * FROM parquet_data) UNION ALL - SELECT * FROM buffer_data_selected + (SELECT ${parsed.columns} + FROM buffer_data + WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' + AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' + ${parsed.conditions}) ) ${parsed.orderBy} ${parsed.limit} `; + + console.log('Generated query:', query); + console.log('Buffer rows:', buffer.rows.length); + console.log('Parquet files:', files.length); } else { // Only query buffer data query = ` From f491dd5039cc4de4f59672d56a505b8d338643b0 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Mon, 10 Feb 2025 23:35:13 +0100 Subject: [PATCH 07/60] query mem buffer --- query.js | 64 ++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 20 deletions(-) diff --git a/query.js b/query.js index f3c9d11..2cfe7fc 100644 --- a/query.js +++ b/query.js @@ -193,27 +193,51 @@ class QueryClient { await connection.runAndReadAll(valuesQuery); if (files.length > 0) { - // Union buffer with parquet data - query = ` - WITH parquet_data AS ( + // For aggregate queries, combine data before aggregating + const isAggregateQuery = parsed.columns.toLowerCase().includes('count(') || + parsed.columns.toLowerCase().includes('avg('); + + if (isAggregateQuery) { + query = ` + WITH all_data AS ( + SELECT * FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}]) + WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' + AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' + ${parsed.conditions} + UNION ALL + SELECT * FROM buffer_data + WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' + AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' + ${parsed.conditions} + ) SELECT ${parsed.columns} - FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}]) - ${parsed.timeRange ? `WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' - AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}'` : ''} - ${parsed.conditions} - ) - SELECT * FROM ( - (SELECT * FROM parquet_data) - UNION ALL - (SELECT ${parsed.columns} - FROM buffer_data - WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' - AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' - ${parsed.conditions}) - ) - ${parsed.orderBy} - ${parsed.limit} - `; + FROM all_data + ${parsed.orderBy} + ${parsed.limit} + `; + } else { + // Original query for non-aggregate queries + query = ` + WITH parquet_data AS ( + SELECT ${parsed.columns} + FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}]) + WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' + AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' + ${parsed.conditions} + ) + SELECT * FROM ( + (SELECT * FROM parquet_data) + UNION ALL + (SELECT ${parsed.columns} + FROM buffer_data + WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' + AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' + ${parsed.conditions}) + ) + ${parsed.orderBy} + ${parsed.limit} + `; + } console.log('Generated query:', query); console.log('Buffer rows:', buffer.rows.length); From 46c994e603e6ae611fdc521afbf008e49f0f7aa0 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Mon, 10 Feb 2025 23:37:34 +0100 Subject: [PATCH 08/60] query mem buffer --- query.js | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/query.js b/query.js index 2cfe7fc..e354fc6 100644 --- a/query.js +++ b/query.js @@ -200,12 +200,14 @@ class QueryClient { if (isAggregateQuery) { query = ` WITH all_data AS ( - SELECT * FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}]) + SELECT ${buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} + FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}]) WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' ${parsed.conditions} UNION ALL - SELECT * FROM buffer_data + SELECT ${buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} + FROM buffer_data WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' ${parsed.conditions} @@ -238,10 +240,6 @@ class QueryClient { ${parsed.limit} `; } - - console.log('Generated query:', query); - console.log('Buffer rows:', buffer.rows.length); - console.log('Parquet files:', files.length); } else { // Only query buffer data query = ` From 80f974594602d5581353b4f2f27d5aa51f19d083 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Mon, 10 Feb 2025 23:40:29 +0100 Subject: [PATCH 09/60] query mem buffer --- hepop.js | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/hepop.js b/hepop.js index c00804d..0729351 100644 --- a/hepop.js +++ b/hepop.js @@ -410,7 +410,7 @@ class ParquetBufferManager { } class CompactionManager { - constructor(bufferManager) { + constructor(bufferManager, debug = false) { this.bufferManager = bufferManager; this.compactionIntervals = { '10m': 10 * 60 * 1000, @@ -418,6 +418,7 @@ class CompactionManager { '24h': 24 * 60 * 60 * 1000 }; this.compactionLock = new Map(); + this.debug = debug; } async initialize() { @@ -493,7 +494,9 @@ class CompactionManager { async checkAndCompact() { const typeDirs = await this.getTypeDirectories(); - console.log('Found types for compaction:', typeDirs); + if (this.debug || typeDirs.length > 0) { + console.log('Found types for compaction:', typeDirs); + } for (const type of typeDirs) { if (this.compactionLock.get(type)) { @@ -506,28 +509,17 @@ class CompactionManager { let metadata = await this.bufferManager.getTypeMetadata(type); if (!metadata.files || !metadata.files.length) { - console.log(`No files found in metadata for type ${type}`); + if (this.debug) console.log(`No files found in metadata for type ${type}`); continue; } - // Verify and clean metadata before compaction metadata = await this.verifyAndCleanMetadata(type, metadata); - if (!metadata.files.length) { - console.log(`No valid files remain after metadata cleanup for type ${type}`); - continue; + if (metadata.files.length > 0) { + console.log(`Type ${type} has ${metadata.files.length} files to consider for compaction`); + await this.compactTimeRange(type, metadata.files, '10m', '1h'); + await this.compactTimeRange(type, metadata.files, '1h', '24h'); } - - console.log(`Type ${type} has ${metadata.files.length} files to consider for compaction`); - console.log('Files:', metadata.files.map(f => ({ - path: f.path, - type: f.type, - min_time: new Date(f.min_time / 1000000).toISOString(), - max_time: new Date(f.max_time / 1000000).toISOString() - }))); - - await this.compactTimeRange(type, metadata.files, '10m', '1h'); - await this.compactTimeRange(type, metadata.files, '1h', '24h'); } catch (error) { console.error(`Error during compaction for type ${type}:`, error); } finally { @@ -970,6 +962,10 @@ class HEPServer { this.buffer = new ParquetBufferManager(); await this.buffer.initialize(); + // Initialize compaction manager with debug flag + this.compaction = new CompactionManager(this.buffer, this.debug); + await this.compaction.initialize(); + // Initialize query client with buffer manager this.queryClient = new QueryClient(this.buffer.baseDir, this.buffer); await this.queryClient.initialize(); From f8eb918d10a366c0845f87d2d3fdc559493e7dd7 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Mon, 10 Feb 2025 23:42:34 +0100 Subject: [PATCH 10/60] query mem buffer --- hepop.js | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/hepop.js b/hepop.js index 0729351..59088a5 100644 --- a/hepop.js +++ b/hepop.js @@ -953,7 +953,9 @@ class CompactionManager { class HEPServer { constructor(config = {}) { this.debug = config.debug || false; - this.queryClient = null; // Add queryClient property + this.queryClient = null; + this.buffer = null; + this.compaction = null; } async initialize() { @@ -963,7 +965,7 @@ class HEPServer { await this.buffer.initialize(); // Initialize compaction manager with debug flag - this.compaction = new CompactionManager(this.buffer, this.debug); + this.compaction = new CompactionManager(this.buffer, true); // Always show compaction logs await this.compaction.initialize(); // Initialize query client with buffer manager @@ -1127,6 +1129,11 @@ class HEPServer { async shutdown() { console.log('Shutting down HEP server...'); + // Stop compaction first + if (this.compaction) { + await this.compaction.close(); + } + // Stop TCP server if (this.tcpServer) { try { From 1c712e01db08d672e3245b43c49dee43cb5d84f1 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Mon, 10 Feb 2025 23:48:30 +0100 Subject: [PATCH 11/60] Add test (#53) * Create loopback_test.sh * Update loopback_test.sh --- examples/loopback_test.sh | 55 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 examples/loopback_test.sh diff --git a/examples/loopback_test.sh b/examples/loopback_test.sh new file mode 100644 index 0000000..20e98a1 --- /dev/null +++ b/examples/loopback_test.sh @@ -0,0 +1,55 @@ +#!/bin/bash + +# Function to generate a random temperature between 60 and 100 +generate_temp() { + echo $((60 + RANDOM % 41)) +} + +# Database and API configuration +DB_URL="localhost:9070/write" +DB_NAME="sensors" +PRECISION="auto" +ACCEPT_PARTIAL="false" +QUERY_URL="http://localhost:9070/query" + +# Room name (can be modified as needed) +room="Garden" + +# Get the start time in ISO format +start_time=$(date -u +"%Y-%m-%dT%H:%M:%S") + +echo "Script started at: $start_time" + +# Counter for tracking every 10 inserts +counter=0 + +# Infinite loop to send data every second +while true; do + # Generate a random temperature + temp=$(generate_temp) + + # Prepare the data payload + data_payload="home,room=$room temp=$temp" + + # Send the data using curl + curl -s -XPOST "$DB_URL?db=$DB_NAME&precision=$PRECISION&accept_partial=$ACCEPT_PARTIAL" \ + --data-raw "$data_payload" + + # Print the payload for debugging + # echo "Payload sent: $data_payload" + + # Increment counter + ((counter++)) + + # Every 10 inserts, check the total count + if (( counter % 50 == 0 )); then + query_payload=$(jq -n --arg time "$start_time" '{query: "SELECT count() as count, avg(temp) as temp FROM home WHERE time >= '\''\($time)'\'' LIMIT 1"}') + response=$(curl -s -X POST "$QUERY_URL" -H "Content-Type: application/json" -d "$query_payload") + echo "Total count: $response" + echo "Total sent: $counter" + fi + + # Wait for 1 second before sending the next request + read -t 0.5 + +done From e53ce54b3e6b3048f0f2b35869073b1420ca5e89 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 01:20:36 +0100 Subject: [PATCH 12/60] query mem buffer --- hepop.js | 12 +- index.html | 1127 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1138 insertions(+), 1 deletion(-) create mode 100644 index.html diff --git a/hepop.js b/hepop.js index 59088a5..87c60be 100644 --- a/hepop.js +++ b/hepop.js @@ -1018,7 +1018,17 @@ class HEPServer { async fetch(req) { const url = new URL(req.url); - if (url.pathname === '/query') { + if (url.pathname === '/') { + try { + const html = await Bun.file('./index.html').text(); + return new Response(html, { + headers: { 'Content-Type': 'text/html' } + }); + } catch (error) { + console.error('Error serving index.html:', error); + return new Response('Error loading interface', { status: 500 }); + } + } else if (url.pathname === '/query') { try { let query; diff --git a/index.html b/index.html new file mode 100644 index 0000000..d4d5118 --- /dev/null +++ b/index.html @@ -0,0 +1,1127 @@ + + + + + + + Query UI + + + + + + + + + +
+
+ +
+
+ +
+
+ +  (Ctrl/Cmd+Enter) + + + + + 🌑🌞 +
+
+
+
+

+    
+
+ +

+

+

+ + + + \ No newline at end of file From 4ea67f6ed8421c167477278bc54fb05ea874fb51 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 01:27:09 +0100 Subject: [PATCH 13/60] fix ui --- index.html | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/index.html b/index.html index d4d5118..00e20c6 100644 --- a/index.html +++ b/index.html @@ -717,15 +717,24 @@ return; } - // Extract column names from first row + // Extract column names and types from first row const meta = []; const firstRow = data[0]; for (let key in firstRow) { - // Handle nested timestamp object if (key === 'timestamp') { meta.push({name: 'time', type: 'DateTime'}); + } else if (key === 'tags') { + // Parse JSON tags to get individual columns + try { + const tags = JSON.parse(firstRow[key]); + for (let tagKey in tags) { + meta.push({name: `tag_${tagKey}`, type: 'String'}); + } + } catch (e) { + meta.push({name: 'tags', type: 'String'}); + } } else { - meta.push({name: key, type: 'String'}); + meta.push({name: key, type: key.match(/temp|count|sum|avg/) ? 'Float64' : 'String'}); } } @@ -737,6 +746,16 @@ // Convert microseconds to readable date const micros = parseInt(row[key].micros); values.push(new Date(micros/1000).toISOString()); + } else if (key === 'tags') { + // Parse and expand JSON tags + try { + const tags = JSON.parse(row[key]); + for (let tagKey in tags) { + values.push(tags[tagKey]); + } + } catch (e) { + values.push(row[key]); + } } else { values.push(row[key]); } @@ -1115,8 +1134,8 @@ } // Update example queries - const querypresets = document.getElementById('dropdown'); - querypresets.innerHTML = ` + const querypreset = document.getElementById('dropdown'); + querypreset.innerHTML = ` From fad6316390dd2b95e23247f25c1bc4d478208b23 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 01:32:07 +0100 Subject: [PATCH 14/60] fix ui --- index.html | 70 ++++++++++++++++-------------------------------------- 1 file changed, 20 insertions(+), 50 deletions(-) diff --git a/index.html b/index.html index 00e20c6..d74d69c 100644 --- a/index.html +++ b/index.html @@ -709,63 +709,33 @@ { clear(); - // Parse the JSON response - const data = JSON.parse(response); + // Parse the JSON response if needed + const data = typeof response === 'string' ? JSON.parse(response) : response; if (!data || !data.length) { renderUnparsedResult("Empty result set"); return; } - // Extract column names and types from first row - const meta = []; - const firstRow = data[0]; - for (let key in firstRow) { - if (key === 'timestamp') { - meta.push({name: 'time', type: 'DateTime'}); - } else if (key === 'tags') { - // Parse JSON tags to get individual columns - try { - const tags = JSON.parse(firstRow[key]); - for (let tagKey in tags) { - meta.push({name: `tag_${tagKey}`, type: 'String'}); - } - } catch (e) { - meta.push({name: 'tags', type: 'String'}); - } - } else { - meta.push({name: key, type: key.match(/temp|count|sum|avg/) ? 'Float64' : 'String'}); - } - } - - // Transform data to expected format - const transformed = data.map(row => { - const values = []; - for (let key in firstRow) { - if (key === 'timestamp') { - // Convert microseconds to readable date - const micros = parseInt(row[key].micros); - values.push(new Date(micros/1000).toISOString()); - } else if (key === 'tags') { - // Parse and expand JSON tags - try { - const tags = JSON.parse(row[key]); - for (let tagKey in tags) { - values.push(tags[tagKey]); - } - } catch (e) { - values.push(row[key]); - } - } else { - values.push(row[key]); - } - } - return values; - }); - + // Transform our data format to the expected format const result = { - meta: meta, - data: transformed, + meta: [ + { name: 'time', type: 'DateTime' }, + { name: 'room', type: 'String' }, + { name: 'temp', type: 'Float64' }, + { name: 'light', type: 'Float64' } + ], + data: data.map(row => { + const tags = JSON.parse(row.tags); + return [ + new Date(parseInt(row.timestamp.micros)/1000).toISOString(), + tags.room, + row.temp, + row.light + ]; + }), + rows: data.length, + rows_before_limit_at_least: data.length, statistics: { elapsed: 0, rows_read: data.length, From 563aa4df571a71f6083dc578c7f0d63f2dc6273c Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 01:33:46 +0100 Subject: [PATCH 15/60] fix ui --- index.html | 70 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 56 insertions(+), 14 deletions(-) diff --git a/index.html b/index.html index d74d69c..2190bb3 100644 --- a/index.html +++ b/index.html @@ -717,23 +717,65 @@ return; } - // Transform our data format to the expected format - const result = { - meta: [ - { name: 'time', type: 'DateTime' }, - { name: 'room', type: 'String' }, - { name: 'temp', type: 'Float64' }, - { name: 'light', type: 'Float64' } - ], - data: data.map(row => { + // Detect data type and create appropriate metadata + const firstRow = data[0]; + let meta = []; + let transformedData = []; + + if (firstRow.timestamp && firstRow.tags) { + // Line Protocol data + meta = [ + { name: 'time', type: 'DateTime' } + ]; + + // Extract tag names from first row + const tags = JSON.parse(firstRow.tags); + Object.keys(tags).forEach(tag => { + meta.push({ name: tag, type: 'String' }); + }); + + // Add metric columns + Object.keys(firstRow).forEach(key => { + if (key !== 'timestamp' && key !== 'tags') { + meta.push({ name: key, type: 'Float64' }); + } + }); + + // Transform data + transformedData = data.map(row => { + const values = [new Date(parseInt(row.timestamp.micros)/1000).toISOString()]; const tags = JSON.parse(row.tags); + Object.values(tags).forEach(v => values.push(v)); + Object.keys(row).forEach(key => { + if (key !== 'timestamp' && key !== 'tags') { + values.push(row[key]); + } + }); + return values; + }); + } else if (firstRow.protocol_header) { + // HEP data + meta = [ + { name: 'time', type: 'DateTime' }, + { name: 'src_ip', type: 'String' }, + { name: 'dst_ip', type: 'String' }, + { name: 'payload', type: 'String' } + ]; + + transformedData = data.map(row => { + const header = JSON.parse(row.protocol_header); return [ - new Date(parseInt(row.timestamp.micros)/1000).toISOString(), - tags.room, - row.temp, - row.light + new Date(row.create_date).toISOString(), + header.srcIp || '', + header.dstIp || '', + row.raw || '' ]; - }), + }); + } + + const result = { + meta: meta, + data: transformedData, rows: data.length, rows_before_limit_at_least: data.length, statistics: { From e8c5a2c03f77f98beea63a146606e41c3a794b87 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 01:35:25 +0100 Subject: [PATCH 16/60] fix ui --- index.html | 102 +++++++++++++++++++---------------------------------- 1 file changed, 37 insertions(+), 65 deletions(-) diff --git a/index.html b/index.html index 2190bb3..394e5c1 100644 --- a/index.html +++ b/index.html @@ -705,87 +705,59 @@ return formatReadable(rows, 2, units); } - function renderResult(response) + function renderResult(response) { clear(); // Parse the JSON response if needed const data = typeof response === 'string' ? JSON.parse(response) : response; - if (!data || !data.length) { - renderUnparsedResult("Empty result set"); + // If response is already in expected format, use it directly + if (data.meta && data.data) { + renderTable(data); return; } - // Detect data type and create appropriate metadata - const firstRow = data[0]; - let meta = []; - let transformedData = []; - - if (firstRow.timestamp && firstRow.tags) { - // Line Protocol data - meta = [ - { name: 'time', type: 'DateTime' } - ]; - - // Extract tag names from first row - const tags = JSON.parse(firstRow.tags); - Object.keys(tags).forEach(tag => { - meta.push({ name: tag, type: 'String' }); - }); - - // Add metric columns - Object.keys(firstRow).forEach(key => { - if (key !== 'timestamp' && key !== 'tags') { - meta.push({ name: key, type: 'Float64' }); - } + // If response is an array, transform it to expected format + if (Array.isArray(data)) { + // Get column names from first row + const firstRow = data[0]; + const meta = Object.keys(firstRow).map(key => { + return { + name: key === 'timestamp' ? 'time' : key, + type: key === 'timestamp' ? 'DateTime' : + key === 'tags' ? 'String' : + typeof firstRow[key] === 'number' ? 'Float64' : 'String' + }; }); - // Transform data - transformedData = data.map(row => { - const values = [new Date(parseInt(row.timestamp.micros)/1000).toISOString()]; - const tags = JSON.parse(row.tags); - Object.values(tags).forEach(v => values.push(v)); - Object.keys(row).forEach(key => { - if (key !== 'timestamp' && key !== 'tags') { - values.push(row[key]); + // Transform rows to arrays matching meta order + const rows = data.map(row => { + return meta.map(col => { + if (col.name === 'time') { + return new Date(parseInt(row.timestamp.micros)/1000).toISOString(); } + return row[col.name]; }); - return values; - }); - } else if (firstRow.protocol_header) { - // HEP data - meta = [ - { name: 'time', type: 'DateTime' }, - { name: 'src_ip', type: 'String' }, - { name: 'dst_ip', type: 'String' }, - { name: 'payload', type: 'String' } - ]; - - transformedData = data.map(row => { - const header = JSON.parse(row.protocol_header); - return [ - new Date(row.create_date).toISOString(), - header.srcIp || '', - header.dstIp || '', - row.raw || '' - ]; }); - } - const result = { - meta: meta, - data: transformedData, - rows: data.length, - rows_before_limit_at_least: data.length, - statistics: { - elapsed: 0, - rows_read: data.length, - bytes_read: response.length - } - }; + const result = { + meta: meta, + data: rows, + rows: data.length, + rows_before_limit_at_least: data.length, + statistics: { + elapsed: 0, + rows_read: data.length, + bytes_read: response.length + } + }; + + renderTable(result); + return; + } - renderTable(result); + renderUnparsedResult("Invalid response format"); } function renderCell(cell, col_idx, settings) From 13dbbacb6a3d8a5eee920079928b92a45c56cbf5 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 01:36:28 +0100 Subject: [PATCH 17/60] fix ui --- index.html | 70 ++++++++++++++++++++++++------------------------------ 1 file changed, 31 insertions(+), 39 deletions(-) diff --git a/index.html b/index.html index 394e5c1..22e60c4 100644 --- a/index.html +++ b/index.html @@ -712,52 +712,44 @@ // Parse the JSON response if needed const data = typeof response === 'string' ? JSON.parse(response) : response; - // If response is already in expected format, use it directly - if (data.meta && data.data) { - renderTable(data); + if (!Array.isArray(data) || !data.length) { + renderUnparsedResult("No data available"); return; } - // If response is an array, transform it to expected format - if (Array.isArray(data)) { - // Get column names from first row - const firstRow = data[0]; - const meta = Object.keys(firstRow).map(key => { - return { - name: key === 'timestamp' ? 'time' : key, - type: key === 'timestamp' ? 'DateTime' : - key === 'tags' ? 'String' : - typeof firstRow[key] === 'number' ? 'Float64' : 'String' - }; - }); - - // Transform rows to arrays matching meta order - const rows = data.map(row => { - return meta.map(col => { - if (col.name === 'time') { - return new Date(parseInt(row.timestamp.micros)/1000).toISOString(); - } - return row[col.name]; - }); - }); + // Get column names from first row + const firstRow = data[0]; + const meta = Object.keys(firstRow).map(key => { + return { + name: key === 'timestamp' ? 'time' : key, + type: key === 'timestamp' ? 'DateTime' : + key === 'tags' ? 'String' : + typeof firstRow[key] === 'number' ? 'Float64' : 'String' + }; + }); - const result = { - meta: meta, - data: rows, - rows: data.length, - rows_before_limit_at_least: data.length, - statistics: { - elapsed: 0, - rows_read: data.length, - bytes_read: response.length + // Transform rows to arrays matching meta order + const rows = data.map(row => { + return meta.map(col => { + if (col.name === 'time') { + return new Date(parseInt(row.timestamp.micros)/1000).toISOString(); } - }; + return row[col.name]; + }); + }); - renderTable(result); - return; - } + const result = { + meta: meta, + data: rows, + rows: data.length, + statistics: { + elapsed: 0, + rows_read: data.length, + bytes_read: response.length + } + }; - renderUnparsedResult("Invalid response format"); + renderTable(result); } function renderCell(cell, col_idx, settings) From f30991e20b39b8739fabc7b79b97da8e75bfa302 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 01:38:01 +0100 Subject: [PATCH 18/60] fix ui --- index.html | 47 +++++++++++++++++++++-------------------------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/index.html b/index.html index 22e60c4..0ae7490 100644 --- a/index.html +++ b/index.html @@ -710,38 +710,33 @@ clear(); // Parse the JSON response if needed - const data = typeof response === 'string' ? JSON.parse(response) : response; - + let data; + try { + data = typeof response === 'string' ? JSON.parse(response) : response; + } catch (e) { + renderUnparsedResult("Invalid JSON response"); + return; + } + if (!Array.isArray(data) || !data.length) { renderUnparsedResult("No data available"); return; } - // Get column names from first row - const firstRow = data[0]; - const meta = Object.keys(firstRow).map(key => { - return { - name: key === 'timestamp' ? 'time' : key, - type: key === 'timestamp' ? 'DateTime' : - key === 'tags' ? 'String' : - typeof firstRow[key] === 'number' ? 'Float64' : 'String' - }; - }); - - // Transform rows to arrays matching meta order - const rows = data.map(row => { - return meta.map(col => { - if (col.name === 'time') { - return new Date(parseInt(row.timestamp.micros)/1000).toISOString(); - } - return row[col.name]; - }); - }); - + // Transform our array format to table format const result = { - meta: meta, - data: rows, - rows: data.length, + meta: [ + { name: 'time', type: 'DateTime' }, + { name: 'tags', type: 'String' }, + { name: 'temp', type: 'Float64' }, + { name: 'light', type: 'Float64' } + ], + data: data.map(row => [ + new Date(parseInt(row.timestamp.micros)/1000).toISOString(), + row.tags, + row.temp, + row.light + ]), statistics: { elapsed: 0, rows_read: data.length, From 90a3cb184a8b82c0fa617d121015ab334a7f77f7 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 01:39:46 +0100 Subject: [PATCH 19/60] fix ui --- index.html | 73 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 53 insertions(+), 20 deletions(-) diff --git a/index.html b/index.html index 0ae7490..69086d3 100644 --- a/index.html +++ b/index.html @@ -723,28 +723,61 @@ return; } - // Transform our array format to table format - const result = { - meta: [ - { name: 'time', type: 'DateTime' }, - { name: 'tags', type: 'String' }, - { name: 'temp', type: 'Float64' }, - { name: 'light', type: 'Float64' } - ], - data: data.map(row => [ - new Date(parseInt(row.timestamp.micros)/1000).toISOString(), - row.tags, - row.temp, - row.light - ]), - statistics: { - elapsed: 0, - rows_read: data.length, - bytes_read: response.length + // Create table elements + const table = document.getElementById('data-table'); + const thead = document.createElement('thead'); + const tbody = document.createElement('tbody'); + + // Create headers + const tr = document.createElement('tr'); + ['Time', 'Room', 'Temperature', 'Light'].forEach(header => { + const th = document.createElement('th'); + th.textContent = header; + tr.appendChild(th); + }); + thead.appendChild(tr); + table.appendChild(thead); + + // Create rows + data.forEach(row => { + const tr = document.createElement('tr'); + + // Time cell + const timeCell = document.createElement('td'); + timeCell.textContent = new Date(parseInt(row.timestamp.micros)/1000).toISOString(); + tr.appendChild(timeCell); + + // Room cell + const roomCell = document.createElement('td'); + try { + const tags = JSON.parse(row.tags); + roomCell.textContent = tags.room; + } catch (e) { + roomCell.textContent = row.tags; } - }; + tr.appendChild(roomCell); + + // Temperature cell + const tempCell = document.createElement('td'); + tempCell.textContent = row.temp; + tempCell.className = 'right'; // Right align numbers + tr.appendChild(tempCell); + + // Light cell + const lightCell = document.createElement('td'); + lightCell.textContent = row.light; + lightCell.className = 'right'; // Right align numbers + tr.appendChild(lightCell); + + tbody.appendChild(tr); + }); + + table.appendChild(tbody); + table.style.display = 'table'; - renderTable(result); + // Update statistics + document.getElementById('stats').innerText = + `${data.length} rows in set`; } function renderCell(cell, col_idx, settings) From 6d3ceb9fd964874aaf5223a4b21e36dc914ef8c0 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 01:41:12 +0100 Subject: [PATCH 20/60] fix ui --- index.html | 105 ++++++++++++++++++++++++----------------------------- 1 file changed, 47 insertions(+), 58 deletions(-) diff --git a/index.html b/index.html index 69086d3..0a0916d 100644 --- a/index.html +++ b/index.html @@ -705,79 +705,68 @@ return formatReadable(rows, 2, units); } - function renderResult(response) - { + function renderResult(response) { clear(); + + // Parse JSON response + let data = JSON.parse(response); + console.log('Parsed data:', data); // Debug log - // Parse the JSON response if needed - let data; - try { - data = typeof response === 'string' ? JSON.parse(response) : response; - } catch (e) { - renderUnparsedResult("Invalid JSON response"); - return; - } - - if (!Array.isArray(data) || !data.length) { - renderUnparsedResult("No data available"); - return; - } - - // Create table elements + // Create table structure const table = document.getElementById('data-table'); + + // Create header row const thead = document.createElement('thead'); - const tbody = document.createElement('tbody'); - - // Create headers - const tr = document.createElement('tr'); - ['Time', 'Room', 'Temperature', 'Light'].forEach(header => { + const headerRow = document.createElement('tr'); + const headers = ['Time', 'Room', 'Temperature', 'Light']; + + headers.forEach(text => { const th = document.createElement('th'); - th.textContent = header; - tr.appendChild(th); + th.textContent = text; + headerRow.appendChild(th); }); - thead.appendChild(tr); + + thead.appendChild(headerRow); table.appendChild(thead); - // Create rows + // Create data rows + const tbody = document.createElement('tbody'); + data.forEach(row => { + console.log('Processing row:', row); // Debug log const tr = document.createElement('tr'); - // Time cell - const timeCell = document.createElement('td'); - timeCell.textContent = new Date(parseInt(row.timestamp.micros)/1000).toISOString(); - tr.appendChild(timeCell); - - // Room cell - const roomCell = document.createElement('td'); - try { - const tags = JSON.parse(row.tags); - roomCell.textContent = tags.room; - } catch (e) { - roomCell.textContent = row.tags; - } - tr.appendChild(roomCell); - - // Temperature cell - const tempCell = document.createElement('td'); - tempCell.textContent = row.temp; - tempCell.className = 'right'; // Right align numbers - tr.appendChild(tempCell); - - // Light cell - const lightCell = document.createElement('td'); - lightCell.textContent = row.light; - lightCell.className = 'right'; // Right align numbers - tr.appendChild(lightCell); - + // Time + let td = document.createElement('td'); + td.textContent = new Date(parseInt(row.timestamp.micros)/1000).toISOString(); + tr.appendChild(td); + + // Room + td = document.createElement('td'); + const tags = JSON.parse(row.tags); + td.textContent = tags.room; + tr.appendChild(td); + + // Temperature + td = document.createElement('td'); + td.textContent = row.temp; + td.className = 'right'; + tr.appendChild(td); + + // Light + td = document.createElement('td'); + td.textContent = row.light; + td.className = 'right'; + tr.appendChild(td); + tbody.appendChild(tr); }); - + table.appendChild(tbody); table.style.display = 'table'; - - // Update statistics - document.getElementById('stats').innerText = - `${data.length} rows in set`; + + // Update stats + document.getElementById('stats').innerText = `${data.length} rows`; } function renderCell(cell, col_idx, settings) From b1c39dcc4c5c244c97ba4ba582a606928754ed0f Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 01:43:40 +0100 Subject: [PATCH 21/60] fix ui --- index.html | 63 +++++++++++++++++++++++++++--------------------------- 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/index.html b/index.html index 0a0916d..fd6c305 100644 --- a/index.html +++ b/index.html @@ -673,16 +673,11 @@ function clear() { - clearElement('data-table'); - clearElement('graph'); - clearElement('chart'); - clearElement('data-unparsed'); - clearElement('error'); - - document.getElementById('check-mark').display = 'none'; - document.getElementById('hourglass').display = 'none'; + const table = document.getElementById('data-table'); + while (table.firstChild) { + table.firstChild.remove(); + } document.getElementById('stats').innerText = ''; - document.getElementById('logo-container').style.display = 'block'; } function formatReadable(number = 0, decimals = 2, units = []) { @@ -709,63 +704,69 @@ clear(); // Parse JSON response - let data = JSON.parse(response); - console.log('Parsed data:', data); // Debug log + let data; + try { + data = JSON.parse(response); + console.log('Parsed data:', data); + } catch (e) { + console.error('Parse error:', e); + document.getElementById('error').innerText = 'Invalid JSON response'; + document.getElementById('error').style.display = 'block'; + return; + } - // Create table structure + // Create table const table = document.getElementById('data-table'); + table.innerHTML = ''; // Clear existing content - // Create header row + // Add headers const thead = document.createElement('thead'); const headerRow = document.createElement('tr'); - const headers = ['Time', 'Room', 'Temperature', 'Light']; - - headers.forEach(text => { + ['Time', 'Room', 'Temperature', 'Light'].forEach(header => { const th = document.createElement('th'); - th.textContent = text; + th.innerText = header; headerRow.appendChild(th); }); - thead.appendChild(headerRow); table.appendChild(thead); - // Create data rows + // Add data rows const tbody = document.createElement('tbody'); - data.forEach(row => { - console.log('Processing row:', row); // Debug log const tr = document.createElement('tr'); - // Time + // Time column let td = document.createElement('td'); - td.textContent = new Date(parseInt(row.timestamp.micros)/1000).toISOString(); + const timestamp = new Date(parseInt(row.timestamp.micros)/1000); + td.innerText = timestamp.toISOString(); tr.appendChild(td); - // Room + // Room column td = document.createElement('td'); const tags = JSON.parse(row.tags); - td.textContent = tags.room; + td.innerText = tags.room; tr.appendChild(td); - // Temperature + // Temperature column td = document.createElement('td'); - td.textContent = row.temp; + td.innerText = row.temp; td.className = 'right'; tr.appendChild(td); - // Light + // Light column td = document.createElement('td'); - td.textContent = row.light; + td.innerText = row.light; td.className = 'right'; tr.appendChild(td); tbody.appendChild(tr); }); - table.appendChild(tbody); + + // Make table visible table.style.display = 'table'; - // Update stats + // Update row count document.getElementById('stats').innerText = `${data.length} rows`; } From dc1e50710b6ede46cabce729d733b69636ed4751 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 01:47:05 +0100 Subject: [PATCH 22/60] fix ui --- index.html | 45 +++++++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/index.html b/index.html index fd6c305..74a5c36 100644 --- a/index.html +++ b/index.html @@ -576,24 +576,27 @@ } function renderResponse(status, response) { - document.getElementById('hourglass').style.display = 'none'; - if (status === 200) { - let json; - try { json = JSON.parse(response); } catch (e) {} - - if (json !== undefined && json.statistics !== undefined) { - renderResult(json); - } else if (Array.isArray(json) && json.length == 2 && - Array.isArray(json[0]) && Array.isArray(json[1]) && json[0].length > 1 && json[0].length == json[1].length) { - /// If user requested FORMAT JSONCompactColumns, we will render it as a chart. + const json = JSON.parse(response); + + // Handle our array format + if (Array.isArray(json)) { + renderResult(json); // Pass the array directly + } + // Keep existing chart format handler + else if (Array.isArray(json) && json.length == 2 && + Array.isArray(json[0]) && Array.isArray(json[1]) && + json[0].length > 1 && json[0].length == json[1].length) { renderChart(json); - } else { + } + // Keep legacy format handler + else if (json !== undefined && json.statistics !== undefined) { + renderResult(json); + } + else { renderUnparsedResult(response); } - document.getElementById('check-mark').style.display = 'inline'; } else { - /// TODO: Proper rendering of network errors. renderError(response); } } @@ -700,24 +703,18 @@ return formatReadable(rows, 2, units); } - function renderResult(response) { + function renderResult(data) { clear(); - // Parse JSON response - let data; - try { - data = JSON.parse(response); - console.log('Parsed data:', data); - } catch (e) { - console.error('Parse error:', e); - document.getElementById('error').innerText = 'Invalid JSON response'; - document.getElementById('error').style.display = 'block'; + // No need to parse again - data is already parsed + if (!Array.isArray(data)) { + console.error('Expected array data'); return; } // Create table const table = document.getElementById('data-table'); - table.innerHTML = ''; // Clear existing content + table.innerHTML = ''; // Add headers const thead = document.createElement('thead'); From 949d29901edf22326de7749af0fdee476dd4db80 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 01:49:02 +0100 Subject: [PATCH 23/60] fix ui --- index.html | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/index.html b/index.html index 74a5c36..3c26229 100644 --- a/index.html +++ b/index.html @@ -576,28 +576,34 @@ } function renderResponse(status, response) { + // Hide hourglass when response is received + document.getElementById('hourglass').style.display = 'none'; + if (status === 200) { const json = JSON.parse(response); // Handle our array format if (Array.isArray(json)) { renderResult(json); // Pass the array directly + // Show check mark on success + document.getElementById('check-mark').style.display = 'inline'; } // Keep existing chart format handler else if (Array.isArray(json) && json.length == 2 && Array.isArray(json[0]) && Array.isArray(json[1]) && json[0].length > 1 && json[0].length == json[1].length) { renderChart(json); + document.getElementById('check-mark').style.display = 'inline'; } - // Keep legacy format handler - else if (json !== undefined && json.statistics !== undefined) { - renderResult(json); - } else { renderUnparsedResult(response); + // Don't show check mark for unparsed/error results + document.getElementById('check-mark').style.display = 'none'; } } else { renderError(response); + // Hide check mark on error + document.getElementById('check-mark').style.display = 'none'; } } From b2bef815837ec08c6331661c45d9a4af23a7aa0b Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 01:50:51 +0100 Subject: [PATCH 24/60] fix ui --- index.html | 64 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 27 deletions(-) diff --git a/index.html b/index.html index 3c26229..4506288 100644 --- a/index.html +++ b/index.html @@ -712,8 +712,7 @@ function renderResult(data) { clear(); - // No need to parse again - data is already parsed - if (!Array.isArray(data)) { + if (!Array.isArray(data) || !data.length) { console.error('Expected array data'); return; } @@ -722,12 +721,16 @@ const table = document.getElementById('data-table'); table.innerHTML = ''; + // Get columns dynamically from first row + const columns = Object.keys(data[0]); + // Add headers const thead = document.createElement('thead'); const headerRow = document.createElement('tr'); - ['Time', 'Room', 'Temperature', 'Light'].forEach(header => { + columns.forEach(header => { const th = document.createElement('th'); - th.innerText = header; + // Clean up header names + th.innerText = header.replace(/[_()]/g, ' ').trim(); headerRow.appendChild(th); }); thead.appendChild(headerRow); @@ -738,29 +741,36 @@ data.forEach(row => { const tr = document.createElement('tr'); - // Time column - let td = document.createElement('td'); - const timestamp = new Date(parseInt(row.timestamp.micros)/1000); - td.innerText = timestamp.toISOString(); - tr.appendChild(td); - - // Room column - td = document.createElement('td'); - const tags = JSON.parse(row.tags); - td.innerText = tags.room; - tr.appendChild(td); - - // Temperature column - td = document.createElement('td'); - td.innerText = row.temp; - td.className = 'right'; - tr.appendChild(td); - - // Light column - td = document.createElement('td'); - td.innerText = row.light; - td.className = 'right'; - tr.appendChild(td); + columns.forEach(col => { + const td = document.createElement('td'); + let value = row[col]; + + // Handle special cases + if (col === 'timestamp' && typeof value === 'object' && value.micros) { + // Convert timestamp + value = new Date(parseInt(value.micros)/1000).toISOString(); + } else if (col === 'tags' && typeof value === 'string') { + // Parse JSON tags + try { + const tags = JSON.parse(value); + value = Object.entries(tags) + .map(([k,v]) => `${k}:${v}`) + .join(', '); + } catch (e) { + // Keep original if parsing fails + value = value; + } + } + + td.innerText = value; + + // Right align numbers + if (typeof row[col] === 'number' || col.match(/count|sum|avg|min|max/i)) { + td.className = 'right'; + } + + tr.appendChild(td); + }); tbody.appendChild(tr); }); From b6b4b320a927e0508626663a2fbbb204c6ea06eb Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 02:02:29 +0100 Subject: [PATCH 25/60] fix timestamping --- hepop.js | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/hepop.js b/hepop.js index 87c60be..0b58c06 100644 --- a/hepop.js +++ b/hepop.js @@ -113,7 +113,27 @@ class ParquetBufferManager { async getFilePath(type, timestamp) { const typeMetadata = await this.getTypeMetadata(type); - const date = new Date(timestamp); + + // Handle nanosecond timestamps + let date; + if (typeof timestamp === 'number') { + // Keep nanosecond precision by using floor division for date parts + const ms = Math.floor(timestamp / 1000000); // Get milliseconds + date = new Date(ms); + } else if (typeof timestamp === 'string') { + // Parse string timestamp + date = new Date(timestamp); + } else if (timestamp instanceof Date) { + date = timestamp; + } else { + throw new Error('Invalid timestamp format'); + } + + if (isNaN(date.getTime())) { + throw new Error(`Invalid date from timestamp: ${timestamp}`); + } + + // Use date for directory structure only const datePath = date.toISOString().split('T')[0]; const hour = date.getHours().toString().padStart(2, '0'); const minute = Math.floor(date.getMinutes() / 10) * 10; From 20594a81e559aafe5faace2b3935aa4b737bd8de Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 02:34:35 +0100 Subject: [PATCH 26/60] fix timestamping --- hepop.js | 61 ++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 48 insertions(+), 13 deletions(-) diff --git a/hepop.js b/hepop.js index 0b58c06..c3abb41 100644 --- a/hepop.js +++ b/hepop.js @@ -394,37 +394,72 @@ class ParquetBufferManager { } async addLineProtocolBulk(measurement, rows) { - // Use measurement directly as type (like HEP types) const type = measurement; if (!this.buffers.has(type)) { - // Create new schema for this measurement including its fields - const schema = new parquet.ParquetSchema({ - timestamp: { type: 'TIMESTAMP_MILLIS' }, - tags: { type: 'UTF8' }, - ...Object.entries(rows[0]).reduce((acc, [key, value]) => { + // Get existing schema if any + let existingSchema = null; + try { + const typeMetadata = await this.getTypeMetadata(type); + if (typeMetadata.files.length > 0) { + const reader = await parquet.ParquetReader.openFile(typeMetadata.files[0].path); + existingSchema = reader.schema; + await reader.close(); + } + } catch (error) { + console.log(`No existing schema found for ${type}, creating new one`); + } + + // Merge schemas + const newFields = {}; + rows.forEach(row => { + Object.entries(row).forEach(([key, value]) => { if (key !== 'timestamp' && key !== 'tags') { - acc[key] = { + newFields[key] = { type: typeof value === 'number' ? 'DOUBLE' : - typeof value === 'boolean' ? 'BOOLEAN' : 'UTF8' + typeof value === 'boolean' ? 'BOOLEAN' : 'UTF8', + optional: true // Make all fields optional }; } - return acc; - }, {}) + }); + }); + + const schema = new parquet.ParquetSchema({ + timestamp: { type: 'TIMESTAMP_MILLIS' }, + tags: { type: 'UTF8' }, + ...newFields }); this.buffers.set(type, { rows: [], schema, - isLineProtocol: true // Mark as Line Protocol data + isLineProtocol: true }); } const buffer = this.buffers.get(type); - buffer.rows.push(...rows); + + // Ensure all rows have all fields + const allFields = new Set(); + buffer.schema.fieldList.forEach(f => allFields.add(f.path[0])); + + const normalizedRows = rows.map(row => { + const normalized = { + timestamp: row.timestamp, + tags: row.tags + }; + allFields.forEach(field => { + if (field !== 'timestamp' && field !== 'tags') { + normalized[field] = row[field] ?? null; + } + }); + return normalized; + }); + + buffer.rows.push(...normalizedRows); if (buffer.rows.length >= this.bufferSize) { - await this.flush(type); // Use the same flush method as HEP + await this.flush(type); } } } From fdd025d5a5779e90be353e36a248e93260cddf0c Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 02:40:35 +0100 Subject: [PATCH 27/60] fix timestamping --- query.js | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/query.js b/query.js index e354fc6..40e80e0 100644 --- a/query.js +++ b/query.js @@ -201,7 +201,7 @@ class QueryClient { query = ` WITH all_data AS ( SELECT ${buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} - FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}]) + FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}], union_by_name=true) WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' ${parsed.conditions} @@ -218,23 +218,20 @@ class QueryClient { ${parsed.limit} `; } else { - // Original query for non-aggregate queries query = ` - WITH parquet_data AS ( - SELECT ${parsed.columns} - FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}]) + SELECT ${parsed.columns} + FROM ( + SELECT ${buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} + FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}], union_by_name=true) WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' - AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' + AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' ${parsed.conditions} - ) - SELECT * FROM ( - (SELECT * FROM parquet_data) UNION ALL - (SELECT ${parsed.columns} - FROM buffer_data - WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' - AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' - ${parsed.conditions}) + SELECT ${buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} + FROM buffer_data + WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' + AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' + ${parsed.conditions} ) ${parsed.orderBy} ${parsed.limit} @@ -243,7 +240,8 @@ class QueryClient { } else { // Only query buffer data query = ` - SELECT ${parsed.columns} FROM buffer_data + SELECT ${parsed.columns} + FROM buffer_data WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' ${parsed.conditions} From 43bc80796fb73c3af839a7f35c5ce0fae1a6a58e Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 02:43:45 +0100 Subject: [PATCH 28/60] fix union queries --- query.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/query.js b/query.js index 40e80e0..8cf96d2 100644 --- a/query.js +++ b/query.js @@ -232,7 +232,7 @@ class QueryClient { WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' ${parsed.conditions} - ) + ) combined_data ${parsed.orderBy} ${parsed.limit} `; From 9030cc306f6bfc3806b6a62ec829582461b891b0 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 12:18:47 +0100 Subject: [PATCH 29/60] Update loopback_test.sh --- examples/loopback_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/loopback_test.sh b/examples/loopback_test.sh index 20e98a1..161bdb5 100644 --- a/examples/loopback_test.sh +++ b/examples/loopback_test.sh @@ -49,7 +49,7 @@ while true; do echo "Total sent: $counter" fi - # Wait for 1 second before sending the next request + # Wait for x before sending the next request read -t 0.5 done From 64c5c4e81ef2e79c1d291e32b856b3f6802e4683 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 13:05:13 +0100 Subject: [PATCH 30/60] fix timestamping --- lineproto.js | 42 +++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/lineproto.js b/lineproto.js index 231d41e..5967529 100644 --- a/lineproto.js +++ b/lineproto.js @@ -30,21 +30,13 @@ function formatValue(v, numericType) { const STRING_REGEX = /^"(.*)"$/; function parseValue(value) { - if (value == null) { - return undefined; - } else if (INT_REGEX.test(value)) { - return parseInt(value.slice(0, -1)); - } else if (TRUE_REGEX.test(value)) { - return true; - } else if (FALSE_REGEX.test(value)) { - return false; - } else if (STRING_REGEX.test(value)) { - return value.slice(1, -1); - } else if (!isNaN(value)) { - return parseFloat(value); - } else { - return undefined; - } + if (!value) return value; + if (value === 'true') return true; + if (value === 'false') return false; + if (value === 'null' || value === 'NULL') return null; + if (value.startsWith('"')) return value.slice(1, -1); + const num = value.includes('.') ? parseFloat(value) : parseInt(value); + return isNaN(num) ? value : num; } function joinObject(obj, withFormatting, config) { @@ -84,9 +76,25 @@ function formatValue(v, numericType) { }, {}); if (timestamp) { - result.timestamp = parseInt(timestamp) / 1000000; + // Handle different timestamp formats + if (/^\d{19}$/.test(timestamp)) { + // Nanosecond precision - keep full value + result.timestamp = BigInt(timestamp); + if (process.env.DEBUG) { + console.log('Parsed nanosecond timestamp:', timestamp, + 'Date:', new Date(Number(result.timestamp / 1000000n)).toISOString()); + } + } else { + // Convert other formats to milliseconds + result.timestamp = parseInt(timestamp); + if (process.env.DEBUG) { + console.log('Parsed timestamp:', timestamp, + 'Date:', new Date(result.timestamp).toISOString()); + } + } } else if (config.addTimestamp) { - result.timestamp = Date.now(); + // Current time in milliseconds + result.timestamp = BigInt(Date.now()) * 1000000n; } return result; From 45f5592805f89bed524346c0aa495832dd190d63 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 13:07:27 +0100 Subject: [PATCH 31/60] fix timestamping --- hepop.js | 2 +- lineproto.js | 17 +++++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/hepop.js b/hepop.js index c3abb41..3fc2a67 100644 --- a/hepop.js +++ b/hepop.js @@ -1142,7 +1142,7 @@ class HEPServer { } bulkData.get(measurement).push({ - timestamp: new Date(parsed.timestamp), + timestamp: new Date(parsed.timestampMs), // Use millisecond timestamp for Date tags: JSON.stringify(parsed.tags), ...parsed.fields }); diff --git a/lineproto.js b/lineproto.js index 5967529..c4f9d6b 100644 --- a/lineproto.js +++ b/lineproto.js @@ -78,23 +78,28 @@ function formatValue(v, numericType) { if (timestamp) { // Handle different timestamp formats if (/^\d{19}$/.test(timestamp)) { - // Nanosecond precision - keep full value - result.timestamp = BigInt(timestamp); + // Nanosecond precision - store as BigInt but provide milliseconds for Date + const nanos = BigInt(timestamp); + result.timestamp = nanos; // Keep full precision + result.timestampMs = Number(nanos / 1000000n); // For Date operations if (process.env.DEBUG) { console.log('Parsed nanosecond timestamp:', timestamp, - 'Date:', new Date(Number(result.timestamp / 1000000n)).toISOString()); + 'Date:', new Date(result.timestampMs).toISOString()); } } else { // Convert other formats to milliseconds result.timestamp = parseInt(timestamp); + result.timestampMs = result.timestamp; if (process.env.DEBUG) { console.log('Parsed timestamp:', timestamp, - 'Date:', new Date(result.timestamp).toISOString()); + 'Date:', new Date(result.timestampMs).toISOString()); } } } else if (config.addTimestamp) { - // Current time in milliseconds - result.timestamp = BigInt(Date.now()) * 1000000n; + // Current time in nanoseconds + const now = BigInt(Date.now()); + result.timestamp = now * 1000000n; + result.timestampMs = Number(now); } return result; From 3bd35e43a2247dc43fed5204d942cc980282941d Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 13:10:29 +0100 Subject: [PATCH 32/60] fix timestamping --- lineproto.js | 46 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/lineproto.js b/lineproto.js index c4f9d6b..781f612 100644 --- a/lineproto.js +++ b/lineproto.js @@ -78,28 +78,46 @@ function formatValue(v, numericType) { if (timestamp) { // Handle different timestamp formats if (/^\d{19}$/.test(timestamp)) { - // Nanosecond precision - store as BigInt but provide milliseconds for Date + // Nanosecond precision - store as nanoseconds and provide milliseconds const nanos = BigInt(timestamp); - result.timestamp = nanos; // Keep full precision - result.timestampMs = Number(nanos / 1000000n); // For Date operations + result.timestamp = nanos; + result.timestampMs = Number(nanos / BigInt(1000000)); if (process.env.DEBUG) { - console.log('Parsed nanosecond timestamp:', timestamp, - 'Date:', new Date(result.timestampMs).toISOString()); + console.log('Parsed nanosecond timestamp:', { + original: timestamp, + nanos: nanos.toString(), + ms: result.timestampMs, + date: new Date(result.timestampMs).toISOString() + }); + } + } else if (/^\d+$/.test(timestamp)) { + // Regular numeric timestamp - assume milliseconds + result.timestampMs = parseInt(timestamp); + result.timestamp = BigInt(result.timestampMs) * BigInt(1000000); + if (process.env.DEBUG) { + console.log('Parsed millisecond timestamp:', { + original: timestamp, + ms: result.timestampMs, + date: new Date(result.timestampMs).toISOString() + }); } } else { - // Convert other formats to milliseconds - result.timestamp = parseInt(timestamp); - result.timestampMs = result.timestamp; + // Fallback to current time + const now = Date.now(); + result.timestampMs = now; + result.timestamp = BigInt(now) * BigInt(1000000); if (process.env.DEBUG) { - console.log('Parsed timestamp:', timestamp, - 'Date:', new Date(result.timestampMs).toISOString()); + console.log('Using current timestamp:', { + ms: result.timestampMs, + date: new Date(result.timestampMs).toISOString() + }); } } } else if (config.addTimestamp) { - // Current time in nanoseconds - const now = BigInt(Date.now()); - result.timestamp = now * 1000000n; - result.timestampMs = Number(now); + // Current time + const now = Date.now(); + result.timestampMs = now; + result.timestamp = BigInt(now) * BigInt(1000000); } return result; From a2233983bacc987e0c012be9b34c1469e963da73 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 13:12:01 +0100 Subject: [PATCH 33/60] fix timestamping --- hepop.js | 78 ++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 51 insertions(+), 27 deletions(-) diff --git a/hepop.js b/hepop.js index 3fc2a67..8a8613f 100644 --- a/hepop.js +++ b/hepop.js @@ -246,6 +246,7 @@ class ParquetBufferManager { async writeTypeMetadata(type, metadata) { const metadataPath = this.getTypeMetadataPath(type); + // Ensure directory exists before writing temp file await fs.promises.mkdir(path.dirname(metadataPath), { recursive: true }); const tempPath = `${metadataPath}.tmp`; @@ -263,36 +264,59 @@ class ParquetBufferManager { } async updateMetadata(type, filePath, sizeBytes, rowCount, timestamps) { - const minTime = Math.min(...timestamps.map(t => t.getTime() * 1000000)); - const maxTime = Math.max(...timestamps.map(t => t.getTime() * 1000000)); - const chunkTime = Math.floor(minTime / 600000000000) * 600000000000; - - // Get current type metadata - const typeMetadata = await this.getTypeMetadata(type); - - const fileInfo = { - id: typeMetadata.files.length, - path: filePath, - size_bytes: sizeBytes, - row_count: rowCount, - chunk_time: chunkTime, - min_time: minTime, - max_time: maxTime, - type: 'raw' + // Handle timestamps safely + const getTimeInNanos = (timestamp) => { + if (timestamp instanceof Date) { + return BigInt(timestamp.getTime()) * BigInt(1000000); + } + // If it's already a BigInt (nanoseconds), return as is + if (typeof timestamp === 'bigint') { + return timestamp; + } + // Convert number to nanoseconds + return BigInt(Math.floor(timestamp)) * BigInt(1000000); }; - // Update type metadata - typeMetadata.files.push(fileInfo); - typeMetadata.parquet_size_bytes += sizeBytes; - typeMetadata.row_count += rowCount; - typeMetadata.min_time = typeMetadata.min_time ? - Math.min(typeMetadata.min_time, minTime) : minTime; - typeMetadata.max_time = typeMetadata.max_time ? - Math.max(typeMetadata.max_time, maxTime) : maxTime; - typeMetadata.wal_sequence++; + try { + const timeNanos = timestamps.map(getTimeInNanos); + const minTime = timeNanos.length > 0 ? + Number(timeNanos.reduce((a, b) => a < b ? a : b)) : + Date.now() * 1000000; + const maxTime = timeNanos.length > 0 ? + Number(timeNanos.reduce((a, b) => a > b ? a : b)) : + Date.now() * 1000000; + const chunkTime = Math.floor(minTime / 600000000000) * 600000000000; + + // Get current type metadata + const typeMetadata = await this.getTypeMetadata(type); + + const fileInfo = { + id: typeMetadata.files.length, + path: filePath, + size_bytes: sizeBytes, + row_count: rowCount, + chunk_time: chunkTime, + min_time: minTime, + max_time: maxTime, + type: 'raw' + }; - // Write updated metadata - await this.writeTypeMetadata(type, typeMetadata); + // Update type metadata + typeMetadata.files.push(fileInfo); + typeMetadata.parquet_size_bytes += sizeBytes; + typeMetadata.row_count += rowCount; + typeMetadata.min_time = typeMetadata.min_time ? + Math.min(typeMetadata.min_time, minTime) : minTime; + typeMetadata.max_time = typeMetadata.max_time ? + Math.max(typeMetadata.max_time, maxTime) : maxTime; + typeMetadata.wal_sequence++; + + // Write updated metadata + await this.writeTypeMetadata(type, typeMetadata); + } catch (error) { + console.error('Error updating metadata:', error); + throw error; + } } async close() { From 10df828075e9c7e4b92bec75925703aabc3f415c Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 13:14:00 +0100 Subject: [PATCH 34/60] fix timestamping --- hepop.js | 106 +++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 72 insertions(+), 34 deletions(-) diff --git a/hepop.js b/hepop.js index 8a8613f..20670bf 100644 --- a/hepop.js +++ b/hepop.js @@ -39,6 +39,9 @@ class ParquetBufferManager { tags: { type: 'UTF8' }, // JSON string of tags // Dynamic fields will be added based on data }); + + // Add metadata locks + this.metadataLocks = new Map(); } async initialize() { @@ -244,52 +247,84 @@ class ParquetBufferManager { ); } + async acquireMetadataLock(type) { + while (this.metadataLocks.get(type)) { + await new Promise(resolve => setTimeout(resolve, 100)); + } + this.metadataLocks.set(type, true); + } + + async releaseMetadataLock(type) { + this.metadataLocks.set(type, false); + } + async writeTypeMetadata(type, metadata) { - const metadataPath = this.getTypeMetadataPath(type); - // Ensure directory exists before writing temp file - await fs.promises.mkdir(path.dirname(metadataPath), { recursive: true }); + await this.acquireMetadataLock(type); - const tempPath = `${metadataPath}.tmp`; try { + const metadataPath = this.getTypeMetadataPath(type); + const dirPath = path.dirname(metadataPath); + + // Ensure directory exists + await fs.promises.mkdir(dirPath, { recursive: true }); + + // Write to temp file first + const tempPath = `${metadataPath}.tmp`; await fs.promises.writeFile(tempPath, JSON.stringify(metadata, null, 2)); + + // Verify temp file exists and is valid JSON + const tempContent = await fs.promises.readFile(tempPath, 'utf8'); + JSON.parse(tempContent); // Validate JSON + + // Atomic rename await fs.promises.rename(tempPath, metadataPath); + + // Verify final file exists + await fs.promises.access(metadataPath); + } catch (error) { - try { - await fs.promises.unlink(tempPath); - } catch (e) { - // Ignore cleanup errors - } + console.error(`Error writing metadata for type ${type}:`, error); throw error; + } finally { + await this.releaseMetadataLock(type); } } async updateMetadata(type, filePath, sizeBytes, rowCount, timestamps) { - // Handle timestamps safely - const getTimeInNanos = (timestamp) => { - if (timestamp instanceof Date) { - return BigInt(timestamp.getTime()) * BigInt(1000000); - } - // If it's already a BigInt (nanoseconds), return as is - if (typeof timestamp === 'bigint') { - return timestamp; + await this.acquireMetadataLock(type); + + try { + // Get current metadata first + const typeMetadata = await this.getTypeMetadata(type); + + // Process timestamps in chunks to avoid stack overflow + const chunkSize = 1000; + let minTime = Infinity; + let maxTime = -Infinity; + + for (let i = 0; i < timestamps.length; i += chunkSize) { + const chunk = timestamps.slice(i, i + chunkSize); + for (const timestamp of chunk) { + let timeNanos; + if (timestamp instanceof Date) { + timeNanos = BigInt(timestamp.getTime()) * BigInt(1000000); + } else if (typeof timestamp === 'bigint') { + timeNanos = timestamp; + } else { + timeNanos = BigInt(Math.floor(timestamp)) * BigInt(1000000); + } + + const timeMs = Number(timeNanos / BigInt(1000000)); + minTime = Math.min(minTime, timeMs); + maxTime = Math.max(maxTime, timeMs); + } } - // Convert number to nanoseconds - return BigInt(Math.floor(timestamp)) * BigInt(1000000); - }; - try { - const timeNanos = timestamps.map(getTimeInNanos); - const minTime = timeNanos.length > 0 ? - Number(timeNanos.reduce((a, b) => a < b ? a : b)) : - Date.now() * 1000000; - const maxTime = timeNanos.length > 0 ? - Number(timeNanos.reduce((a, b) => a > b ? a : b)) : - Date.now() * 1000000; + // Convert back to nanoseconds + minTime = minTime * 1000000; + maxTime = maxTime * 1000000; const chunkTime = Math.floor(minTime / 600000000000) * 600000000000; - // Get current type metadata - const typeMetadata = await this.getTypeMetadata(type); - const fileInfo = { id: typeMetadata.files.length, path: filePath, @@ -301,7 +336,7 @@ class ParquetBufferManager { type: 'raw' }; - // Update type metadata + // Update metadata typeMetadata.files.push(fileInfo); typeMetadata.parquet_size_bytes += sizeBytes; typeMetadata.row_count += rowCount; @@ -313,9 +348,12 @@ class ParquetBufferManager { // Write updated metadata await this.writeTypeMetadata(type, typeMetadata); + } catch (error) { - console.error('Error updating metadata:', error); + console.error(`Error updating metadata for type ${type}:`, error); throw error; + } finally { + await this.releaseMetadataLock(type); } } @@ -1297,7 +1335,7 @@ class HEPServer { return new Date( (rcinfo.timeSeconds * 1000) + (((100000 + rcinfo.timeUseconds) / 1000) - 100) - ); + )); } } From 0461b4c01efdb3b6415e849b7ca8448d3394e808 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 13:14:36 +0100 Subject: [PATCH 35/60] fix timestamping --- hepop.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hepop.js b/hepop.js index 20670bf..6880572 100644 --- a/hepop.js +++ b/hepop.js @@ -1335,7 +1335,7 @@ class HEPServer { return new Date( (rcinfo.timeSeconds * 1000) + (((100000 + rcinfo.timeUseconds) / 1000) - 100) - )); + ); } } From fa014ce3abff36ee95eadbd4bb741d4fb07039f5 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 13:17:12 +0100 Subject: [PATCH 36/60] fix timestamping --- hepop.js | 160 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 82 insertions(+), 78 deletions(-) diff --git a/hepop.js b/hepop.js index 6880572..fa4954d 100644 --- a/hepop.js +++ b/hepop.js @@ -180,15 +180,23 @@ class ParquetBufferManager { } async flush(type) { + console.log(`Attempting to flush ${type}`); const buffer = this.buffers.get(type); - if (!buffer?.rows.length) return; + if (!buffer?.rows.length) { + console.log(`No rows to flush for ${type}`); + return; + } try { + console.log(`Flushing ${buffer.rows.length} rows for ${type}`); const filePath = await this.getFilePath(type, buffer.isLineProtocol ? buffer.rows[0].timestamp : buffer.rows[0].create_date); + await fs.promises.mkdir(path.dirname(filePath), { recursive: true }); + console.log(`Created directory for ${filePath}`); // Create writer with appropriate schema + console.log(`Creating writer for ${type} with schema:`, buffer.schema); const writer = await parquet.ParquetWriter.openFile( buffer.schema, filePath, @@ -209,9 +217,11 @@ class ParquetBufferManager { } await writer.close(); + console.log(`Closed writer for ${filePath}`); // Get file stats const stats = await fs.promises.stat(filePath); + console.log(`File stats for ${filePath}:`, stats); // Update metadata await this.updateMetadata( @@ -230,9 +240,10 @@ class ParquetBufferManager { isLineProtocol: buffer.isLineProtocol }); - console.log(`Wrote ${buffer.rows.length} records to ${filePath}`); + console.log(`Successfully wrote ${buffer.rows.length} records to ${filePath}`); } catch (error) { - console.error(`Parquet flush error:`, error); + console.error(`Parquet flush error for ${type}:`, error); + throw error; } } @@ -363,26 +374,6 @@ class ParquetBufferManager { } } - async ensureDirectories() { - const metadataDir = path.join(this.baseDir, this.writerId); - await fs.promises.mkdir(metadataDir, { recursive: true }); - - // Write initial metadata file if it doesn't exist - const metadataPath = path.join(metadataDir, 'metadata.json'); - if (!fs.existsSync(metadataPath)) { - const initialMetadata = { - writer_id: this.writerId, - next_db_id: 0, - next_table_id: 0 - }; - - await fs.promises.writeFile( - metadataPath, - JSON.stringify(initialMetadata, null, 2) - ); - } - } - async addLineProtocol(data) { const measurement = data.measurement; if (!this.buffers.has(measurement)) { @@ -456,72 +447,85 @@ class ParquetBufferManager { } async addLineProtocolBulk(measurement, rows) { + console.log(`Processing bulk write for ${measurement}: ${rows.length} rows`); const type = measurement; - if (!this.buffers.has(type)) { - // Get existing schema if any - let existingSchema = null; - try { - const typeMetadata = await this.getTypeMetadata(type); - if (typeMetadata.files.length > 0) { - const reader = await parquet.ParquetReader.openFile(typeMetadata.files[0].path); - existingSchema = reader.schema; - await reader.close(); + try { + if (!this.buffers.has(type)) { + console.log(`Creating new buffer for ${type}`); + // Get existing schema if any + let existingSchema = null; + try { + const typeMetadata = await this.getTypeMetadata(type); + if (typeMetadata.files.length > 0) { + const reader = await parquet.ParquetReader.openFile(typeMetadata.files[0].path); + existingSchema = reader.schema; + await reader.close(); + console.log(`Found existing schema for ${type}`); + } + } catch (error) { + console.log(`Creating new schema for ${type}`); } - } catch (error) { - console.log(`No existing schema found for ${type}, creating new one`); - } - // Merge schemas - const newFields = {}; - rows.forEach(row => { - Object.entries(row).forEach(([key, value]) => { - if (key !== 'timestamp' && key !== 'tags') { - newFields[key] = { - type: typeof value === 'number' ? 'DOUBLE' : - typeof value === 'boolean' ? 'BOOLEAN' : 'UTF8', - optional: true // Make all fields optional - }; - } + // Merge schemas + const newFields = {}; + rows.forEach(row => { + Object.entries(row).forEach(([key, value]) => { + if (key !== 'timestamp' && key !== 'tags') { + newFields[key] = { + type: typeof value === 'number' ? 'DOUBLE' : + typeof value === 'boolean' ? 'BOOLEAN' : 'UTF8', + optional: true + }; + } + }); }); - }); - const schema = new parquet.ParquetSchema({ - timestamp: { type: 'TIMESTAMP_MILLIS' }, - tags: { type: 'UTF8' }, - ...newFields - }); + const schema = new parquet.ParquetSchema({ + timestamp: { type: 'TIMESTAMP_MILLIS' }, + tags: { type: 'UTF8' }, + ...newFields + }); - this.buffers.set(type, { - rows: [], - schema, - isLineProtocol: true - }); - } + console.log(`Created schema for ${type}:`, schema); - const buffer = this.buffers.get(type); - - // Ensure all rows have all fields - const allFields = new Set(); - buffer.schema.fieldList.forEach(f => allFields.add(f.path[0])); - - const normalizedRows = rows.map(row => { - const normalized = { - timestamp: row.timestamp, - tags: row.tags - }; - allFields.forEach(field => { - if (field !== 'timestamp' && field !== 'tags') { - normalized[field] = row[field] ?? null; - } + this.buffers.set(type, { + rows: [], + schema, + isLineProtocol: true + }); + } + + const buffer = this.buffers.get(type); + console.log(`Current buffer size for ${type}: ${buffer.rows.length}`); + + // Ensure all rows have all fields + const allFields = new Set(); + buffer.schema.fieldList.forEach(f => allFields.add(f.path[0])); + + const normalizedRows = rows.map(row => { + const normalized = { + timestamp: row.timestamp, + tags: row.tags + }; + allFields.forEach(field => { + if (field !== 'timestamp' && field !== 'tags') { + normalized[field] = row[field] ?? null; + } + }); + return normalized; }); - return normalized; - }); - buffer.rows.push(...normalizedRows); + buffer.rows.push(...normalizedRows); + console.log(`Buffer size after push for ${type}: ${buffer.rows.length}`); - if (buffer.rows.length >= this.bufferSize) { - await this.flush(type); + if (buffer.rows.length >= this.bufferSize) { + console.log(`Buffer full for ${type}, flushing...`); + await this.flush(type); + } + } catch (error) { + console.error(`Error in addLineProtocolBulk for ${type}:`, error); + throw error; } } } From dbf5e9b7a35c8a43d255048c336b78e5d7618276 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 13:18:54 +0100 Subject: [PATCH 37/60] fix timestamping --- hepop.js | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/hepop.js b/hepop.js index fa4954d..ded5096 100644 --- a/hepop.js +++ b/hepop.js @@ -44,6 +44,30 @@ class ParquetBufferManager { this.metadataLocks = new Map(); } + async ensureDirectories() { + const metadataDir = path.join(this.baseDir, this.writerId); + await fs.promises.mkdir(metadataDir, { recursive: true }); + + // Write initial metadata file if it doesn't exist + const metadataPath = path.join(metadataDir, 'metadata.json'); + if (!fs.existsSync(metadataPath)) { + const initialMetadata = { + writer_id: this.writerId, + next_db_id: 0, + next_table_id: 0 + }; + + await fs.promises.writeFile( + metadataPath, + JSON.stringify(initialMetadata, null, 2) + ); + } + + // Create dbs directory + const dbsDir = path.join(metadataDir, 'dbs'); + await fs.promises.mkdir(dbsDir, { recursive: true }); + } + async initialize() { // Ensure base directories exist await this.ensureDirectories(); From 258974e414b9b0425f8e4b146efeac22c6ca82b2 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 13:23:04 +0100 Subject: [PATCH 38/60] fix timestamping --- hepop.js | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/hepop.js b/hepop.js index ded5096..7b1ed3d 100644 --- a/hepop.js +++ b/hepop.js @@ -144,11 +144,9 @@ class ParquetBufferManager { // Handle nanosecond timestamps let date; if (typeof timestamp === 'number') { - // Keep nanosecond precision by using floor division for date parts - const ms = Math.floor(timestamp / 1000000); // Get milliseconds + const ms = Math.floor(timestamp / 1000000); date = new Date(ms); } else if (typeof timestamp === 'string') { - // Parse string timestamp date = new Date(timestamp); } else if (timestamp instanceof Date) { date = timestamp; @@ -160,11 +158,15 @@ class ParquetBufferManager { throw new Error(`Invalid date from timestamp: ${timestamp}`); } - // Use date for directory structure only + // Use date for directory structure const datePath = date.toISOString().split('T')[0]; const hour = date.getHours().toString().padStart(2, '0'); const minute = Math.floor(date.getMinutes() / 10) * 10; const minutePath = minute.toString().padStart(2, '0'); + + // Increment WAL sequence before creating path + typeMetadata.wal_sequence++; + await this.writeTypeMetadata(type, typeMetadata); return path.join( this.baseDir, @@ -212,22 +214,21 @@ class ParquetBufferManager { } try { - console.log(`Flushing ${buffer.rows.length} rows for ${type}`); + // Get new file path with incremented sequence const filePath = await this.getFilePath(type, buffer.isLineProtocol ? buffer.rows[0].timestamp : buffer.rows[0].create_date); + console.log(`Creating new file: ${filePath}`); await fs.promises.mkdir(path.dirname(filePath), { recursive: true }); - console.log(`Created directory for ${filePath}`); - // Create writer with appropriate schema - console.log(`Creating writer for ${type} with schema:`, buffer.schema); + // Create writer with schema const writer = await parquet.ParquetWriter.openFile( buffer.schema, filePath, this.writerOptions ); - // Write rows based on type + // Write rows for (const data of buffer.rows) { if (buffer.isLineProtocol) { await writer.appendRow(data); @@ -241,13 +242,11 @@ class ParquetBufferManager { } await writer.close(); - console.log(`Closed writer for ${filePath}`); // Get file stats const stats = await fs.promises.stat(filePath); - console.log(`File stats for ${filePath}:`, stats); - // Update metadata + // Update metadata with new file await this.updateMetadata( type, filePath, @@ -257,7 +256,7 @@ class ParquetBufferManager { d.timestamp : new Date(d.create_date)) ); - // Clear buffer + // Clear buffer after successful write this.buffers.set(type, { rows: [], schema: buffer.schema, From b853367dd2994e5f31eeda169d9f823598d7c989 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 13:24:35 +0100 Subject: [PATCH 39/60] fix timestamping --- hepop.js | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/hepop.js b/hepop.js index 7b1ed3d..896b853 100644 --- a/hepop.js +++ b/hepop.js @@ -206,29 +206,21 @@ class ParquetBufferManager { } async flush(type) { - console.log(`Attempting to flush ${type}`); const buffer = this.buffers.get(type); - if (!buffer?.rows.length) { - console.log(`No rows to flush for ${type}`); - return; - } + if (!buffer?.rows.length) return; try { - // Get new file path with incremented sequence const filePath = await this.getFilePath(type, buffer.isLineProtocol ? buffer.rows[0].timestamp : buffer.rows[0].create_date); - console.log(`Creating new file: ${filePath}`); await fs.promises.mkdir(path.dirname(filePath), { recursive: true }); - // Create writer with schema const writer = await parquet.ParquetWriter.openFile( buffer.schema, filePath, this.writerOptions ); - // Write rows for (const data of buffer.rows) { if (buffer.isLineProtocol) { await writer.appendRow(data); @@ -243,10 +235,8 @@ class ParquetBufferManager { await writer.close(); - // Get file stats const stats = await fs.promises.stat(filePath); - // Update metadata with new file await this.updateMetadata( type, filePath, @@ -256,14 +246,13 @@ class ParquetBufferManager { d.timestamp : new Date(d.create_date)) ); - // Clear buffer after successful write this.buffers.set(type, { rows: [], schema: buffer.schema, isLineProtocol: buffer.isLineProtocol }); - console.log(`Successfully wrote ${buffer.rows.length} records to ${filePath}`); + console.log(`Wrote ${buffer.rows.length} records to ${path.basename(filePath)}`); } catch (error) { console.error(`Parquet flush error for ${type}:`, error); throw error; @@ -470,12 +459,10 @@ class ParquetBufferManager { } async addLineProtocolBulk(measurement, rows) { - console.log(`Processing bulk write for ${measurement}: ${rows.length} rows`); const type = measurement; try { if (!this.buffers.has(type)) { - console.log(`Creating new buffer for ${type}`); // Get existing schema if any let existingSchema = null; try { @@ -484,10 +471,9 @@ class ParquetBufferManager { const reader = await parquet.ParquetReader.openFile(typeMetadata.files[0].path); existingSchema = reader.schema; await reader.close(); - console.log(`Found existing schema for ${type}`); } } catch (error) { - console.log(`Creating new schema for ${type}`); + // Silently create new schema } // Merge schemas @@ -510,8 +496,6 @@ class ParquetBufferManager { ...newFields }); - console.log(`Created schema for ${type}:`, schema); - this.buffers.set(type, { rows: [], schema, @@ -520,9 +504,8 @@ class ParquetBufferManager { } const buffer = this.buffers.get(type); - console.log(`Current buffer size for ${type}: ${buffer.rows.length}`); - // Ensure all rows have all fields + // Ensure all fields are present const allFields = new Set(); buffer.schema.fieldList.forEach(f => allFields.add(f.path[0])); @@ -540,10 +523,8 @@ class ParquetBufferManager { }); buffer.rows.push(...normalizedRows); - console.log(`Buffer size after push for ${type}: ${buffer.rows.length}`); if (buffer.rows.length >= this.bufferSize) { - console.log(`Buffer full for ${type}, flushing...`); await this.flush(type); } } catch (error) { From b1d1a856328d59157f90ce5b25fe0803382522a1 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 13:25:33 +0100 Subject: [PATCH 40/60] fix timestamping --- hepop.js | 66 ++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 43 insertions(+), 23 deletions(-) diff --git a/hepop.js b/hepop.js index 896b853..82d6eaa 100644 --- a/hepop.js +++ b/hepop.js @@ -198,7 +198,7 @@ class ParquetBufferManager { } startFlushInterval() { - setInterval(() => { + this.flushInterval = setInterval(() => { for (const type of this.buffers.keys()) { this.flush(type); } @@ -381,8 +381,18 @@ class ParquetBufferManager { } async close() { + // Clear flush interval first + if (this.flushInterval) { + clearInterval(this.flushInterval); + } + + // Final flush of all buffers for (const type of this.buffers.keys()) { - await this.flush(type); + try { + await this.flush(type); + } catch (error) { + console.error(`Error flushing buffer for ${type}:`, error); + } } } @@ -1264,50 +1274,60 @@ class HEPServer { async shutdown() { console.log('Shutting down HEP server...'); - // Stop compaction first - if (this.compaction) { - await this.compaction.close(); - } - - // Stop TCP server + // Stop accepting new connections first if (this.tcpServer) { try { this.tcpServer.stop(true); - this.tcpServer.unref(); } catch (error) { console.error('Error stopping TCP server:', error); } } - // Stop UDP server if (this.udpServer) { try { - // UDP sockets use close() not stop() - if (this.udpSever?.close) this.udpServer.close(); + if (this.udpServer.close) this.udpServer.close(); } catch (error) { console.error('Error stopping UDP server:', error); } } - // Stop HTTP server if (this.httpServer) { try { this.httpServer.stop(true); - this.httpServer.unref(); } catch (error) { console.error('Error stopping HTTP server:', error); } } - - // Flush any remaining data - try { - await this.buffer.close(); - } catch (error) { - console.error('Error flushing buffers:', error); + + // Stop compaction and intervals + if (this.compaction) { + try { + await this.compaction.close(); + } catch (error) { + console.error('Error stopping compaction:', error); + } } - + + // Stop buffer manager intervals + if (this.buffer) { + try { + // Clear flush interval + if (this.buffer.flushInterval) { + clearInterval(this.buffer.flushInterval); + } + // Final flush + await this.buffer.close(); + } catch (error) { + console.error('Error closing buffer manager:', error); + } + } + console.log('Server shutdown complete'); - process.exit(0); + // Force exit after 1 second if still running + setTimeout(() => { + console.log('Forcing exit...'); + process.exit(0); + }, 1000); } handleData(data, socket) { @@ -1343,7 +1363,7 @@ class HEPServer { return new Date( (rcinfo.timeSeconds * 1000) + (((100000 + rcinfo.timeUseconds) / 1000) - 100) - ); + )); } } From 92e10f8f13c877bde82262c64f154594cd9f1392 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 13:26:09 +0100 Subject: [PATCH 41/60] fix timestamping --- hepop.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hepop.js b/hepop.js index 82d6eaa..183a4de 100644 --- a/hepop.js +++ b/hepop.js @@ -1363,7 +1363,7 @@ class HEPServer { return new Date( (rcinfo.timeSeconds * 1000) + (((100000 + rcinfo.timeUseconds) / 1000) - 100) - )); + ); } } From 49b1f6f3a06b81349ac59c61a22926569dc13cfd Mon Sep 17 00:00:00 2001 From: lmangani Date: Tue, 11 Feb 2025 12:32:50 +0000 Subject: [PATCH 42/60] restore buffer version --- hepop.js | 376 ++++++++++++++++++++------------------------------- lineproto.js | 65 +++------ 2 files changed, 160 insertions(+), 281 deletions(-) diff --git a/hepop.js b/hepop.js index 183a4de..c3abb41 100644 --- a/hepop.js +++ b/hepop.js @@ -39,33 +39,6 @@ class ParquetBufferManager { tags: { type: 'UTF8' }, // JSON string of tags // Dynamic fields will be added based on data }); - - // Add metadata locks - this.metadataLocks = new Map(); - } - - async ensureDirectories() { - const metadataDir = path.join(this.baseDir, this.writerId); - await fs.promises.mkdir(metadataDir, { recursive: true }); - - // Write initial metadata file if it doesn't exist - const metadataPath = path.join(metadataDir, 'metadata.json'); - if (!fs.existsSync(metadataPath)) { - const initialMetadata = { - writer_id: this.writerId, - next_db_id: 0, - next_table_id: 0 - }; - - await fs.promises.writeFile( - metadataPath, - JSON.stringify(initialMetadata, null, 2) - ); - } - - // Create dbs directory - const dbsDir = path.join(metadataDir, 'dbs'); - await fs.promises.mkdir(dbsDir, { recursive: true }); } async initialize() { @@ -144,9 +117,11 @@ class ParquetBufferManager { // Handle nanosecond timestamps let date; if (typeof timestamp === 'number') { - const ms = Math.floor(timestamp / 1000000); + // Keep nanosecond precision by using floor division for date parts + const ms = Math.floor(timestamp / 1000000); // Get milliseconds date = new Date(ms); } else if (typeof timestamp === 'string') { + // Parse string timestamp date = new Date(timestamp); } else if (timestamp instanceof Date) { date = timestamp; @@ -158,15 +133,11 @@ class ParquetBufferManager { throw new Error(`Invalid date from timestamp: ${timestamp}`); } - // Use date for directory structure + // Use date for directory structure only const datePath = date.toISOString().split('T')[0]; const hour = date.getHours().toString().padStart(2, '0'); const minute = Math.floor(date.getMinutes() / 10) * 10; const minutePath = minute.toString().padStart(2, '0'); - - // Increment WAL sequence before creating path - typeMetadata.wal_sequence++; - await this.writeTypeMetadata(type, typeMetadata); return path.join( this.baseDir, @@ -198,7 +169,7 @@ class ParquetBufferManager { } startFlushInterval() { - this.flushInterval = setInterval(() => { + setInterval(() => { for (const type of this.buffers.keys()) { this.flush(type); } @@ -212,15 +183,16 @@ class ParquetBufferManager { try { const filePath = await this.getFilePath(type, buffer.isLineProtocol ? buffer.rows[0].timestamp : buffer.rows[0].create_date); - await fs.promises.mkdir(path.dirname(filePath), { recursive: true }); + // Create writer with appropriate schema const writer = await parquet.ParquetWriter.openFile( buffer.schema, filePath, this.writerOptions ); + // Write rows based on type for (const data of buffer.rows) { if (buffer.isLineProtocol) { await writer.appendRow(data); @@ -235,8 +207,10 @@ class ParquetBufferManager { await writer.close(); + // Get file stats const stats = await fs.promises.stat(filePath); + // Update metadata await this.updateMetadata( type, filePath, @@ -246,16 +220,16 @@ class ParquetBufferManager { d.timestamp : new Date(d.create_date)) ); + // Clear buffer this.buffers.set(type, { rows: [], schema: buffer.schema, isLineProtocol: buffer.isLineProtocol }); - console.log(`Wrote ${buffer.rows.length} records to ${path.basename(filePath)}`); + console.log(`Wrote ${buffer.rows.length} records to ${filePath}`); } catch (error) { - console.error(`Parquet flush error for ${type}:`, error); - throw error; + console.error(`Parquet flush error:`, error); } } @@ -270,129 +244,80 @@ class ParquetBufferManager { ); } - async acquireMetadataLock(type) { - while (this.metadataLocks.get(type)) { - await new Promise(resolve => setTimeout(resolve, 100)); - } - this.metadataLocks.set(type, true); - } - - async releaseMetadataLock(type) { - this.metadataLocks.set(type, false); - } - async writeTypeMetadata(type, metadata) { - await this.acquireMetadataLock(type); + const metadataPath = this.getTypeMetadataPath(type); + await fs.promises.mkdir(path.dirname(metadataPath), { recursive: true }); + const tempPath = `${metadataPath}.tmp`; try { - const metadataPath = this.getTypeMetadataPath(type); - const dirPath = path.dirname(metadataPath); - - // Ensure directory exists - await fs.promises.mkdir(dirPath, { recursive: true }); - - // Write to temp file first - const tempPath = `${metadataPath}.tmp`; await fs.promises.writeFile(tempPath, JSON.stringify(metadata, null, 2)); - - // Verify temp file exists and is valid JSON - const tempContent = await fs.promises.readFile(tempPath, 'utf8'); - JSON.parse(tempContent); // Validate JSON - - // Atomic rename await fs.promises.rename(tempPath, metadataPath); - - // Verify final file exists - await fs.promises.access(metadataPath); - } catch (error) { - console.error(`Error writing metadata for type ${type}:`, error); + try { + await fs.promises.unlink(tempPath); + } catch (e) { + // Ignore cleanup errors + } throw error; - } finally { - await this.releaseMetadataLock(type); } } async updateMetadata(type, filePath, sizeBytes, rowCount, timestamps) { - await this.acquireMetadataLock(type); - - try { - // Get current metadata first - const typeMetadata = await this.getTypeMetadata(type); - - // Process timestamps in chunks to avoid stack overflow - const chunkSize = 1000; - let minTime = Infinity; - let maxTime = -Infinity; - - for (let i = 0; i < timestamps.length; i += chunkSize) { - const chunk = timestamps.slice(i, i + chunkSize); - for (const timestamp of chunk) { - let timeNanos; - if (timestamp instanceof Date) { - timeNanos = BigInt(timestamp.getTime()) * BigInt(1000000); - } else if (typeof timestamp === 'bigint') { - timeNanos = timestamp; - } else { - timeNanos = BigInt(Math.floor(timestamp)) * BigInt(1000000); - } - - const timeMs = Number(timeNanos / BigInt(1000000)); - minTime = Math.min(minTime, timeMs); - maxTime = Math.max(maxTime, timeMs); - } - } + const minTime = Math.min(...timestamps.map(t => t.getTime() * 1000000)); + const maxTime = Math.max(...timestamps.map(t => t.getTime() * 1000000)); + const chunkTime = Math.floor(minTime / 600000000000) * 600000000000; - // Convert back to nanoseconds - minTime = minTime * 1000000; - maxTime = maxTime * 1000000; - const chunkTime = Math.floor(minTime / 600000000000) * 600000000000; - - const fileInfo = { - id: typeMetadata.files.length, - path: filePath, - size_bytes: sizeBytes, - row_count: rowCount, - chunk_time: chunkTime, - min_time: minTime, - max_time: maxTime, - type: 'raw' - }; + // Get current type metadata + const typeMetadata = await this.getTypeMetadata(type); - // Update metadata - typeMetadata.files.push(fileInfo); - typeMetadata.parquet_size_bytes += sizeBytes; - typeMetadata.row_count += rowCount; - typeMetadata.min_time = typeMetadata.min_time ? - Math.min(typeMetadata.min_time, minTime) : minTime; - typeMetadata.max_time = typeMetadata.max_time ? - Math.max(typeMetadata.max_time, maxTime) : maxTime; - typeMetadata.wal_sequence++; + const fileInfo = { + id: typeMetadata.files.length, + path: filePath, + size_bytes: sizeBytes, + row_count: rowCount, + chunk_time: chunkTime, + min_time: minTime, + max_time: maxTime, + type: 'raw' + }; - // Write updated metadata - await this.writeTypeMetadata(type, typeMetadata); - - } catch (error) { - console.error(`Error updating metadata for type ${type}:`, error); - throw error; - } finally { - await this.releaseMetadataLock(type); - } + // Update type metadata + typeMetadata.files.push(fileInfo); + typeMetadata.parquet_size_bytes += sizeBytes; + typeMetadata.row_count += rowCount; + typeMetadata.min_time = typeMetadata.min_time ? + Math.min(typeMetadata.min_time, minTime) : minTime; + typeMetadata.max_time = typeMetadata.max_time ? + Math.max(typeMetadata.max_time, maxTime) : maxTime; + typeMetadata.wal_sequence++; + + // Write updated metadata + await this.writeTypeMetadata(type, typeMetadata); } async close() { - // Clear flush interval first - if (this.flushInterval) { - clearInterval(this.flushInterval); + for (const type of this.buffers.keys()) { + await this.flush(type); } + } - // Final flush of all buffers - for (const type of this.buffers.keys()) { - try { - await this.flush(type); - } catch (error) { - console.error(`Error flushing buffer for ${type}:`, error); - } + async ensureDirectories() { + const metadataDir = path.join(this.baseDir, this.writerId); + await fs.promises.mkdir(metadataDir, { recursive: true }); + + // Write initial metadata file if it doesn't exist + const metadataPath = path.join(metadataDir, 'metadata.json'); + if (!fs.existsSync(metadataPath)) { + const initialMetadata = { + writer_id: this.writerId, + next_db_id: 0, + next_table_id: 0 + }; + + await fs.promises.writeFile( + metadataPath, + JSON.stringify(initialMetadata, null, 2) + ); } } @@ -471,75 +396,70 @@ class ParquetBufferManager { async addLineProtocolBulk(measurement, rows) { const type = measurement; - try { - if (!this.buffers.has(type)) { - // Get existing schema if any - let existingSchema = null; - try { - const typeMetadata = await this.getTypeMetadata(type); - if (typeMetadata.files.length > 0) { - const reader = await parquet.ParquetReader.openFile(typeMetadata.files[0].path); - existingSchema = reader.schema; - await reader.close(); - } - } catch (error) { - // Silently create new schema + if (!this.buffers.has(type)) { + // Get existing schema if any + let existingSchema = null; + try { + const typeMetadata = await this.getTypeMetadata(type); + if (typeMetadata.files.length > 0) { + const reader = await parquet.ParquetReader.openFile(typeMetadata.files[0].path); + existingSchema = reader.schema; + await reader.close(); } + } catch (error) { + console.log(`No existing schema found for ${type}, creating new one`); + } - // Merge schemas - const newFields = {}; - rows.forEach(row => { - Object.entries(row).forEach(([key, value]) => { - if (key !== 'timestamp' && key !== 'tags') { - newFields[key] = { - type: typeof value === 'number' ? 'DOUBLE' : - typeof value === 'boolean' ? 'BOOLEAN' : 'UTF8', - optional: true - }; - } - }); + // Merge schemas + const newFields = {}; + rows.forEach(row => { + Object.entries(row).forEach(([key, value]) => { + if (key !== 'timestamp' && key !== 'tags') { + newFields[key] = { + type: typeof value === 'number' ? 'DOUBLE' : + typeof value === 'boolean' ? 'BOOLEAN' : 'UTF8', + optional: true // Make all fields optional + }; + } }); + }); - const schema = new parquet.ParquetSchema({ - timestamp: { type: 'TIMESTAMP_MILLIS' }, - tags: { type: 'UTF8' }, - ...newFields - }); + const schema = new parquet.ParquetSchema({ + timestamp: { type: 'TIMESTAMP_MILLIS' }, + tags: { type: 'UTF8' }, + ...newFields + }); - this.buffers.set(type, { - rows: [], - schema, - isLineProtocol: true - }); - } + this.buffers.set(type, { + rows: [], + schema, + isLineProtocol: true + }); + } - const buffer = this.buffers.get(type); - - // Ensure all fields are present - const allFields = new Set(); - buffer.schema.fieldList.forEach(f => allFields.add(f.path[0])); - - const normalizedRows = rows.map(row => { - const normalized = { - timestamp: row.timestamp, - tags: row.tags - }; - allFields.forEach(field => { - if (field !== 'timestamp' && field !== 'tags') { - normalized[field] = row[field] ?? null; - } - }); - return normalized; + const buffer = this.buffers.get(type); + + // Ensure all rows have all fields + const allFields = new Set(); + buffer.schema.fieldList.forEach(f => allFields.add(f.path[0])); + + const normalizedRows = rows.map(row => { + const normalized = { + timestamp: row.timestamp, + tags: row.tags + }; + allFields.forEach(field => { + if (field !== 'timestamp' && field !== 'tags') { + normalized[field] = row[field] ?? null; + } }); + return normalized; + }); - buffer.rows.push(...normalizedRows); + buffer.rows.push(...normalizedRows); - if (buffer.rows.length >= this.bufferSize) { - await this.flush(type); - } - } catch (error) { - console.error(`Error in addLineProtocolBulk for ${type}:`, error); - throw error; + if (buffer.rows.length >= this.bufferSize) { + await this.flush(type); } } } @@ -1222,7 +1142,7 @@ class HEPServer { } bulkData.get(measurement).push({ - timestamp: new Date(parsed.timestampMs), // Use millisecond timestamp for Date + timestamp: new Date(parsed.timestamp), tags: JSON.stringify(parsed.tags), ...parsed.fields }); @@ -1274,60 +1194,50 @@ class HEPServer { async shutdown() { console.log('Shutting down HEP server...'); - // Stop accepting new connections first + // Stop compaction first + if (this.compaction) { + await this.compaction.close(); + } + + // Stop TCP server if (this.tcpServer) { try { this.tcpServer.stop(true); + this.tcpServer.unref(); } catch (error) { console.error('Error stopping TCP server:', error); } } + // Stop UDP server if (this.udpServer) { try { - if (this.udpServer.close) this.udpServer.close(); + // UDP sockets use close() not stop() + if (this.udpSever?.close) this.udpServer.close(); } catch (error) { console.error('Error stopping UDP server:', error); } } + // Stop HTTP server if (this.httpServer) { try { this.httpServer.stop(true); + this.httpServer.unref(); } catch (error) { console.error('Error stopping HTTP server:', error); } } - - // Stop compaction and intervals - if (this.compaction) { - try { - await this.compaction.close(); - } catch (error) { - console.error('Error stopping compaction:', error); - } - } - - // Stop buffer manager intervals - if (this.buffer) { - try { - // Clear flush interval - if (this.buffer.flushInterval) { - clearInterval(this.buffer.flushInterval); - } - // Final flush - await this.buffer.close(); - } catch (error) { - console.error('Error closing buffer manager:', error); - } + + // Flush any remaining data + try { + await this.buffer.close(); + } catch (error) { + console.error('Error flushing buffers:', error); } - + console.log('Server shutdown complete'); - // Force exit after 1 second if still running - setTimeout(() => { - console.log('Forcing exit...'); - process.exit(0); - }, 1000); + process.exit(0); } handleData(data, socket) { diff --git a/lineproto.js b/lineproto.js index 781f612..231d41e 100644 --- a/lineproto.js +++ b/lineproto.js @@ -30,13 +30,21 @@ function formatValue(v, numericType) { const STRING_REGEX = /^"(.*)"$/; function parseValue(value) { - if (!value) return value; - if (value === 'true') return true; - if (value === 'false') return false; - if (value === 'null' || value === 'NULL') return null; - if (value.startsWith('"')) return value.slice(1, -1); - const num = value.includes('.') ? parseFloat(value) : parseInt(value); - return isNaN(num) ? value : num; + if (value == null) { + return undefined; + } else if (INT_REGEX.test(value)) { + return parseInt(value.slice(0, -1)); + } else if (TRUE_REGEX.test(value)) { + return true; + } else if (FALSE_REGEX.test(value)) { + return false; + } else if (STRING_REGEX.test(value)) { + return value.slice(1, -1); + } else if (!isNaN(value)) { + return parseFloat(value); + } else { + return undefined; + } } function joinObject(obj, withFormatting, config) { @@ -76,48 +84,9 @@ function formatValue(v, numericType) { }, {}); if (timestamp) { - // Handle different timestamp formats - if (/^\d{19}$/.test(timestamp)) { - // Nanosecond precision - store as nanoseconds and provide milliseconds - const nanos = BigInt(timestamp); - result.timestamp = nanos; - result.timestampMs = Number(nanos / BigInt(1000000)); - if (process.env.DEBUG) { - console.log('Parsed nanosecond timestamp:', { - original: timestamp, - nanos: nanos.toString(), - ms: result.timestampMs, - date: new Date(result.timestampMs).toISOString() - }); - } - } else if (/^\d+$/.test(timestamp)) { - // Regular numeric timestamp - assume milliseconds - result.timestampMs = parseInt(timestamp); - result.timestamp = BigInt(result.timestampMs) * BigInt(1000000); - if (process.env.DEBUG) { - console.log('Parsed millisecond timestamp:', { - original: timestamp, - ms: result.timestampMs, - date: new Date(result.timestampMs).toISOString() - }); - } - } else { - // Fallback to current time - const now = Date.now(); - result.timestampMs = now; - result.timestamp = BigInt(now) * BigInt(1000000); - if (process.env.DEBUG) { - console.log('Using current timestamp:', { - ms: result.timestampMs, - date: new Date(result.timestampMs).toISOString() - }); - } - } + result.timestamp = parseInt(timestamp) / 1000000; } else if (config.addTimestamp) { - // Current time - const now = Date.now(); - result.timestampMs = now; - result.timestamp = BigInt(now) * BigInt(1000000); + result.timestamp = Date.now(); } return result; From a25d2349aad6196f054244f8e5d17565408687af Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 13:54:42 +0100 Subject: [PATCH 43/60] fix timestamping --- hepop.js | 41 ++++++++++++++++++++++++------------ lineproto.js | 59 ++++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 74 insertions(+), 26 deletions(-) diff --git a/hepop.js b/hepop.js index c3abb41..4ee5fc9 100644 --- a/hepop.js +++ b/hepop.js @@ -39,6 +39,9 @@ class ParquetBufferManager { tags: { type: 'UTF8' }, // JSON string of tags // Dynamic fields will be added based on data }); + + // Add metadata write queue + this.metadataQueue = new Map(); // type -> Promise } async initialize() { @@ -245,21 +248,33 @@ class ParquetBufferManager { } async writeTypeMetadata(type, metadata) { - const metadataPath = this.getTypeMetadataPath(type); - await fs.promises.mkdir(path.dirname(metadataPath), { recursive: true }); - - const tempPath = `${metadataPath}.tmp`; - try { - await fs.promises.writeFile(tempPath, JSON.stringify(metadata, null, 2)); - await fs.promises.rename(tempPath, metadataPath); - } catch (error) { + // Get or create queue for this type + if (!this.metadataQueue.has(type)) { + this.metadataQueue.set(type, Promise.resolve()); + } + + // Add write to queue + const queue = this.metadataQueue.get(type); + const writePromise = queue.then(async () => { + const metadataPath = this.getTypeMetadataPath(type); + await fs.promises.mkdir(path.dirname(metadataPath), { recursive: true }); + + const tempPath = `${metadataPath}.tmp`; try { - await fs.promises.unlink(tempPath); - } catch (e) { - // Ignore cleanup errors + await fs.promises.writeFile(tempPath, JSON.stringify(metadata, null, 2)); + await fs.promises.rename(tempPath, metadataPath); + } catch (error) { + try { + await fs.promises.unlink(tempPath); + } catch (e) { + // Ignore cleanup errors + } + throw error; } - throw error; - } + }); + + this.metadataQueue.set(type, writePromise); + return writePromise; } async updateMetadata(type, filePath, sizeBytes, rowCount, timestamps) { diff --git a/lineproto.js b/lineproto.js index 231d41e..42e9310 100644 --- a/lineproto.js +++ b/lineproto.js @@ -21,6 +21,9 @@ function formatValue(v, numericType) { } function formatDate(date) { + if (typeof date === 'string' && /^\d{19}$/.test(date)) { + return date; // Already in nanoseconds + } return (date instanceof Date ? date.getTime() : date) * 1000000; } @@ -59,36 +62,66 @@ function formatValue(v, numericType) { .join(','); } + // Fast timestamp parsing using length checks and single conversion + function parseTimestamp(timestamp) { + if (!timestamp) return Date.now(); + const len = timestamp.length; + + // Most common case first: nanoseconds (19 digits) + if (len === 19) { + // Fast path: direct division to ms + return Math.floor(Number(timestamp) / 1000000); + } + + // Convert once and reuse + const num = Number(timestamp); + + // Handle other precisions + switch (len) { + case 16: // microseconds + return Math.floor(num / 1000); + case 13: // milliseconds + return num; + case 10: // seconds + return num * 1000; + default: + return num; + } + } + function parse(point, config) { const result = {}; - const [tags_, fields_, timestamp] = point.split(' '); - + + // Fast path: tags parsing const tags = (tags_ || '').split(','); - const fields = (fields_ || '').split(','); - result.measurement = tags.shift(); - result.tags = tags.reduce((out, tag) => { if (!tag) return out; - var [key, value] = tag.split('='); + const [key, value] = tag.split('='); out[key] = value; return out; }, {}); - - result.fields = fields.reduce((out, field) => { + + // Fast path: fields parsing + result.fields = fields_.split(',').reduce((out, field) => { if (!field) return out; - var [key, value] = field.split('='); + const [key, value] = field.split('='); out[key] = parseValue(value); return out; }, {}); - + + // Fast path: timestamp handling if (timestamp) { - result.timestamp = parseInt(timestamp) / 1000000; + result.timestamp = parseTimestamp(timestamp); + // Store original precision for parquet + result.timestampNano = timestamp; } else if (config.addTimestamp) { - result.timestamp = Date.now(); + const now = Date.now(); + result.timestamp = now; + result.timestampNano = (BigInt(now) * 1000000n).toString(); } - + return result; } From 31a111d6de560262678bac863641af4ca5fb9c7c Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 14:12:47 +0100 Subject: [PATCH 44/60] fix timestamping --- hepop.js | 101 +++++++++++++++++++------- query.js | 213 ++++++++++++++++++++++++------------------------------- 2 files changed, 168 insertions(+), 146 deletions(-) diff --git a/hepop.js b/hepop.js index 4ee5fc9..d23ecd4 100644 --- a/hepop.js +++ b/hepop.js @@ -316,26 +316,6 @@ class ParquetBufferManager { } } - async ensureDirectories() { - const metadataDir = path.join(this.baseDir, this.writerId); - await fs.promises.mkdir(metadataDir, { recursive: true }); - - // Write initial metadata file if it doesn't exist - const metadataPath = path.join(metadataDir, 'metadata.json'); - if (!fs.existsSync(metadataPath)) { - const initialMetadata = { - writer_id: this.writerId, - next_db_id: 0, - next_table_id: 0 - }; - - await fs.promises.writeFile( - metadataPath, - JSON.stringify(initialMetadata, null, 2) - ); - } - } - async addLineProtocol(data) { const measurement = data.measurement; if (!this.buffers.has(measurement)) { @@ -408,7 +388,7 @@ class ParquetBufferManager { } } - async addLineProtocolBulk(measurement, rows) { + async addLineProtocolBulk(measurement, rows, dbName) { const type = measurement; if (!this.buffers.has(type)) { @@ -1013,6 +993,72 @@ class CompactionManager { } } + async cleanupEmptyDirectories() { + try { + // Get current hour path format + const now = new Date(); + const currentHourPath = `${now.getFullYear()}-${String(now.getMonth() + 1).padStart(2, '0')}-${String(now.getDate()).padStart(2, '0')}/${String(now.getHours()).padStart(2, '0')}`; + + // Get all database directories + const dbsDir = path.join(this.bufferManager.baseDir, this.bufferManager.writerId, 'dbs'); + const databases = await fs.promises.readdir(dbsDir); + + for (const db of databases) { + const dbPath = path.join(dbsDir, db); + const tables = await fs.promises.readdir(dbPath); + + for (const table of tables) { + const tablePath = path.join(dbPath, table); + if (!(await fs.promises.stat(tablePath)).isDirectory()) continue; + + // Get all date directories + const dates = await fs.promises.readdir(tablePath); + for (const date of dates) { + const datePath = path.join(tablePath, date); + if (!(await fs.promises.stat(datePath)).isDirectory()) continue; + + // Get hour directories + const hours = await fs.promises.readdir(datePath); + for (const hour of hours) { + const hourPath = path.join(datePath, hour); + if (!(await fs.promises.stat(hourPath)).isDirectory()) continue; + + // Skip current hour + const dirPath = `${date}/${hour}`; + if (dirPath.startsWith(currentHourPath)) { + continue; + } + + // Check if directory is empty after compaction + const files = await fs.promises.readdir(hourPath); + if (files.length === 0) { + await fs.promises.rmdir(hourPath); + console.log(`Removed empty directory: ${hourPath}`); + + // Try to remove parent if empty + const parentFiles = await fs.promises.readdir(datePath); + if (parentFiles.length === 0) { + await fs.promises.rmdir(datePath); + console.log(`Removed empty date directory: ${datePath}`); + } + } + } + } + } + } + } catch (error) { + console.error('Error cleaning up directories:', error); + } + } + + // Add to compaction interval + startCompactionInterval() { + this.compactionInterval = setInterval(async () => { + await this.compact(); + await this.cleanupEmptyDirectories(); // Add cleanup after compaction + }, this.interval); + } + async close() { if (this.compactionInterval) { clearInterval(this.compactionInterval); @@ -1088,6 +1134,9 @@ class HEPServer { async fetch(req) { const url = new URL(req.url); + // Get db parameter, default to 'hep' if not provided + const dbName = url.searchParams.get('db') || 'hep'; + if (url.pathname === '/') { try { const html = await Bun.file('./index.html').text(); @@ -1117,7 +1166,7 @@ class HEPServer { return new Response('Method not allowed', { status: 405 }); } - const result = await self.queryClient.query(query); + const result = await self.queryClient.query(query, { db: dbName }); // Handle BigInt serialization const safeResult = JSON.parse(JSON.stringify(result, (key, value) => @@ -1142,11 +1191,12 @@ class HEPServer { const config = { addTimestamp: true, typeMappings: [], - defaultTypeMapping: 'float' + defaultTypeMapping: 'float', + dbName // Pass database name to buffer manager }; // Process lines in bulk - const bulkData = new Map(); // measurement -> rows + const bulkData = new Map(); for (const line of lines) { const parsed = parse(line, config); @@ -1165,8 +1215,7 @@ class HEPServer { // Bulk insert by measurement for (const [measurement, rows] of bulkData) { - // console.log(`Writing ${rows.length} rows to measurement ${measurement}`); - await self.buffer.addLineProtocolBulk(measurement, rows); + await self.buffer.addLineProtocolBulk(measurement, rows, dbName); } return new Response(null, { status: 201 }); diff --git a/query.js b/query.js index 8cf96d2..61a650a 100644 --- a/query.js +++ b/query.js @@ -142,142 +142,115 @@ class QueryClient { }; } - async query(sql) { - if (!this.db) { - throw new Error('QueryClient not initialized'); - } + async query(sql, options = {}) { + const parsed = this.parseQuery(sql); + const buffer = await this.getBuffer(); try { - const parsed = this.parseQuery(sql); - if (!parsed.type) { - throw new Error('Could not determine type from query'); - } - - const files = await this.findRelevantFiles(parsed.type, parsed.timeRange); - const connection = await this.db.connect(); - - try { - // Get buffered data for this type - const buffer = this.buffer?.buffers.get(parsed.type); - let query; - - if (buffer?.rows?.length) { - // Create temp table from buffer using VALUES - const valuesQuery = ` - CREATE TEMP TABLE IF NOT EXISTS buffer_data AS - SELECT * FROM (VALUES ${buffer.rows.map(row => { - if (buffer.isLineProtocol) { - return `( - TIMESTAMP '${row.timestamp.toISOString()}', - '${row.tags}', - ${Object.entries(row) - .filter(([k]) => !['timestamp', 'tags'].includes(k)) - .map(([,v]) => typeof v === 'string' ? `'${v}'` : v) - .join(', ')} - )`; - } else { - return `( - TIMESTAMP '${new Date(row.create_date).toISOString()}', - '${JSON.stringify(row.protocol_header)}', - '${row.raw || ''}' - )`; - } - }).join(', ')}) - AS t(${buffer.isLineProtocol ? - `timestamp, tags, ${Object.keys(buffer.rows[0]) - .filter(k => !['timestamp', 'tags'].includes(k)) - .join(', ')}` : - 'timestamp, rcinfo, payload'}) - `; - - await connection.runAndReadAll(valuesQuery); - - if (files.length > 0) { - // For aggregate queries, combine data before aggregating - const isAggregateQuery = parsed.columns.toLowerCase().includes('count(') || - parsed.columns.toLowerCase().includes('avg('); - - if (isAggregateQuery) { - query = ` - WITH all_data AS ( - SELECT ${buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} - FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}], union_by_name=true) - WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' - AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' - ${parsed.conditions} - UNION ALL - SELECT ${buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} - FROM buffer_data - WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' - AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' - ${parsed.conditions} - ) - SELECT ${parsed.columns} - FROM all_data - ${parsed.orderBy} - ${parsed.limit} - `; - } else { - query = ` - SELECT ${parsed.columns} - FROM ( - SELECT ${buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} - FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}], union_by_name=true) - WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' - AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' - ${parsed.conditions} - UNION ALL - SELECT ${buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} - FROM buffer_data - WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' - AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' - ${parsed.conditions} - ) combined_data - ${parsed.orderBy} - ${parsed.limit} - `; - } - } else { - // Only query buffer data - query = ` - SELECT ${parsed.columns} + // Get database name from options or default to 'hep' + const dbName = options.db || 'hep'; + + // Get matching files for time range and database + const files = await this.getFilesForTimeRange(parsed.timeRange, dbName); + + // Build query with union_by_name=true + let query; + if (files.length > 0) { + const isAggregateQuery = parsed.columns.toLowerCase().includes('count(') || + parsed.columns.toLowerCase().includes('avg('); + + if (isAggregateQuery) { + query = ` + WITH all_data AS ( + SELECT ${buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} + FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}], union_by_name=true) + WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' + AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' + ${parsed.conditions} + UNION ALL + SELECT ${buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} FROM buffer_data WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' ${parsed.conditions} - ${parsed.orderBy} - ${parsed.limit} - `; - } - } else if (files.length > 0) { - // Only query parquet files - query = ` + ) SELECT ${parsed.columns} - FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}]) - ${parsed.timeRange ? `WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' - AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}'` : ''} - ${parsed.conditions} + FROM all_data ${parsed.orderBy} ${parsed.limit} `; } else { - // No data available - return []; + query = ` + SELECT ${parsed.columns} + FROM ( + SELECT ${buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} + FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}], union_by_name=true) + WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' + AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' + ${parsed.conditions} + UNION ALL + SELECT ${buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} + FROM buffer_data + WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' + AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' + ${parsed.conditions} + ) combined_data + ${parsed.orderBy} + ${parsed.limit} + `; } + } else { + // Only query buffer data + query = ` + SELECT ${parsed.columns} + FROM buffer_data + WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' + AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' + ${parsed.conditions} + ${parsed.orderBy} + ${parsed.limit} + `; + } - const reader = await connection.runAndReadAll(query); - return reader.getRows().map(row => { - const obj = {}; - reader.columnNames().forEach((col, i) => { - obj[col] = row[i]; - }); - return obj; + return await this.db.all(query); + } catch (error) { + console.error('Query error:', error); + throw error; + } + } + + async getFilesForTimeRange(timeRange, dbName = 'hep') { + // Modify path to include database + const dbPath = path.join( + this.baseDir, + this.writerId, + 'dbs', + `${dbName}-${this.metadata.next_db_id}` + ); + + try { + const files = []; + const types = await fs.promises.readdir(dbPath); + + for (const type of types) { + if (!type.startsWith('hep_')) continue; + + const typePath = path.join(dbPath, type); + const metadata = await this.getTypeMetadata(type, dbName); + + // Filter files within time range + const relevantFiles = metadata.files.filter(file => { + return file.min_time <= timeRange.end && file.max_time >= timeRange.start; }); - } finally { - await connection.close(); + + files.push(...relevantFiles); } + + return files; } catch (error) { - console.error('Query error:', error); + if (error.code === 'ENOENT') { + return []; + } throw error; } } From 43b6cfdaf79c10bc5d25af0488ad927e72067cf5 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 14:13:33 +0100 Subject: [PATCH 45/60] fix timestamping --- hepop.js | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/hepop.js b/hepop.js index d23ecd4..005562e 100644 --- a/hepop.js +++ b/hepop.js @@ -457,6 +457,30 @@ class ParquetBufferManager { await this.flush(type); } } + + async ensureDirectories() { + const metadataDir = path.join(this.baseDir, this.writerId); + await fs.promises.mkdir(metadataDir, { recursive: true }); + + // Write initial metadata file if it doesn't exist + const metadataPath = path.join(metadataDir, 'metadata.json'); + if (!fs.existsSync(metadataPath)) { + const initialMetadata = { + writer_id: this.writerId, + next_db_id: 0, + next_table_id: 0 + }; + + await fs.promises.writeFile( + metadataPath, + JSON.stringify(initialMetadata, null, 2) + ); + } + + // Create dbs directory + const dbsDir = path.join(metadataDir, 'dbs'); + await fs.promises.mkdir(dbsDir, { recursive: true }); + } } class CompactionManager { From be38be7b24165a14733712cac0214b6d1815ae49 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 14:14:41 +0100 Subject: [PATCH 46/60] fix timestamping --- query.js | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/query.js b/query.js index 61a650a..68796ea 100644 --- a/query.js +++ b/query.js @@ -144,7 +144,11 @@ class QueryClient { async query(sql, options = {}) { const parsed = this.parseQuery(sql); - const buffer = await this.getBuffer(); + + // Use the buffer manager reference directly + if (!this.buffer) { + throw new Error('No buffer manager available'); + } try { // Get database name from options or default to 'hep' @@ -162,13 +166,13 @@ class QueryClient { if (isAggregateQuery) { query = ` WITH all_data AS ( - SELECT ${buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} + SELECT ${this.buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}], union_by_name=true) WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' ${parsed.conditions} UNION ALL - SELECT ${buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} + SELECT ${this.buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} FROM buffer_data WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' @@ -183,13 +187,13 @@ class QueryClient { query = ` SELECT ${parsed.columns} FROM ( - SELECT ${buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} + SELECT ${this.buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}], union_by_name=true) WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' ${parsed.conditions} UNION ALL - SELECT ${buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} + SELECT ${this.buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} FROM buffer_data WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' From 5e227266b44908dca6117c0812fa1e6e771d96c2 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 14:15:41 +0100 Subject: [PATCH 47/60] fix timestamping --- query.js | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/query.js b/query.js index 68796ea..bcf6fac 100644 --- a/query.js +++ b/query.js @@ -224,12 +224,16 @@ class QueryClient { } async getFilesForTimeRange(timeRange, dbName = 'hep') { - // Modify path to include database + if (!this.buffer) { + throw new Error('No buffer manager available'); + } + + // Use buffer manager's metadata const dbPath = path.join( this.baseDir, - this.writerId, + this.buffer.writerId, 'dbs', - `${dbName}-${this.metadata.next_db_id}` + `${dbName}-${this.buffer.metadata.next_db_id}` ); try { @@ -240,7 +244,8 @@ class QueryClient { if (!type.startsWith('hep_')) continue; const typePath = path.join(dbPath, type); - const metadata = await this.getTypeMetadata(type, dbName); + // Use buffer manager's method to get metadata + const metadata = await this.buffer.getTypeMetadata(type); // Filter files within time range const relevantFiles = metadata.files.filter(file => { From 54138a5a04cdd74db54fa6e14bafb190542c700d Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 14:16:39 +0100 Subject: [PATCH 48/60] fix timestamping --- query.js | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/query.js b/query.js index bcf6fac..f5f6ecd 100644 --- a/query.js +++ b/query.js @@ -6,6 +6,7 @@ class QueryClient { constructor(baseDir = './data', bufferManager = null) { this.baseDir = baseDir; this.db = null; + this.connection = null; this.defaultTimeRange = 10 * 60 * 1000000000; // 10 minutes in nanoseconds this.buffer = bufferManager; // Store reference to buffer manager } @@ -13,6 +14,8 @@ class QueryClient { async initialize() { try { this.db = await DuckDBInstance.create(':memory:'); + // Create initial connection + this.connection = await this.db.connect(); console.log('Initialized DuckDB for querying'); } catch (error) { console.error('Failed to initialize DuckDB:', error); @@ -145,7 +148,6 @@ class QueryClient { async query(sql, options = {}) { const parsed = this.parseQuery(sql); - // Use the buffer manager reference directly if (!this.buffer) { throw new Error('No buffer manager available'); } @@ -216,7 +218,18 @@ class QueryClient { `; } - return await this.db.all(query); + // Execute query using proper DuckDB API + const result = await this.connection.runAndReadAll(query); + + // Convert result to array of objects + return result.getRows().map(row => { + const obj = {}; + result.columnNames().forEach((col, i) => { + obj[col] = row[i]; + }); + return obj; + }); + } catch (error) { console.error('Query error:', error); throw error; @@ -265,7 +278,12 @@ class QueryClient { } async close() { - // Nothing to clean up + if (this.connection) { + await this.connection.close(); + } + if (this.db) { + await this.db.close(); + } } } From b5cf06fe054b2670988c830aafa281931717ef7e Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 14:19:27 +0100 Subject: [PATCH 49/60] fix timestamping --- query.js | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 64 insertions(+), 4 deletions(-) diff --git a/query.js b/query.js index f5f6ecd..1ff8756 100644 --- a/query.js +++ b/query.js @@ -153,12 +153,72 @@ class QueryClient { } try { - // Get database name from options or default to 'hep' const dbName = options.db || 'hep'; - - // Get matching files for time range and database const files = await this.getFilesForTimeRange(parsed.timeRange, dbName); + // Drop existing temp table if it exists + await this.connection.runAndReadAll(`DROP TABLE IF EXISTS buffer_data`); + + // Create temp table from buffer if there's data + const buffer = this.buffer.buffers.get(parsed.type); + if (buffer?.rows?.length) { + // Create temp table with proper schema first + await this.connection.runAndReadAll(` + CREATE TEMP TABLE buffer_data ( + timestamp TIMESTAMP, + ${buffer.isLineProtocol ? + `tags VARCHAR, + ${Object.keys(buffer.rows[0]) + .filter(k => !['timestamp', 'tags'].includes(k)) + .join(' VARCHAR,\n ')} VARCHAR` : + `rcinfo VARCHAR, + payload VARCHAR`} + ) + `); + + // Insert data in batches to avoid query size limits + const batchSize = 1000; + for (let i = 0; i < buffer.rows.length; i += batchSize) { + const batch = buffer.rows.slice(i, i + batchSize); + await this.connection.runAndReadAll(` + INSERT INTO buffer_data + SELECT * FROM (VALUES ${batch.map(row => { + if (buffer.isLineProtocol) { + return `( + TIMESTAMP '${row.timestamp.toISOString()}', + '${row.tags}', + ${Object.entries(row) + .filter(([k]) => !['timestamp', 'tags'].includes(k)) + .map(([,v]) => typeof v === 'string' ? `'${v}'` : v) + .join(', ')} + )`; + } else { + return `( + TIMESTAMP '${new Date(row.create_date).toISOString()}', + '${JSON.stringify(row.protocol_header)}', + '${row.raw || ''}' + )`; + } + }).join(', ')}) + `); + } + } else { + // Create empty temp table + await this.connection.runAndReadAll(` + CREATE TEMP TABLE buffer_data ( + timestamp TIMESTAMP, + ${buffer?.isLineProtocol ? + `tags VARCHAR, + ${Object.keys(buffer?.rows?.[0] || {}) + .filter(k => !['timestamp', 'tags'].includes(k)) + .map(k => `${k} VARCHAR`) + .join(',\n ')}` : + `rcinfo VARCHAR, + payload VARCHAR`} + ) + `); + } + // Build query with union_by_name=true let query; if (files.length > 0) { @@ -218,7 +278,7 @@ class QueryClient { `; } - // Execute query using proper DuckDB API + // Execute query const result = await this.connection.runAndReadAll(query); // Convert result to array of objects From 5fb1b3920a6588e8fb140a9b25fde4df20084137 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 14:20:46 +0100 Subject: [PATCH 50/60] fix timestamping --- query.js | 51 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 12 deletions(-) diff --git a/query.js b/query.js index 1ff8756..f05f77f 100644 --- a/query.js +++ b/query.js @@ -145,6 +145,20 @@ class QueryClient { }; } + // Helper to determine SQL type from JavaScript value + getColumnType(value) { + if (typeof value === 'number') { + if (Number.isInteger(value)) { + return 'BIGINT'; + } + return 'DOUBLE'; + } + if (typeof value === 'boolean') { + return 'BOOLEAN'; + } + return 'VARCHAR'; + } + async query(sql, options = {}) { const parsed = this.parseQuery(sql); @@ -156,27 +170,34 @@ class QueryClient { const dbName = options.db || 'hep'; const files = await this.getFilesForTimeRange(parsed.timeRange, dbName); - // Drop existing temp table if it exists await this.connection.runAndReadAll(`DROP TABLE IF EXISTS buffer_data`); - // Create temp table from buffer if there's data const buffer = this.buffer.buffers.get(parsed.type); if (buffer?.rows?.length) { - // Create temp table with proper schema first + // Get column types from first row + const columnTypes = new Map(); + const firstRow = buffer.rows[0]; + Object.entries(firstRow).forEach(([key, value]) => { + if (!['timestamp', 'tags'].includes(key)) { + columnTypes.set(key, this.getColumnType(value)); + } + }); + + // Create temp table with proper types await this.connection.runAndReadAll(` CREATE TEMP TABLE buffer_data ( timestamp TIMESTAMP, ${buffer.isLineProtocol ? `tags VARCHAR, - ${Object.keys(buffer.rows[0]) - .filter(k => !['timestamp', 'tags'].includes(k)) - .join(' VARCHAR,\n ')} VARCHAR` : + ${Array.from(columnTypes.entries()) + .map(([key, type]) => `${key} ${type}`) + .join(',\n ')}` : `rcinfo VARCHAR, payload VARCHAR`} ) `); - // Insert data in batches to avoid query size limits + // Insert data in batches const batchSize = 1000; for (let i = 0; i < buffer.rows.length; i += batchSize) { const batch = buffer.rows.slice(i, i + batchSize); @@ -189,7 +210,13 @@ class QueryClient { '${row.tags}', ${Object.entries(row) .filter(([k]) => !['timestamp', 'tags'].includes(k)) - .map(([,v]) => typeof v === 'string' ? `'${v}'` : v) + .map(([k, v]) => { + const type = columnTypes.get(k); + if (type === 'VARCHAR') { + return `'${v}'`; + } + return v === null ? 'NULL' : v; + }) .join(', ')} )`; } else { @@ -203,15 +230,15 @@ class QueryClient { `); } } else { - // Create empty temp table + // Create empty table with proper types await this.connection.runAndReadAll(` CREATE TEMP TABLE buffer_data ( timestamp TIMESTAMP, ${buffer?.isLineProtocol ? `tags VARCHAR, - ${Object.keys(buffer?.rows?.[0] || {}) - .filter(k => !['timestamp', 'tags'].includes(k)) - .map(k => `${k} VARCHAR`) + ${Object.entries(buffer?.schema?.fields || {}) + .filter(([k]) => !['timestamp', 'tags'].includes(k)) + .map(([k, f]) => `${k} ${f.type === 'DOUBLE' ? 'DOUBLE' : 'VARCHAR'}`) .join(',\n ')}` : `rcinfo VARCHAR, payload VARCHAR`} From 3b5e6dcb352ad0ec88eeb02318a36c9761e7e7da Mon Sep 17 00:00:00 2001 From: lmangani Date: Tue, 11 Feb 2025 13:27:42 +0000 Subject: [PATCH 51/60] restore --- hepop.js | 125 +++++------------------- query.js | 293 +++++++++++++++++++------------------------------------ 2 files changed, 129 insertions(+), 289 deletions(-) diff --git a/hepop.js b/hepop.js index 005562e..4ee5fc9 100644 --- a/hepop.js +++ b/hepop.js @@ -316,6 +316,26 @@ class ParquetBufferManager { } } + async ensureDirectories() { + const metadataDir = path.join(this.baseDir, this.writerId); + await fs.promises.mkdir(metadataDir, { recursive: true }); + + // Write initial metadata file if it doesn't exist + const metadataPath = path.join(metadataDir, 'metadata.json'); + if (!fs.existsSync(metadataPath)) { + const initialMetadata = { + writer_id: this.writerId, + next_db_id: 0, + next_table_id: 0 + }; + + await fs.promises.writeFile( + metadataPath, + JSON.stringify(initialMetadata, null, 2) + ); + } + } + async addLineProtocol(data) { const measurement = data.measurement; if (!this.buffers.has(measurement)) { @@ -388,7 +408,7 @@ class ParquetBufferManager { } } - async addLineProtocolBulk(measurement, rows, dbName) { + async addLineProtocolBulk(measurement, rows) { const type = measurement; if (!this.buffers.has(type)) { @@ -457,30 +477,6 @@ class ParquetBufferManager { await this.flush(type); } } - - async ensureDirectories() { - const metadataDir = path.join(this.baseDir, this.writerId); - await fs.promises.mkdir(metadataDir, { recursive: true }); - - // Write initial metadata file if it doesn't exist - const metadataPath = path.join(metadataDir, 'metadata.json'); - if (!fs.existsSync(metadataPath)) { - const initialMetadata = { - writer_id: this.writerId, - next_db_id: 0, - next_table_id: 0 - }; - - await fs.promises.writeFile( - metadataPath, - JSON.stringify(initialMetadata, null, 2) - ); - } - - // Create dbs directory - const dbsDir = path.join(metadataDir, 'dbs'); - await fs.promises.mkdir(dbsDir, { recursive: true }); - } } class CompactionManager { @@ -1017,72 +1013,6 @@ class CompactionManager { } } - async cleanupEmptyDirectories() { - try { - // Get current hour path format - const now = new Date(); - const currentHourPath = `${now.getFullYear()}-${String(now.getMonth() + 1).padStart(2, '0')}-${String(now.getDate()).padStart(2, '0')}/${String(now.getHours()).padStart(2, '0')}`; - - // Get all database directories - const dbsDir = path.join(this.bufferManager.baseDir, this.bufferManager.writerId, 'dbs'); - const databases = await fs.promises.readdir(dbsDir); - - for (const db of databases) { - const dbPath = path.join(dbsDir, db); - const tables = await fs.promises.readdir(dbPath); - - for (const table of tables) { - const tablePath = path.join(dbPath, table); - if (!(await fs.promises.stat(tablePath)).isDirectory()) continue; - - // Get all date directories - const dates = await fs.promises.readdir(tablePath); - for (const date of dates) { - const datePath = path.join(tablePath, date); - if (!(await fs.promises.stat(datePath)).isDirectory()) continue; - - // Get hour directories - const hours = await fs.promises.readdir(datePath); - for (const hour of hours) { - const hourPath = path.join(datePath, hour); - if (!(await fs.promises.stat(hourPath)).isDirectory()) continue; - - // Skip current hour - const dirPath = `${date}/${hour}`; - if (dirPath.startsWith(currentHourPath)) { - continue; - } - - // Check if directory is empty after compaction - const files = await fs.promises.readdir(hourPath); - if (files.length === 0) { - await fs.promises.rmdir(hourPath); - console.log(`Removed empty directory: ${hourPath}`); - - // Try to remove parent if empty - const parentFiles = await fs.promises.readdir(datePath); - if (parentFiles.length === 0) { - await fs.promises.rmdir(datePath); - console.log(`Removed empty date directory: ${datePath}`); - } - } - } - } - } - } - } catch (error) { - console.error('Error cleaning up directories:', error); - } - } - - // Add to compaction interval - startCompactionInterval() { - this.compactionInterval = setInterval(async () => { - await this.compact(); - await this.cleanupEmptyDirectories(); // Add cleanup after compaction - }, this.interval); - } - async close() { if (this.compactionInterval) { clearInterval(this.compactionInterval); @@ -1158,9 +1088,6 @@ class HEPServer { async fetch(req) { const url = new URL(req.url); - // Get db parameter, default to 'hep' if not provided - const dbName = url.searchParams.get('db') || 'hep'; - if (url.pathname === '/') { try { const html = await Bun.file('./index.html').text(); @@ -1190,7 +1117,7 @@ class HEPServer { return new Response('Method not allowed', { status: 405 }); } - const result = await self.queryClient.query(query, { db: dbName }); + const result = await self.queryClient.query(query); // Handle BigInt serialization const safeResult = JSON.parse(JSON.stringify(result, (key, value) => @@ -1215,12 +1142,11 @@ class HEPServer { const config = { addTimestamp: true, typeMappings: [], - defaultTypeMapping: 'float', - dbName // Pass database name to buffer manager + defaultTypeMapping: 'float' }; // Process lines in bulk - const bulkData = new Map(); + const bulkData = new Map(); // measurement -> rows for (const line of lines) { const parsed = parse(line, config); @@ -1239,7 +1165,8 @@ class HEPServer { // Bulk insert by measurement for (const [measurement, rows] of bulkData) { - await self.buffer.addLineProtocolBulk(measurement, rows, dbName); + // console.log(`Writing ${rows.length} rows to measurement ${measurement}`); + await self.buffer.addLineProtocolBulk(measurement, rows); } return new Response(null, { status: 201 }); diff --git a/query.js b/query.js index f05f77f..8cf96d2 100644 --- a/query.js +++ b/query.js @@ -6,7 +6,6 @@ class QueryClient { constructor(baseDir = './data', bufferManager = null) { this.baseDir = baseDir; this.db = null; - this.connection = null; this.defaultTimeRange = 10 * 60 * 1000000000; // 10 minutes in nanoseconds this.buffer = bufferManager; // Store reference to buffer manager } @@ -14,8 +13,6 @@ class QueryClient { async initialize() { try { this.db = await DuckDBInstance.create(':memory:'); - // Create initial connection - this.connection = await this.db.connect(); console.log('Initialized DuckDB for querying'); } catch (error) { console.error('Failed to initialize DuckDB:', error); @@ -145,78 +142,37 @@ class QueryClient { }; } - // Helper to determine SQL type from JavaScript value - getColumnType(value) { - if (typeof value === 'number') { - if (Number.isInteger(value)) { - return 'BIGINT'; - } - return 'DOUBLE'; - } - if (typeof value === 'boolean') { - return 'BOOLEAN'; - } - return 'VARCHAR'; - } - - async query(sql, options = {}) { - const parsed = this.parseQuery(sql); - - if (!this.buffer) { - throw new Error('No buffer manager available'); + async query(sql) { + if (!this.db) { + throw new Error('QueryClient not initialized'); } try { - const dbName = options.db || 'hep'; - const files = await this.getFilesForTimeRange(parsed.timeRange, dbName); - - await this.connection.runAndReadAll(`DROP TABLE IF EXISTS buffer_data`); - - const buffer = this.buffer.buffers.get(parsed.type); - if (buffer?.rows?.length) { - // Get column types from first row - const columnTypes = new Map(); - const firstRow = buffer.rows[0]; - Object.entries(firstRow).forEach(([key, value]) => { - if (!['timestamp', 'tags'].includes(key)) { - columnTypes.set(key, this.getColumnType(value)); - } - }); + const parsed = this.parseQuery(sql); + if (!parsed.type) { + throw new Error('Could not determine type from query'); + } - // Create temp table with proper types - await this.connection.runAndReadAll(` - CREATE TEMP TABLE buffer_data ( - timestamp TIMESTAMP, - ${buffer.isLineProtocol ? - `tags VARCHAR, - ${Array.from(columnTypes.entries()) - .map(([key, type]) => `${key} ${type}`) - .join(',\n ')}` : - `rcinfo VARCHAR, - payload VARCHAR`} - ) - `); - - // Insert data in batches - const batchSize = 1000; - for (let i = 0; i < buffer.rows.length; i += batchSize) { - const batch = buffer.rows.slice(i, i + batchSize); - await this.connection.runAndReadAll(` - INSERT INTO buffer_data - SELECT * FROM (VALUES ${batch.map(row => { + const files = await this.findRelevantFiles(parsed.type, parsed.timeRange); + const connection = await this.db.connect(); + + try { + // Get buffered data for this type + const buffer = this.buffer?.buffers.get(parsed.type); + let query; + + if (buffer?.rows?.length) { + // Create temp table from buffer using VALUES + const valuesQuery = ` + CREATE TEMP TABLE IF NOT EXISTS buffer_data AS + SELECT * FROM (VALUES ${buffer.rows.map(row => { if (buffer.isLineProtocol) { return `( TIMESTAMP '${row.timestamp.toISOString()}', '${row.tags}', ${Object.entries(row) .filter(([k]) => !['timestamp', 'tags'].includes(k)) - .map(([k, v]) => { - const type = columnTypes.get(k); - if (type === 'VARCHAR') { - return `'${v}'`; - } - return v === null ? 'NULL' : v; - }) + .map(([,v]) => typeof v === 'string' ? `'${v}'` : v) .join(', ')} )`; } else { @@ -226,151 +182,108 @@ class QueryClient { '${row.raw || ''}' )`; } - }).join(', ')}) - `); - } - } else { - // Create empty table with proper types - await this.connection.runAndReadAll(` - CREATE TEMP TABLE buffer_data ( - timestamp TIMESTAMP, - ${buffer?.isLineProtocol ? - `tags VARCHAR, - ${Object.entries(buffer?.schema?.fields || {}) - .filter(([k]) => !['timestamp', 'tags'].includes(k)) - .map(([k, f]) => `${k} ${f.type === 'DOUBLE' ? 'DOUBLE' : 'VARCHAR'}`) - .join(',\n ')}` : - `rcinfo VARCHAR, - payload VARCHAR`} - ) - `); - } + }).join(', ')}) + AS t(${buffer.isLineProtocol ? + `timestamp, tags, ${Object.keys(buffer.rows[0]) + .filter(k => !['timestamp', 'tags'].includes(k)) + .join(', ')}` : + 'timestamp, rcinfo, payload'}) + `; - // Build query with union_by_name=true - let query; - if (files.length > 0) { - const isAggregateQuery = parsed.columns.toLowerCase().includes('count(') || - parsed.columns.toLowerCase().includes('avg('); - - if (isAggregateQuery) { - query = ` - WITH all_data AS ( - SELECT ${this.buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} - FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}], union_by_name=true) - WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' - AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' - ${parsed.conditions} - UNION ALL - SELECT ${this.buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} + await connection.runAndReadAll(valuesQuery); + + if (files.length > 0) { + // For aggregate queries, combine data before aggregating + const isAggregateQuery = parsed.columns.toLowerCase().includes('count(') || + parsed.columns.toLowerCase().includes('avg('); + + if (isAggregateQuery) { + query = ` + WITH all_data AS ( + SELECT ${buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} + FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}], union_by_name=true) + WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' + AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' + ${parsed.conditions} + UNION ALL + SELECT ${buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} + FROM buffer_data + WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' + AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' + ${parsed.conditions} + ) + SELECT ${parsed.columns} + FROM all_data + ${parsed.orderBy} + ${parsed.limit} + `; + } else { + query = ` + SELECT ${parsed.columns} + FROM ( + SELECT ${buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} + FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}], union_by_name=true) + WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' + AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' + ${parsed.conditions} + UNION ALL + SELECT ${buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} + FROM buffer_data + WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' + AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' + ${parsed.conditions} + ) combined_data + ${parsed.orderBy} + ${parsed.limit} + `; + } + } else { + // Only query buffer data + query = ` + SELECT ${parsed.columns} FROM buffer_data WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' ${parsed.conditions} - ) - SELECT ${parsed.columns} - FROM all_data - ${parsed.orderBy} - ${parsed.limit} - `; - } else { + ${parsed.orderBy} + ${parsed.limit} + `; + } + } else if (files.length > 0) { + // Only query parquet files query = ` SELECT ${parsed.columns} - FROM ( - SELECT ${this.buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} - FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}], union_by_name=true) - WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' - AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' - ${parsed.conditions} - UNION ALL - SELECT ${this.buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} - FROM buffer_data - WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' - AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' - ${parsed.conditions} - ) combined_data + FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}]) + ${parsed.timeRange ? `WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' + AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}'` : ''} + ${parsed.conditions} ${parsed.orderBy} ${parsed.limit} `; + } else { + // No data available + return []; } - } else { - // Only query buffer data - query = ` - SELECT ${parsed.columns} - FROM buffer_data - WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' - AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' - ${parsed.conditions} - ${parsed.orderBy} - ${parsed.limit} - `; - } - - // Execute query - const result = await this.connection.runAndReadAll(query); - - // Convert result to array of objects - return result.getRows().map(row => { - const obj = {}; - result.columnNames().forEach((col, i) => { - obj[col] = row[i]; - }); - return obj; - }); - - } catch (error) { - console.error('Query error:', error); - throw error; - } - } - async getFilesForTimeRange(timeRange, dbName = 'hep') { - if (!this.buffer) { - throw new Error('No buffer manager available'); - } - - // Use buffer manager's metadata - const dbPath = path.join( - this.baseDir, - this.buffer.writerId, - 'dbs', - `${dbName}-${this.buffer.metadata.next_db_id}` - ); - - try { - const files = []; - const types = await fs.promises.readdir(dbPath); - - for (const type of types) { - if (!type.startsWith('hep_')) continue; - - const typePath = path.join(dbPath, type); - // Use buffer manager's method to get metadata - const metadata = await this.buffer.getTypeMetadata(type); - - // Filter files within time range - const relevantFiles = metadata.files.filter(file => { - return file.min_time <= timeRange.end && file.max_time >= timeRange.start; + const reader = await connection.runAndReadAll(query); + return reader.getRows().map(row => { + const obj = {}; + reader.columnNames().forEach((col, i) => { + obj[col] = row[i]; + }); + return obj; }); - - files.push(...relevantFiles); + } finally { + await connection.close(); } - - return files; } catch (error) { - if (error.code === 'ENOENT') { - return []; - } + console.error('Query error:', error); throw error; } } async close() { - if (this.connection) { - await this.connection.close(); - } - if (this.db) { - await this.db.close(); - } + // Nothing to clean up } } From dbea76b3c91c9a15ee16e0e6284fdbd98771e2ef Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 15:27:08 +0100 Subject: [PATCH 52/60] fix timestamping --- hepop.js | 45 +++++++++++++++++++++++++++++---------------- lineproto.js | 42 +++++++++++++++++++++--------------------- 2 files changed, 50 insertions(+), 37 deletions(-) diff --git a/hepop.js b/hepop.js index 4ee5fc9..d472c65 100644 --- a/hepop.js +++ b/hepop.js @@ -1137,8 +1137,9 @@ class HEPServer { } else if (url.pathname === '/write' && req.method === 'POST') { try { const body = await req.text(); + // Split on newlines and filter empty lines const lines = body.split('\n').filter(line => line.trim()); - + const config = { addTimestamp: true, typeMappings: [], @@ -1146,27 +1147,40 @@ class HEPServer { }; // Process lines in bulk - const bulkData = new Map(); // measurement -> rows + const bulkData = new Map(); for (const line of lines) { - const parsed = parse(line, config); - const measurement = parsed.measurement; - - if (!bulkData.has(measurement)) { - bulkData.set(measurement, []); + try { + const parsed = parse(line, config); + const measurement = parsed.measurement; + + if (!bulkData.has(measurement)) { + bulkData.set(measurement, []); + } + + // Ensure timestamp is a Date object + const timestamp = new Date(parsed.timestamp); + if (isNaN(timestamp.getTime())) { + console.warn(`Invalid timestamp in line: ${line}, using current time`); + timestamp = new Date(); + } + + bulkData.get(measurement).push({ + timestamp, + tags: JSON.stringify(parsed.tags), + ...parsed.fields + }); + } catch (error) { + console.warn(`Error parsing line: ${line}`, error); + continue; // Skip invalid lines } - - bulkData.get(measurement).push({ - timestamp: new Date(parsed.timestamp), - tags: JSON.stringify(parsed.tags), - ...parsed.fields - }); } // Bulk insert by measurement for (const [measurement, rows] of bulkData) { - // console.log(`Writing ${rows.length} rows to measurement ${measurement}`); - await self.buffer.addLineProtocolBulk(measurement, rows); + if (rows.length > 0) { + await self.buffer.addLineProtocolBulk(measurement, rows); + } } return new Response(null, { status: 201 }); @@ -1174,7 +1188,6 @@ class HEPServer { console.error('Write error:', error); return new Response(error.message, { status: 400 }); } - } return new Response('Not found', { status: 404 }); diff --git a/lineproto.js b/lineproto.js index 42e9310..4e7602f 100644 --- a/lineproto.js +++ b/lineproto.js @@ -69,24 +69,32 @@ function formatValue(v, numericType) { // Most common case first: nanoseconds (19 digits) if (len === 19) { - // Fast path: direct division to ms return Math.floor(Number(timestamp) / 1000000); } // Convert once and reuse const num = Number(timestamp); + if (!isNaN(num)) { + switch (len) { + case 16: // microseconds + return Math.floor(num / 1000); + case 13: // milliseconds + return num; + case 10: // seconds + return num * 1000; + default: + return num; + } + } - // Handle other precisions - switch (len) { - case 16: // microseconds - return Math.floor(num / 1000); - case 13: // milliseconds - return num; - case 10: // seconds - return num * 1000; - default: - return num; + // Try parsing as date string + const date = new Date(timestamp); + if (!isNaN(date.getTime())) { + return date.getTime(); } + + // Default to current time + return Date.now(); } function parse(point, config) { @@ -111,16 +119,8 @@ function formatValue(v, numericType) { return out; }, {}); - // Fast path: timestamp handling - if (timestamp) { - result.timestamp = parseTimestamp(timestamp); - // Store original precision for parquet - result.timestampNano = timestamp; - } else if (config.addTimestamp) { - const now = Date.now(); - result.timestamp = now; - result.timestampNano = (BigInt(now) * 1000000n).toString(); - } + // Handle timestamp with fallback + result.timestamp = parseTimestamp(timestamp); return result; } From 32d9631ad26d6ee02cc88fe17b6b8324ee249388 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 15:28:02 +0100 Subject: [PATCH 53/60] fix timestamping --- hepop.js | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/hepop.js b/hepop.js index d472c65..fa53472 100644 --- a/hepop.js +++ b/hepop.js @@ -1137,7 +1137,6 @@ class HEPServer { } else if (url.pathname === '/write' && req.method === 'POST') { try { const body = await req.text(); - // Split on newlines and filter empty lines const lines = body.split('\n').filter(line => line.trim()); const config = { @@ -1158,8 +1157,8 @@ class HEPServer { bulkData.set(measurement, []); } - // Ensure timestamp is a Date object - const timestamp = new Date(parsed.timestamp); + // Use let for timestamp since we might need to reassign + let timestamp = new Date(parsed.timestamp); if (isNaN(timestamp.getTime())) { console.warn(`Invalid timestamp in line: ${line}, using current time`); timestamp = new Date(); From 9faf5cdd3a7475af6c6fba60ffe6837dac5a66b3 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Tue, 11 Feb 2025 15:37:19 +0100 Subject: [PATCH 54/60] fix timestamping --- hepop.js | 84 +++++++++++++++++++++++++++----------------------------- 1 file changed, 40 insertions(+), 44 deletions(-) diff --git a/hepop.js b/hepop.js index fa53472..f26fcff 100644 --- a/hepop.js +++ b/hepop.js @@ -412,66 +412,58 @@ class ParquetBufferManager { const type = measurement; if (!this.buffers.has(type)) { - // Get existing schema if any - let existingSchema = null; - try { - const typeMetadata = await this.getTypeMetadata(type); - if (typeMetadata.files.length > 0) { - const reader = await parquet.ParquetReader.openFile(typeMetadata.files[0].path); - existingSchema = reader.schema; - await reader.close(); - } - } catch (error) { - console.log(`No existing schema found for ${type}, creating new one`); - } + // Create schema from first row to ensure correct types + const firstRow = rows[0]; + const schemaFields = { + timestamp: { type: 'TIMESTAMP_MILLIS' }, + tags: { type: 'UTF8' } + }; - // Merge schemas - const newFields = {}; - rows.forEach(row => { - Object.entries(row).forEach(([key, value]) => { - if (key !== 'timestamp' && key !== 'tags') { - newFields[key] = { - type: typeof value === 'number' ? 'DOUBLE' : - typeof value === 'boolean' ? 'BOOLEAN' : 'UTF8', - optional: true // Make all fields optional - }; - } + // Add fields with proper types + Object.entries(firstRow) + .filter(([key]) => !['timestamp', 'tags'].includes(key)) + .forEach(([key, value]) => { + schemaFields[key] = { + type: typeof value === 'number' ? 'DOUBLE' : + typeof value === 'boolean' ? 'BOOLEAN' : 'UTF8', + optional: true + }; }); - }); - - const schema = new parquet.ParquetSchema({ - timestamp: { type: 'TIMESTAMP_MILLIS' }, - tags: { type: 'UTF8' }, - ...newFields - }); + // Create new buffer with schema this.buffers.set(type, { rows: [], - schema, + schema: new parquet.ParquetSchema(schemaFields), isLineProtocol: true }); } const buffer = this.buffers.get(type); - - // Ensure all rows have all fields + + // Ensure all rows have all fields with proper types const allFields = new Set(); - buffer.schema.fieldList.forEach(f => allFields.add(f.path[0])); - - const normalizedRows = rows.map(row => { + rows.forEach(row => { + Object.keys(row).forEach(key => { + if (!['timestamp', 'tags'].includes(key)) { + allFields.add(key); + } + }); + }); + + // Add rows with normalized fields + buffer.rows.push(...rows.map(row => { const normalized = { timestamp: row.timestamp, tags: row.tags }; + + // Add all fields, using null for missing ones allFields.forEach(field => { - if (field !== 'timestamp' && field !== 'tags') { - normalized[field] = row[field] ?? null; - } + normalized[field] = row[field] ?? null; }); - return normalized; - }); - buffer.rows.push(...normalizedRows); + return normalized; + })); if (buffer.rows.length >= this.bufferSize) { await this.flush(type); @@ -1166,8 +1158,12 @@ class HEPServer { bulkData.get(measurement).push({ timestamp, - tags: JSON.stringify(parsed.tags), - ...parsed.fields + tags: JSON.stringify(parsed.tags || {}), + // Convert undefined values to null + ...Object.fromEntries( + Object.entries(parsed.fields || {}) + .map(([k, v]) => [k, v ?? null]) + ) }); } catch (error) { console.warn(`Error parsing line: ${line}`, error); From d579bb5a47a5fcb476658c4f0ac04d646a3ae9c0 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Wed, 12 Feb 2025 23:08:05 +0100 Subject: [PATCH 55/60] fix WHERE handler --- query.js | 183 ++++++++++++++++++++----------------------------------- 1 file changed, 66 insertions(+), 117 deletions(-) diff --git a/query.js b/query.js index 8cf96d2..fd3de6a 100644 --- a/query.js +++ b/query.js @@ -142,140 +142,89 @@ class QueryClient { }; } - async query(sql) { - if (!this.db) { - throw new Error('QueryClient not initialized'); + async query(sql, options = {}) { + const parsed = this.parseQuery(sql); + + if (!this.buffer) { + throw new Error('No buffer manager available'); } try { - const parsed = this.parseQuery(sql); - if (!parsed.type) { - throw new Error('Could not determine type from query'); - } - + const dbName = options.db || 'hep'; const files = await this.findRelevantFiles(parsed.type, parsed.timeRange); - const connection = await this.db.connect(); - try { - // Get buffered data for this type - const buffer = this.buffer?.buffers.get(parsed.type); - let query; - - if (buffer?.rows?.length) { - // Create temp table from buffer using VALUES - const valuesQuery = ` - CREATE TEMP TABLE IF NOT EXISTS buffer_data AS - SELECT * FROM (VALUES ${buffer.rows.map(row => { - if (buffer.isLineProtocol) { - return `( - TIMESTAMP '${row.timestamp.toISOString()}', - '${row.tags}', - ${Object.entries(row) - .filter(([k]) => !['timestamp', 'tags'].includes(k)) - .map(([,v]) => typeof v === 'string' ? `'${v}'` : v) - .join(', ')} - )`; - } else { - return `( - TIMESTAMP '${new Date(row.create_date).toISOString()}', - '${JSON.stringify(row.protocol_header)}', - '${row.raw || ''}' - )`; - } - }).join(', ')}) - AS t(${buffer.isLineProtocol ? - `timestamp, tags, ${Object.keys(buffer.rows[0]) - .filter(k => !['timestamp', 'tags'].includes(k)) - .join(', ')}` : - 'timestamp, rcinfo, payload'}) - `; - - await connection.runAndReadAll(valuesQuery); - - if (files.length > 0) { - // For aggregate queries, combine data before aggregating - const isAggregateQuery = parsed.columns.toLowerCase().includes('count(') || - parsed.columns.toLowerCase().includes('avg('); - - if (isAggregateQuery) { - query = ` - WITH all_data AS ( - SELECT ${buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} - FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}], union_by_name=true) - WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' - AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' - ${parsed.conditions} - UNION ALL - SELECT ${buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} - FROM buffer_data - WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' - AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' - ${parsed.conditions} - ) - SELECT ${parsed.columns} - FROM all_data - ${parsed.orderBy} - ${parsed.limit} - `; - } else { - query = ` - SELECT ${parsed.columns} - FROM ( - SELECT ${buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} - FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}], union_by_name=true) - WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' - AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' - ${parsed.conditions} - UNION ALL - SELECT ${buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} - FROM buffer_data - WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' - AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' - ${parsed.conditions} - ) combined_data - ${parsed.orderBy} - ${parsed.limit} - `; - } - } else { - // Only query buffer data - query = ` - SELECT ${parsed.columns} + // Build query with union_by_name=true + let query; + if (files.length > 0) { + const isAggregateQuery = parsed.columns.toLowerCase().includes('count(') || + parsed.columns.toLowerCase().includes('avg('); + + if (isAggregateQuery) { + // For aggregate queries, apply WHERE conditions before aggregating + query = ` + WITH filtered_data AS ( + SELECT ${this.buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} + FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}], union_by_name=true) + WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' + AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' + ${parsed.conditions} + UNION ALL + SELECT ${this.buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} FROM buffer_data WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' ${parsed.conditions} - ${parsed.orderBy} - ${parsed.limit} - `; - } - } else if (files.length > 0) { - // Only query parquet files - query = ` + ) SELECT ${parsed.columns} - FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}]) - ${parsed.timeRange ? `WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' - AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}'` : ''} - ${parsed.conditions} + FROM filtered_data ${parsed.orderBy} ${parsed.limit} `; } else { - // No data available - return []; + // Non-aggregate queries remain the same + query = ` + SELECT ${parsed.columns} + FROM ( + SELECT ${this.buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} + FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}], union_by_name=true) + WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' + AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' + ${parsed.conditions} + UNION ALL + SELECT ${this.buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} + FROM buffer_data + WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' + AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' + ${parsed.conditions} + ) combined_data + ${parsed.orderBy} + ${parsed.limit} + `; } + } else { + // Only query buffer data + query = ` + SELECT ${parsed.columns} + FROM buffer_data + WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' + AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' + ${parsed.conditions} + ${parsed.orderBy} + ${parsed.limit} + `; + } - const reader = await connection.runAndReadAll(query); - return reader.getRows().map(row => { - const obj = {}; - reader.columnNames().forEach((col, i) => { - obj[col] = row[i]; - }); - return obj; + // Execute query + const connection = await this.db.connect(); + const result = await connection.runAndReadAll(query); + return result.getRows().map(row => { + const obj = {}; + result.columnNames().forEach((col, i) => { + obj[col] = row[i]; }); - } finally { - await connection.close(); - } + return obj; + }); + } catch (error) { console.error('Query error:', error); throw error; From 35c6d26cee76782a9f1e3b5a9529db8df9288d40 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Fri, 14 Feb 2025 01:13:00 +0100 Subject: [PATCH 56/60] fix WHERE handler --- query.js | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/query.js b/query.js index fd3de6a..f1db83d 100644 --- a/query.js +++ b/query.js @@ -152,6 +152,17 @@ class QueryClient { try { const dbName = options.db || 'hep'; const files = await this.findRelevantFiles(parsed.type, parsed.timeRange); + const buffer = this.buffer.buffers.get(parsed.type); + + // Determine if this is HEP or line protocol data + const isHepData = typeof parsed.type === 'number'; + + // Select appropriate columns based on data type + const selectColumns = isHepData ? + 'timestamp, rcinfo, payload' : + (buffer?.rows?.[0] ? + Object.keys(buffer.rows[0]).join(', ') : + '*'); // Build query with union_by_name=true let query; @@ -160,16 +171,15 @@ class QueryClient { parsed.columns.toLowerCase().includes('avg('); if (isAggregateQuery) { - // For aggregate queries, apply WHERE conditions before aggregating query = ` WITH filtered_data AS ( - SELECT ${this.buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} + SELECT ${selectColumns} FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}], union_by_name=true) WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' ${parsed.conditions} UNION ALL - SELECT ${this.buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} + SELECT ${selectColumns} FROM buffer_data WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' @@ -181,17 +191,16 @@ class QueryClient { ${parsed.limit} `; } else { - // Non-aggregate queries remain the same query = ` SELECT ${parsed.columns} FROM ( - SELECT ${this.buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} + SELECT ${selectColumns} FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}], union_by_name=true) WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' ${parsed.conditions} UNION ALL - SELECT ${this.buffer.isLineProtocol ? '*' : 'timestamp, rcinfo, payload'} + SELECT ${selectColumns} FROM buffer_data WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' From fda4b2737b2b056107be72b24c76a2635d363f38 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Fri, 14 Feb 2025 01:14:08 +0100 Subject: [PATCH 57/60] fix WHERE handler --- query.js | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 4 deletions(-) diff --git a/query.js b/query.js index f1db83d..21d8a26 100644 --- a/query.js +++ b/query.js @@ -153,7 +153,8 @@ class QueryClient { const dbName = options.db || 'hep'; const files = await this.findRelevantFiles(parsed.type, parsed.timeRange); const buffer = this.buffer.buffers.get(parsed.type); - + const connection = await this.db.connect(); + // Determine if this is HEP or line protocol data const isHepData = typeof parsed.type === 'number'; @@ -164,6 +165,50 @@ class QueryClient { Object.keys(buffer.rows[0]).join(', ') : '*'); + // Create temp table from buffer if there's data + if (buffer?.rows?.length) { + const createTableQuery = ` + CREATE TEMP TABLE buffer_data AS + SELECT * FROM (VALUES ${buffer.rows.map(row => { + if (!isHepData) { + return `( + TIMESTAMP '${row.timestamp.toISOString()}', + '${row.tags}', + ${Object.entries(row) + .filter(([k]) => !['timestamp', 'tags'].includes(k)) + .map(([,v]) => typeof v === 'string' ? `'${v}'` : v) + .join(', ')} + )`; + } else { + return `( + TIMESTAMP '${new Date(row.create_date).toISOString()}', + '${JSON.stringify(row.protocol_header)}', + '${row.raw || ''}' + )`; + } + }).join(', ')}) + AS t(${selectColumns}) + `; + + await connection.runAndReadAll(createTableQuery); + } else { + // Create empty temp table with correct schema + await connection.runAndReadAll(` + CREATE TEMP TABLE buffer_data ( + timestamp TIMESTAMP, + ${isHepData ? + `rcinfo VARCHAR, + payload VARCHAR` : + `tags VARCHAR, + ${Object.entries(buffer?.schema?.fields || {}) + .filter(([k]) => !['timestamp', 'tags'].includes(k)) + .map(([k, f]) => `${k} ${f.type === 'DOUBLE' ? 'DOUBLE' : 'VARCHAR'}`) + .join(',\n ')}` + } + ) + `); + } + // Build query with union_by_name=true let query; if (files.length > 0) { @@ -223,10 +268,9 @@ class QueryClient { `; } - // Execute query - const connection = await this.db.connect(); + // Execute query and return results const result = await connection.runAndReadAll(query); - return result.getRows().map(row => { + const rows = result.getRows().map(row => { const obj = {}; result.columnNames().forEach((col, i) => { obj[col] = row[i]; @@ -234,6 +278,9 @@ class QueryClient { return obj; }); + await connection.close(); + return rows; + } catch (error) { console.error('Query error:', error); throw error; From 07896b05a0cebb610137ee7ab4a3a5e82ac80b5c Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Fri, 14 Feb 2025 01:15:04 +0100 Subject: [PATCH 58/60] fix WHERE handler --- query.js | 88 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/query.js b/query.js index 21d8a26..e353d67 100644 --- a/query.js +++ b/query.js @@ -157,42 +157,49 @@ class QueryClient { // Determine if this is HEP or line protocol data const isHepData = typeof parsed.type === 'number'; - - // Select appropriate columns based on data type - const selectColumns = isHepData ? - 'timestamp, rcinfo, payload' : - (buffer?.rows?.[0] ? - Object.keys(buffer.rows[0]).join(', ') : - '*'); + // Drop existing temp table if exists + await connection.runAndReadAll('DROP TABLE IF EXISTS buffer_data'); + // Create temp table from buffer if there's data if (buffer?.rows?.length) { - const createTableQuery = ` - CREATE TEMP TABLE buffer_data AS - SELECT * FROM (VALUES ${buffer.rows.map(row => { - if (!isHepData) { - return `( - TIMESTAMP '${row.timestamp.toISOString()}', - '${row.tags}', - ${Object.entries(row) - .filter(([k]) => !['timestamp', 'tags'].includes(k)) - .map(([,v]) => typeof v === 'string' ? `'${v}'` : v) - .join(', ')} - )`; - } else { - return `( - TIMESTAMP '${new Date(row.create_date).toISOString()}', - '${JSON.stringify(row.protocol_header)}', - '${row.raw || ''}' - )`; - } - }).join(', ')}) - AS t(${selectColumns}) - `; + const firstRow = buffer.rows[0]; + const columns = Object.keys(firstRow); + + // Create schema string + const schemaStr = columns.map(col => { + const value = firstRow[col]; + const type = typeof value === 'number' ? 'DOUBLE' : + value instanceof Date ? 'TIMESTAMP' : + 'VARCHAR'; + return `${col} ${type}`; + }).join(',\n'); + + // Create empty table first + await connection.runAndReadAll(` + CREATE TEMP TABLE buffer_data ( + ${schemaStr} + ) + `); - await connection.runAndReadAll(createTableQuery); + // Insert data in batches + const batchSize = 1000; + for (let i = 0; i < buffer.rows.length; i += batchSize) { + const batch = buffer.rows.slice(i, i + batchSize); + await connection.runAndReadAll(` + INSERT INTO buffer_data + SELECT * FROM (VALUES ${batch.map(row => `( + ${columns.map(col => { + const value = row[col]; + if (value instanceof Date) return `TIMESTAMP '${value.toISOString()}'`; + if (typeof value === 'string') return `'${value.replace(/'/g, "''")}'`; + return value === null ? 'NULL' : value; + }).join(', ')} + )`).join(', ')}) + `); + } } else { - // Create empty temp table with correct schema + // Create empty table with basic schema await connection.runAndReadAll(` CREATE TEMP TABLE buffer_data ( timestamp TIMESTAMP, @@ -200,16 +207,13 @@ class QueryClient { `rcinfo VARCHAR, payload VARCHAR` : `tags VARCHAR, - ${Object.entries(buffer?.schema?.fields || {}) - .filter(([k]) => !['timestamp', 'tags'].includes(k)) - .map(([k, f]) => `${k} ${f.type === 'DOUBLE' ? 'DOUBLE' : 'VARCHAR'}`) - .join(',\n ')}` + value DOUBLE` } ) `); } - // Build query with union_by_name=true + // Build query let query; if (files.length > 0) { const isAggregateQuery = parsed.columns.toLowerCase().includes('count(') || @@ -218,14 +222,12 @@ class QueryClient { if (isAggregateQuery) { query = ` WITH filtered_data AS ( - SELECT ${selectColumns} - FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}], union_by_name=true) + SELECT * FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}], union_by_name=true) WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' ${parsed.conditions} UNION ALL - SELECT ${selectColumns} - FROM buffer_data + SELECT * FROM buffer_data WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' ${parsed.conditions} @@ -239,14 +241,12 @@ class QueryClient { query = ` SELECT ${parsed.columns} FROM ( - SELECT ${selectColumns} - FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}], union_by_name=true) + SELECT * FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}], union_by_name=true) WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' ${parsed.conditions} UNION ALL - SELECT ${selectColumns} - FROM buffer_data + SELECT * FROM buffer_data WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' ${parsed.conditions} From 2f8dd0e8e137b31b232e1595d0094ea7af85c160 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Fri, 14 Feb 2025 01:16:34 +0100 Subject: [PATCH 59/60] fix WHERE handler --- query.js | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/query.js b/query.js index e353d67..4a4fc1c 100644 --- a/query.js +++ b/query.js @@ -121,8 +121,15 @@ class QueryClient { const whereClause = sql.match(/WHERE\s+(.*?)(?:\s+(?:ORDER|GROUP|LIMIT|$))/i); let conditions = ''; if (whereClause) { - conditions = whereClause[1].replace(/time\s*(>=|>|<=|<|=)\s*'[^']+'\s*(AND|OR)?/i, '').trim(); - if (conditions) conditions = `AND ${conditions}`; + // Keep all conditions except the time condition + conditions = whereClause[1] + .split(/\s+AND\s+/i) + .filter(cond => !cond.toLowerCase().includes('time')) + .join(' AND '); + + if (conditions) { + conditions = `AND ${conditions}`; + } } // Extract ORDER BY, LIMIT, etc. @@ -224,16 +231,17 @@ class QueryClient { WITH filtered_data AS ( SELECT * FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}], union_by_name=true) WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' - AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' - ${parsed.conditions} + AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' + ${parsed.conditions} UNION ALL SELECT * FROM buffer_data WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' - AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' - ${parsed.conditions} + AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' + ${parsed.conditions} ) SELECT ${parsed.columns} FROM filtered_data + WHERE 1=1 ${parsed.conditions} ${parsed.orderBy} ${parsed.limit} `; From 02291d9296630ffd167c82ab5657c11e4bd7c721 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Fri, 14 Feb 2025 01:18:46 +0100 Subject: [PATCH 60/60] fix WHERE handler --- query.js | 96 +++++++++++++++++++------------------------------------- 1 file changed, 32 insertions(+), 64 deletions(-) diff --git a/query.js b/query.js index 4a4fc1c..b0dbee0 100644 --- a/query.js +++ b/query.js @@ -162,61 +162,26 @@ class QueryClient { const buffer = this.buffer.buffers.get(parsed.type); const connection = await this.db.connect(); - // Determine if this is HEP or line protocol data - const isHepData = typeof parsed.type === 'number'; - - // Drop existing temp table if exists - await connection.runAndReadAll('DROP TABLE IF EXISTS buffer_data'); - // Create temp table from buffer if there's data if (buffer?.rows?.length) { + // Drop existing temp table if exists + await connection.runAndReadAll('DROP TABLE IF EXISTS buffer_data'); + + // Create temp table with schema from first row const firstRow = buffer.rows[0]; const columns = Object.keys(firstRow); - // Create schema string - const schemaStr = columns.map(col => { - const value = firstRow[col]; - const type = typeof value === 'number' ? 'DOUBLE' : - value instanceof Date ? 'TIMESTAMP' : - 'VARCHAR'; - return `${col} ${type}`; - }).join(',\n'); - - // Create empty table first await connection.runAndReadAll(` - CREATE TEMP TABLE buffer_data ( - ${schemaStr} - ) - `); - - // Insert data in batches - const batchSize = 1000; - for (let i = 0; i < buffer.rows.length; i += batchSize) { - const batch = buffer.rows.slice(i, i + batchSize); - await connection.runAndReadAll(` - INSERT INTO buffer_data - SELECT * FROM (VALUES ${batch.map(row => `( - ${columns.map(col => { - const value = row[col]; - if (value instanceof Date) return `TIMESTAMP '${value.toISOString()}'`; - if (typeof value === 'string') return `'${value.replace(/'/g, "''")}'`; - return value === null ? 'NULL' : value; - }).join(', ')} - )`).join(', ')}) - `); - } - } else { - // Create empty table with basic schema - await connection.runAndReadAll(` - CREATE TEMP TABLE buffer_data ( - timestamp TIMESTAMP, - ${isHepData ? - `rcinfo VARCHAR, - payload VARCHAR` : - `tags VARCHAR, - value DOUBLE` - } - ) + CREATE TEMP TABLE buffer_data AS + SELECT * FROM (VALUES ${buffer.rows.map(row => `( + ${columns.map(col => { + const value = row[col]; + if (value instanceof Date) return `TIMESTAMP '${value.toISOString()}'`; + if (typeof value === 'string') return `'${value.replace(/'/g, "''")}'`; + return value === null ? 'NULL' : value; + }).join(', ')} + )`).join(', ')}) + AS t(${columns.join(', ')}) `); } @@ -231,19 +196,18 @@ class QueryClient { WITH filtered_data AS ( SELECT * FROM read_parquet([${files.map(f => `'${f.path}'`).join(', ')}], union_by_name=true) WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' + AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' + ${parsed.conditions} + ${buffer?.rows?.length ? ` + UNION ALL + SELECT * FROM buffer_data + WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' ${parsed.conditions} - UNION ALL - SELECT * FROM buffer_data - WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' - AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' - ${parsed.conditions} + ` : ''} ) SELECT ${parsed.columns} FROM filtered_data - WHERE 1=1 ${parsed.conditions} - ${parsed.orderBy} - ${parsed.limit} `; } else { query = ` @@ -253,17 +217,19 @@ class QueryClient { WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' ${parsed.conditions} - UNION ALL - SELECT * FROM buffer_data - WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' - AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' - ${parsed.conditions} + ${buffer?.rows?.length ? ` + UNION ALL + SELECT * FROM buffer_data + WHERE timestamp >= TIMESTAMP '${new Date(parsed.timeRange.start / 1000000).toISOString()}' + AND timestamp <= TIMESTAMP '${new Date(parsed.timeRange.end / 1000000).toISOString()}' + ${parsed.conditions} + ` : ''} ) combined_data ${parsed.orderBy} ${parsed.limit} `; } - } else { + } else if (buffer?.rows?.length) { // Only query buffer data query = ` SELECT ${parsed.columns} @@ -274,9 +240,11 @@ class QueryClient { ${parsed.orderBy} ${parsed.limit} `; + } else { + // No data available + return []; } - // Execute query and return results const result = await connection.runAndReadAll(query); const rows = result.getRows().map(row => { const obj = {};