From 5564903a1a1a793dd134b4026f27a9e633ef8213 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 12 Nov 2024 00:41:31 +0000 Subject: [PATCH 1/2] Added Tinybird --- tinybird/README.md | 32 +++++++++++++++++++ tinybird/results/tinybird.json | 58 ++++++++++++++++++++++++++++++++++ tinybird/run.sh | 24 ++++++++++++++ 3 files changed, 114 insertions(+) create mode 100644 tinybird/README.md create mode 100644 tinybird/results/tinybird.json create mode 100755 tinybird/run.sh diff --git a/tinybird/README.md b/tinybird/README.md new file mode 100644 index 000000000..eabad249f --- /dev/null +++ b/tinybird/README.md @@ -0,0 +1,32 @@ +# Disclaimer + +Benchmarking a database often requires deep expertise and fine-tuning. Here, our goal is merely to test the default experience of a new +user, i.e. someone who does not invest the time to optimize performance. + +Testing is semi-automatized. + +The system as a timeout of 10s, after that it recommends to optimize (rewrite) the query. + +Load time and data size in the results are set to 0 as Tinybird did not indicate these resources. + +# Creating an account + +Head to https://www.tinybird.co and create an account. + +# Inserting data + +Tinybird supports data inserts from various sources. We are going to use S3 to load a Parquet file into Tinybird. Since Tinybird limits the +file size to 1 GB and the test data set is larger than that, we split it into smaller chunks using ClickHouse: + +```sql +INSERT INTO FUNCTION s3('https://hitsparquet.s3.eu-west-3.amazonaws.com/data/hits_{_partition_id}.parquet', '', '', 'Parquet') +PARTITION BY rand() % 50 +SELECT * FROM hits +``` + +Import of files with sizes a little bit less than 1 GB did not always work. We instead used 50 files of around 280 MB each. You will need to +use the auto mode to make sure all the files are read. + +# Querying the data + +Once the data is inserted you can create the endpoints needed to run the benchmark using pipes. `run.sh` will iterate through each endpoint. 
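The pipe definitions themselves are not part of this patch. As a rough sketch only — assuming Tinybird's `.pipe` datafile syntax, a data source named `hits`, and pipes named `Q1` through `Q43` to match `run.sh` — the first endpoint might look like:

```
DESCRIPTION >
    Benchmark query Q1, published as an API endpoint

NODE endpoint
SQL >
    SELECT COUNT(*) FROM hits
```

Once a node is published as an API endpoint (from the UI or the CLI), it becomes reachable at `https://api.tinybird.co/v0/pipes/Q1.json`, which is the URL pattern that `run.sh` below iterates over.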
diff --git a/tinybird/results/tinybird.json b/tinybird/results/tinybird.json new file mode 100644 index 000000000..0d10b1009 --- /dev/null +++ b/tinybird/results/tinybird.json @@ -0,0 +1,58 @@ +{ + "system": "Tinybird", + "date": "2024-11-11", + "machine": "serverless", + "cluster_size": 1, + "comment": "", + + "tags": ["C++", "column-oriented", "ClickHouse derivative"], + + "load_time": 0, + "data_size": 0, + + "result": [ + [0.002, 0.002, 0.006], + [0.03, 0.03, 0.034], + [0.08, 0.08, 0.083], + [0.109, 0.104, 0.102], + [1.011, 0.668, 0.637], + [0.664, 0.582, 0.597], + [1.899, 0.055, 0.068], + [0.036, 0.033, 0.032], + [2.55, 2.519, 2.341], + [2.076, 2.042, 2.387], + [0.344, 0.365, 0.322], + [0.599, 0.401, 0.413], + [1.228, 1.265, 1.085], + [1.742, 1.729, 1.941], + [1.477, 1.468, 1.423], + [1.442, 1.317, 1.496], + [4.402, 4.385, 4.476], + [3.236, 3.165, 3.396], + [9.751, 9.707, 8.164], + [0.079, 0.084, 0.074], + [2.924, 1.204, 1.213], + [1.243, 1.122, 1.69], + [3.846, 1.67, 1.727], + [7.491, 8.793, 7.513], + [0.352, 0.338, 0.39], + [0.293, 0.255, 0.262], + [0.384, 0.445, 0.407], + [1.518, 1.366, 1.326], + [null, null, null], + [0.09, 0.092, 0.094], + [0.951, 0.897, 0.946], + [1.152, 1.106, 1.299], + [5.331, 5.509, 5.432], + [8.104, 8.451, 8.519], + [6.037, 6.861, 7.138], + [3.298, 3.412, 3.422], + [1.225, 1.121, 1.167], + [1.53, 1.768, 1.189], + [0.989, 0.891, 0.854], + [2.183, 2.042, 1.919], + [0.166, 0.018, 0.015], + [0.527, 0.284, 0.304], + [0.214, 0.213, 0.205] +] +} diff --git a/tinybird/run.sh b/tinybird/run.sh new file mode 100755 index 000000000..81733a08b --- /dev/null +++ b/tinybird/run.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +# Define the base URL and Authorization token +BASE_URL="https://api.tinybird.co/v0/pipes/" +AUTH_HEADER= + +results="[" + +for i in {1..43}; do + times=() + for j in {1..3}; do + response=$(curl -s --compressed -H "$AUTH_HEADER" "${BASE_URL}Q${i}.json") + + elapsed=$(echo "$response" | jq '.statistics.elapsed') + echo "$elapsed" + times+=($elapsed) + done + results+=$(printf "[%s,%s,%s]," "${times[0]}" "${times[1]}" "${times[2]}") +done + +results=${results%,} +results+="]" + +echo "$results" From b826a277862d0ddbd8f0d596e67c4e6431e8ef64 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Nov 2024 17:49:12 +0100 Subject: [PATCH 2/2] Update README.md --- tinybird/README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tinybird/README.md b/tinybird/README.md index eabad249f..c6b8df3be 100644 --- a/tinybird/README.md +++ b/tinybird/README.md @@ -3,11 +3,11 @@ Benchmarking a database often requires deep expertise and fine-tuning. Here, our goal is merely to test the default experience of a new user, i.e. someone who does not invest the time to optimize performance. -Testing is semi-automatized. +Testing is semi-automated. -The system as a timeout of 10s, after that it recommends to optimize (rewrite) the query. +The system has a timeout of 10s; after that, it recommends to optimize (rewrite) the query. -Load time and data size in the results are set to 0 as Tinybird did not indicate these resources. +Load time and data size in the results are set to 0, as Tinybird did not indicate these resources. # Creating an account @@ -16,7 +16,7 @@ Head to https://www.tinybird.co and create an account. # Inserting data Tinybird supports data inserts from various sources. We are going to use S3 to load a Parquet file into Tinybird. 
Since Tinybird limits the -file size to 1 GB and the test data set is larger than that, we split it into smaller chunks using ClickHouse: +file size to 1 GB, and the test data set is larger than that, we split it into smaller chunks using ClickHouse: ```sql INSERT INTO FUNCTION s3('https://hitsparquet.s3.eu-west-3.amazonaws.com/data/hits_{_partition_id}.parquet', '', '', 'Parquet') @@ -24,7 +24,7 @@ PARTITION BY rand() % 50 SELECT * FROM hits ``` -Import of files with sizes a little bit less than 1 GB did not always work. We instead used 50 files of around 280 MB each. You will need to +Importing files with sizes a little bit less than 1 GB did not always work. We instead used 50 files of around 280 MB each. You will need to use the auto mode to make sure all the files are read. # Querying the data
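A note on the split described above: before importing, it is easy to confirm that the 50 chunks together still contain the full dataset by reading them back with the same `s3` table function. This is only a sketch and assumes the exported files remain readable with the same (empty) credentials used for the export:

```sql
-- Compare the exported Parquet chunks against the source table; the two counts should match.
SELECT
    (SELECT count()
     FROM s3('https://hitsparquet.s3.eu-west-3.amazonaws.com/data/hits_*.parquet', '', '', 'Parquet')) AS exported_rows,
    (SELECT count() FROM hits) AS source_rows;
```

On the Tinybird side, you can then cross-check that all 50 files were picked up, which is what the auto mode mentioned above is meant to ensure.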