diff --git a/pg_duckdb-motherduck/benchmark.sh b/pg_duckdb-motherduck/benchmark.sh index 04705c04f..27aab8832 100755 --- a/pg_duckdb-motherduck/benchmark.sh +++ b/pg_duckdb-motherduck/benchmark.sh @@ -1,6 +1,6 @@ #!/bin/bash -set -ex +set -e #sudo apt-get update #sudo apt-get install -y docker.io @@ -22,21 +22,22 @@ set -ex -# export motherduck_token=... +# export MOTHERDUCK_TOKEN=... # create a database called pgclick in the motherduck UI or duckdb cli # `CREATE DATABASE pgclick` -# You will also need to create dummy table in that database. For example, run -# `create table pgclick.foo as SELECT 1 as a;` -# (https://github.com/duckdb/pg_duckdb/issues/450) +if [ -z "${MOTHERDUCK_TOKEN}" ]; then + echo "Error: MOTHERDUCK_TOKEN is not set." >&2 + exit 1 +fi -sudo docker run -d --name pgduck -e POSTGRES_PASSWORD=duckdb -e MOTHERDUCK_TOKEN=$MOTHERDUCK_TOKEN pgduckdb/pgduckdb:16-main -c duckdb.motherduck_enabled=true +sudo docker run -d --name pgduck --network=host -e POSTGRES_PASSWORD=duckdb -e "MOTHERDUCK_TOKEN=${MOTHERDUCK_TOKEN}" pgduckdb/pgduckdb:16-main -c duckdb.motherduck_enabled=true # Give postgres time to start running -sleep 5 +sleep 10 ./load.sh 2>&1 | tee load_log.txt ./run.sh 2>&1 | tee log.txt -# Go to motherduck UI and execute: +# Go to https://app.motherduck.com and execute: # `SELECT database_size FROM pragma_database_size() WHERE database_name = 'pgclick'` # 25 GB diff --git a/pg_duckdb-motherduck/create.sql b/pg_duckdb-motherduck/create.sql index 5ae9eef4c..51336b6b3 100644 --- a/pg_duckdb-motherduck/create.sql +++ b/pg_duckdb-motherduck/create.sql @@ -1,4 +1,4 @@ create or replace view hits_view as select WatchID, JavaEnable, Title, GoodEvent, 'epoch'::timestamp + (EventTime || 'second')::interval EventTime, 'epoch'::timestamp + (EventDate || 'day')::interval EventDate, CounterID, ClientIP, RegionID, UserID, CounterClass, OS, UserAgent, URL, Referer, IsRefresh, RefererCategoryID, RefererRegionID, URLCategoryID, URLRegionID, ResolutionWidth, ResolutionHeight, ResolutionDepth, 
FlashMajor, FlashMinor, FlashMinor2, NetMajor, NetMinor, UserAgentMajor, UserAgentMinor, CookieEnable, JavascriptEnable, IsMobile, MobilePhone, MobilePhoneModel, Params, IPNetworkID, TraficSourceID, SearchEngineID, SearchPhrase, AdvEngineID, IsArtifical, WindowClientWidth, WindowClientHeight, ClientTimeZone, 'epoch'::timestamp + (ClientEventTime || 'second')::interval ClientEventTime, SilverlightVersion1, SilverlightVersion2, SilverlightVersion3, SilverlightVersion4, PageCharset, CodeVersion, IsLink, IsDownload, IsNotBounce, FUniqID, OriginalURL, HID, IsOldCounter, IsEvent, IsParameter, DontCountHits, WithHash, HitColor, 'epoch'::timestamp + (LocalEventTime || 'second')::interval LocalEventTime, Age, Sex, Income, Interests, Robotness, RemoteIP, WindowName, OpenerName, HistoryLength, BrowserLanguage, BrowserCountry, SocialNetwork, SocialAction, HTTPError, SendTiming, DNSTiming, ConnectTiming, ResponseStartTiming, ResponseEndTiming, FetchTiming, SocialSourceNetworkID, SocialSourcePage, ParamPrice, ParamOrderID, ParamCurrency, ParamCurrencyID, OpenstatServiceName, OpenstatCampaignID, OpenstatAdID, OpenstatSourceID, UTMSource, UTMMedium, UTMCampaign, UTMContent, UTMTerm, FromTag, HasGCLID, RefererHash, URLHash, CLID from read_parquet('REPLACE_PARQUET_FILE') as (WatchID BIGINT, JavaEnable SMALLINT, Title VARCHAR, GoodEvent SMALLINT, EventTime BIGINT, EventDate int, CounterID INTEGER, ClientIP INTEGER, RegionID INTEGER, UserID BIGINT, CounterClass SMALLINT, OS SMALLINT, UserAgent SMALLINT, URL VARCHAR, Referer VARCHAR, IsRefresh SMALLINT, RefererCategoryID SMALLINT, RefererRegionID INTEGER, URLCategoryID SMALLINT, URLRegionID INTEGER, ResolutionWidth SMALLINT, ResolutionHeight SMALLINT, ResolutionDepth SMALLINT, FlashMajor SMALLINT, FlashMinor SMALLINT, FlashMinor2 VARCHAR, NetMajor SMALLINT, NetMinor SMALLINT, UserAgentMajor SMALLINT, UserAgentMinor VARCHAR, CookieEnable SMALLINT, JavascriptEnable SMALLINT, IsMobile SMALLINT, MobilePhone SMALLINT, MobilePhoneModel 
VARCHAR, Params VARCHAR, IPNetworkID INTEGER, TraficSourceID SMALLINT, SearchEngineID SMALLINT, SearchPhrase VARCHAR, AdvEngineID SMALLINT, IsArtifical SMALLINT, WindowClientWidth SMALLINT, WindowClientHeight SMALLINT, ClientTimeZone SMALLINT, ClientEventTime BIGINT, SilverlightVersion1 SMALLINT, SilverlightVersion2 SMALLINT, SilverlightVersion3 INTEGER, SilverlightVersion4 SMALLINT, PageCharset VARCHAR, CodeVersion INTEGER, IsLink SMALLINT, IsDownload SMALLINT, IsNotBounce SMALLINT, FUniqID BIGINT, OriginalURL VARCHAR, HID INTEGER, IsOldCounter SMALLINT, IsEvent SMALLINT, IsParameter SMALLINT, DontCountHits SMALLINT, WithHash SMALLINT, HitColor VARCHAR, LocalEventTime BIGINT, Age SMALLINT, Sex SMALLINT, Income SMALLINT, Interests SMALLINT, Robotness SMALLINT, RemoteIP INTEGER, WindowName INTEGER, OpenerName INTEGER, HistoryLength SMALLINT, BrowserLanguage VARCHAR, BrowserCountry VARCHAR, SocialNetwork VARCHAR, SocialAction VARCHAR, HTTPError SMALLINT, SendTiming INTEGER, DNSTiming INTEGER, ConnectTiming INTEGER, ResponseStartTiming INTEGER, ResponseEndTiming INTEGER, FetchTiming INTEGER, SocialSourceNetworkID SMALLINT, SocialSourcePage VARCHAR, ParamPrice BIGINT, ParamOrderID VARCHAR, ParamCurrency VARCHAR, ParamCurrencyID SMALLINT, OpenstatServiceName VARCHAR, OpenstatCampaignID VARCHAR, OpenstatAdID VARCHAR, OpenstatSourceID VARCHAR, UTMSource VARCHAR, UTMMedium VARCHAR, UTMCampaign VARCHAR, UTMContent VARCHAR, UTMTerm VARCHAR, FromTag VARCHAR, HasGCLID SMALLINT, RefererHash BIGINT, URLHash BIGINT, CLID INTEGER); -CREATE table REPLACE_DATABASE.hits USING DUCKDB AS SELECT * FROM hits_view; +CREATE table REPLACE_SCHEMA.hits USING DUCKDB AS SELECT * FROM hits_view; diff --git a/pg_duckdb-motherduck/load.sh b/pg_duckdb-motherduck/load.sh index a583ab326..d0912eff2 100755 --- a/pg_duckdb-motherduck/load.sh +++ b/pg_duckdb-motherduck/load.sh @@ -1,8 +1,6 @@ #!/bin/bash -# Your docker may be set up to use localhost instead, if so -# edit the ip address below. 
-CONNECTION=postgres://postgres:duckdb@172.17.0.2:5432/postgres +CONNECTION=postgres://postgres:duckdb@localhost:5432/postgres PSQL=psql DATABASE='ddb$pgclick' @@ -12,6 +10,6 @@ echo "Loading data" ( echo "\timing" cat create.sql | - sed -e "s/REPLACE_DATABASE/$DATABASE/g" -e "s/REPLACE_PARQUET_FILE/$PARQUET_FILE/g" + sed -e "s/REPLACE_SCHEMA/$DATABASE/g" -e "s/REPLACE_PARQUET_FILE/$PARQUET_FILE/g" ) | $PSQL $CONNECTION | grep 'Time' diff --git a/pg_duckdb-motherduck/run.sh b/pg_duckdb-motherduck/run.sh index e5992789c..af6d05fcc 100755 --- a/pg_duckdb-motherduck/run.sh +++ b/pg_duckdb-motherduck/run.sh @@ -1,9 +1,7 @@ #!/bin/bash TRIES=3 -# Your docker may be set up to use localhost, if so edit ip address -# below. -CONNECTION=postgres://postgres:duckdb@172.17.0.2:5432/postgres +CONNECTION=postgres://postgres:duckdb@localhost:5432/postgres DATABASE='ddb$pgclick'