Skip to content

feat: add openai_embed function #5636

feat: add openai_embed function

feat: add openai_embed function #5636

Workflow file for this run

name: GlareDB CI
on:
push:
branches:
- main
pull_request:
merge_group:
workflow_dispatch:
concurrency:
group: build-ci-${{ github.ref }}
cancel-in-progress: true
jobs:
build:
name: Build
runs-on: ubuntu-latest-8-cores
steps:
- name: checkout
uses: actions/checkout@v4
- uses: extractions/setup-just@v1
with:
just-version: "1.23.0"
- uses: actions/cache@v4
name: toolchain cache
with:
path: ~/.rustup/toolchains/
key: ${{ runner.os }}-toolchain-build-${{ hashFiles('**/rust-toolchain.toml') }}
- uses: actions/cache@v4
name: cargo cache
with:
path: ~/.cargo/
key: ${{ runner.os }}-cargo-pkg-build-${{ hashFiles('**/Cargo.lock') }}
restore-keys: ${{ runner.os }}-cargo-pkg-build-
- uses: actions/cache@v4
name: build cache
with:
path: |
target/
!target/**/glaredb
!target/**/pgprototest
key: ${{ runner.os }}-cargo-glaredb-build-${{ github.ref_name }}
restore-keys: |
${{ runner.os }}-cargo-glaredb-build-${{ hashFiles('**/Cargo.lock') }}
${{ runner.os }}-cargo-glaredb-build-
- run: just build
- run: cargo build --bin pgprototest
- uses: actions/cache/save@v4
name: glaredb cache
with:
path: target/debug/glaredb
key: ${{ runner.os }}-glaredb-bin-${{ github.run_id }}
- uses: actions/cache/save@v4
name: pgprototest cache
with:
path: target/debug/pgprototest
key: ${{ runner.os }}-pgprototest-bin-${{ github.run_id }}
fmt:
name: Format (rustfmt +nightly)
needs: ["build"]
runs-on: ubuntu-latest
steps:
- name: checkout
uses: actions/checkout@v4
- uses: extractions/setup-just@v1
with:
just-version: "1.23.0"
- uses: actions/cache@v4
name: nightly toolchain cache
id: lint-toolchain
with:
path: ~/.rustup/toolchains/
key: ${{ runner.os }}-toolchain-lint-${{ hashFiles('**/rustfmt.toml') }}
- run: rustup install nightly
if: ${{ steps.lint-toolchain.outputs.cache-hit != 'true' }}
- run: rustup component add rustfmt --toolchain nightly
if: ${{ steps.lint-toolchain.outputs.cache-hit != 'true' }}
- run: just fmt-check
lint:
name: Lint (clippy)
# emperically runtimes are the same for big/small hosts:
runs-on: ubuntu-latest
needs: ["build"]
steps:
- name: checkout
uses: actions/checkout@v4
- uses: extractions/setup-just@v1
with:
just-version: "1.23.0"
- uses: actions/cache@v4
name: toolchain cache
with:
path: ~/.rustup/toolchains/
key: ${{ runner.os }}-toolchain-build-${{ hashFiles('**/rust-toolchain.toml') }}
- uses: actions/cache@v4
name: cargo cache
with:
path: ~/.cargo/
key: ${{ runner.os }}-cargo-pkg-lint-${{ hashFiles('**/Cargo.lock') }}
restore-keys: |
${{ runner.os }}-cargo-pkg-build-${{ hashFiles('**/Cargo.lock') }}
${{ runner.os }}-cargo-pkg-lint-
${{ runner.os }}-cargo-pkg-build-
- uses: actions/cache/restore@v4
name: build cache
with:
path: |
target/
!target/**/glaredb
!target/**/pgprototest
key: ${{ runner.os }}-cargo-glaredb-build-${{ github.ref_name }}
- run: just clippy
python-binding-tests:
name: Python Binding Tests
runs-on: ubuntu-latest-4-cores
needs: ["build"]
steps:
- name: checkout
uses: actions/checkout@v4
- uses: extractions/setup-just@v1
with:
just-version: "1.23.0"
- uses: actions/cache/restore@v4
name: toolchain cache
with:
path: ~/.rustup/toolchains/
key: ${{ runner.os }}-toolchain-build-${{ hashFiles('**/rust-toolchain.toml') }}
- uses: actions/cache@v4
name: cargo cache
with:
path: ~/.cargo/
key: ${{ runner.os }}-cargo-pkg-python-${{ hashFiles('**/Cargo.lock') }}
restore-keys: |
${{ runner.os }}-cargo-pkg-build-${{ hashFiles('**/Cargo.lock') }}
${{ runner.os }}-cargo-pkg-python-
${{ runner.os }}-cargo-pkg-build-
- uses: actions/cache@v4
name: build cache
with:
path: |
target/
!target/**/glaredb
!target/**/pgprototest
key: ${{ runner.os }}-cargo-glaredb-build-${{ github.ref_name }}
- run: just python build
- run: just python test
nodejs-bindings-tests:
name: Node.js Binding Tests
runs-on: ubuntu-latest-8-cores
needs: ["build"]
steps:
- uses: actions/checkout@v4
- uses: actions/setup-node@v4
with:
node-version: 20
- uses: extractions/setup-just@v1
with:
just-version: "1.23.0"
- uses: actions/cache/restore@v4
name: toolchain cache
with:
path: |
~/.rustup/toolchains/
key: ${{ runner.os }}-toolchain-build-${{ hashFiles('**/rust-toolchain.toml') }}
- uses: actions/cache@v4
name: cargo cache
with:
path: ~/.cargo/
key: ${{ runner.os }}-cargo-pkg-build-${{ hashFiles('**/Cargo.lock') }}
restore-keys: ${{ runner.os }}-cargo-pkg-build-
- uses: actions/cache/restore@v4
name: build cache
with:
path: |
target/
!target/**/glaredb
!target/**/pgprototest
key: ${{ runner.os }}-cargo-glaredb-build-${{ github.ref_name }}
- run: just js build-debug
- run: just js test
unit-tests:
name: Unit Tests
runs-on: ubuntu-latest-8-cores
needs: ["build"]
steps:
- name: checkout
uses: actions/checkout@v4
- uses: extractions/setup-just@v1
with:
just-version: "1.23.0"
- uses: actions/cache/restore@v4
name: toolchain cache
with:
path: ~/.rustup/toolchains/
key: ${{ runner.os }}-toolchain-build-${{ hashFiles('**/rust-toolchain.toml') }}
- uses: actions/cache@v4
name: cargo cache
with:
path: ~/.cargo/
key: ${{ runner.os }}-cargo-pkg-unit-${{ hashFiles('**/Cargo.lock') }}
restore-keys: |
${{ runner.os }}-cargo-pkg-build-${{ hashFiles('**/Cargo.lock') }}
${{ runner.os }}-cargo-pkg-unit-
${{ runner.os }}-cargo-pkg-build-
- uses: actions/cache/restore@v4
name: build cache
with:
path: |
target/
!target/**/glaredb
!target/**/pgprototest
key: ${{ runner.os }}-cargo-glaredb-build-${{ github.ref_name }}
- uses: actions/cache/restore@v4
name: glaredb cache
with:
path: target/debug/glaredb
key: ${{ runner.os }}-glaredb-bin-${{ github.run_id }}
fail-on-cache-miss: true
- run: just unit-tests
cache:
# when we change the Cargo.lock we should copy the branch cache
# from this build to the expected cache location to better
# populate caches.
name: Cache Maintenance
runs-on: ubuntu-latest
needs: ["build"]
# parent branches can't use caches from child branches, so no need
# to update this cache _except_ on main.
if: github.ref == 'refs/heads/master'
steps:
- uses: actions/cache@v4
name: cache check
id: target-for-cargo-hash
with:
lookup-only: true
path: |
target/
!target/**/glaredb
!target/**/pgprototest
key: ${{ runner.os }}-cargo-pkg-build-${{ hashFiles('**/Cargo.lock') }}
- uses: actions/cache/restore@v4
name: build cache restore
if: ${{ steps.target-for-cargo-hash.cache-hit != 'true' }}
with:
path: |
target/
!target/**/glaredb
!target/**/pgprototest
key: ${{ runner.os }}-cargo-glaredb-build-${{ github.ref_name }}
- uses: actions/cache/save@v4
name: cache update
if: ${{ steps.target-for-cargo-hash.cache-hit != 'true' }}
with:
path: |
target/
!target/**/glaredb
!target/**/pgprototest
key: ${{ runner.os }}-cargo-pkg-build-${{ hashFiles('**/Cargo.lock') }}
pg-protocol:
name: PG Protocol Tests
runs-on: ubuntu-latest
needs: ["build"]
steps:
- name: checkout
uses: actions/checkout@v4
- uses: extractions/setup-just@v1
with:
just-version: "1.23.0"
- uses: actions/cache/restore@v4
name: glaredb cache
with:
path: target/debug/glaredb
key: ${{ runner.os }}-glaredb-bin-${{ github.run_id }}
fail-on-cache-miss: true
- uses: actions/cache/restore@v4
name: pgprototest cache
with:
path: target/debug/pgprototest
key: ${{ runner.os }}-pgprototest-bin-${{ github.run_id }}
fail-on-cache-miss: true
- run: ./scripts/protocol-test.sh
- run: just slt-bin-debug 'pgproto/*'
sql-logic-tests:
name: SQL Logic Tests
runs-on: ubuntu-latest
needs: ["build"]
strategy:
matrix:
protocol: ["postgres", "flightsql", "rpc"]
include:
- protocol: "postgres"
basic: just slt-bin --protocol=postgres 'sqllogictests/*'
debug: just slt-bin-debug --protocol=postgres 'sqllogictests/*'
- protocol: "flightsql"
basic: just slt-bin --protocol=flightsql 'sqllogictests/*'
debug: just slt-bin-debug --protocol=flightsql 'sqllogictests/*'
- protocol: "rpc"
basic: just rpc-tests
debug: just rpc-tests
steps:
- name: checkout
uses: actions/checkout@v4
- uses: extractions/setup-just@v1
with:
just-version: "1.23.0"
- uses: actions/cache/restore@v4
name: glaredb cache
with:
path: target/debug/glaredb
key: ${{ runner.os }}-glaredb-bin-${{ github.run_id }}
fail-on-cache-miss: true
- name: public sql logic tests DEBUG
if: ${{ env.ACTIONS_STEP_DEBUG == 'true' }}
run: ${{matrix.protocol.debug}}
- name: public sql logic tests
if: ${{ env.ACTIONS_STEP_DEBUG != 'true' }}
run: ${{matrix.protocol.basic}}
process-integration-tests:
name: Process Integration Tests (pytest)
runs-on: ubuntu-latest
needs: ["build"]
steps:
- name: checkout
uses: actions/checkout@v4
- uses: extractions/setup-just@v1
with:
just-version: "1.23.0"
- name: install poetry
uses: snok/install-poetry@v1
- name: install python
uses: actions/setup-python@v5
with:
python-version: "3.11"
cache: poetry
cache-dependency-path: tests/poetry.lock
- uses: actions/cache/restore@v4
name: glaredb cache
with:
path: target/debug/glaredb
key: ${{ runner.os }}-glaredb-bin-${{ github.run_id }}
fail-on-cache-miss: true
- run: just venv
- run: just pytest
service-integration-tests:
name: Service Integration Tests (SLT)
if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.owner.login == 'GlareDB'
runs-on: ubuntu-latest
needs: ["sql-logic-tests"]
steps:
- name: checkout
uses: actions/checkout@v4
- uses: extractions/setup-just@v1
with:
just-version: "1.23.0"
- uses: actions/cache/restore@v4
name: glaredb cache
with:
path: target/debug/glaredb
key: ${{ runner.os }}-glaredb-bin-${{ github.run_id }}
fail-on-cache-miss: true
- name: GCP authenticate
uses: google-github-actions/auth@v2
with:
credentials_json: ${{ secrets.GCP_SERVICE_ACCOUNT_JSON }}
- name: setup GCP
uses: google-github-actions/setup-gcloud@v2
- run: ./scripts/prepare-testdata.sh
- name: run tests (slt)
env:
GCP_SERVICE_ACCOUNT_KEY: ${{ secrets.GCP_SERVICE_ACCOUNT_JSON }}
GCP_PROJECT_ID: glaredb-artifacts
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AZURE_ACCESS_KEY: ${{ secrets.AZURE_ACCESS_KEY }}
AZURE_ACCOUNT: ${{ secrets.AZURE_ACCOUNT }}
TEST_BUCKET: glaredb-test-bucket
run: |
# Prepare SLT (BigQuery)
if ./scripts/files-changed-in-branch.sh \
"scripts/prepare-testdata.sh" \
"scripts/create-test-bigquery-db.sh" \
"testdata/sqllogictests_datasources_common/data" \
"testdata/sqllogictests_bigquery/data"
then
export GCP_PROJECT_ID=glaredb-dev-playground
export BIGQUERY_DATASET_ID=$(./scripts/create-test-bigquery-db.sh)
else
export BIGQUERY_DATASET_ID=glaredb_test
fi
# Prepare SLT (Object store)
export GCS_BUCKET_NAME=glaredb-test
export AWS_S3_REGION=us-east-1
export AWS_S3_BUCKET_NAME=glaredb-test
# Unset application default credentials. We don't want to unknowingly
# depend on this.
unset GOOGLE_APPLICATION_CREDENTIALS
# Prepare SLT (Postgres)
POSTGRES_TEST_DB=$(./scripts/create-test-postgres-db.sh)
export POSTGRES_CONN_STRING=$(echo "$POSTGRES_TEST_DB" | sed -n 1p)
export POSTGRES_TUNNEL_SSH_CONN_STRING=$(echo "$POSTGRES_TEST_DB" | sed -n 2p)
# Prepare SLT (fake GCS server)
./scripts/create-test-gcs-store.sh
echo "-------------------------------- WITHOUT TUNNEL TEST --------------------------------"
# Run all data source tests without running tunnel tests or the basic
# SLT tests.
just slt-bin --exclude 'sqllogictests/*' \
--exclude '*/tunnels/ssh' \
--exclude 'sqllogictests_snowflake/*' \
--exclude 'sqllogictests_cassandra/*' \
--exclude 'sqllogictests_clickhouse/*' \
--exclude 'sqllogictests_sqlserver/*' \
--exclude 'sqllogictests_mongodb/*' \
--exclude 'sqllogictests_mysql/*' \
echo "-------------------------------- WITH TUNNEL TEST --------------------------------"
# SSH tests are prone to fail if we make a lot of connections at the
# same time. Hence, it makes sense to run all the SSH tests one-by-one
# in order to test the SSH tunnels (which is our aim).
just slt-bin --jobs=1 '*/tunnels/ssh' --exclude 'sqllogictests_mysql/*'
# TODO: move these into the datasource-integration-tests job
echo "-------------------------------- RPC TESTS --------------------------------"
echo "-------------------------- BigQuery -------------------------- "
just slt-bin --protocol=rpc 'sqllogictests_bigquery/*'
echo "-------------------------- Iceberg -------------------------- "
just slt-bin --protocol=rpc 'sqllogictests_iceberg/*'
echo "-------------------------- Native -------------------------- "
just slt-bin --protocol=rpc 'sqllogictests_native/*'
echo "-------------------------- Object Store -------------------------- "
just slt-bin --protocol=rpc 'sqllogictests_object_store/*'
echo "-------------------------- Postgres -------------------------- "
just slt-bin --protocol=rpc --exclude '*/tunnels/ssh' 'sqllogictests_postgres/*'
echo "-------------------------- Fake GCS server with a subdirectory -------------------------- "
just slt-bin -l gs://$TEST_BUCKET/path/to/folder/1 -o service_account_path=/tmp/fake-gcs-creds.json 'sqllogictests_native/*'
just slt-bin -l gs://$TEST_BUCKET/path/to/folder/2 -o service_account_path=/tmp/fake-gcs-creds.json 'sqllogictests_native/*'
minio-integration-tests:
name: MinIO Integration Tests (SLT::MinIO)
runs-on: ubuntu-latest
needs: ["sql-logic-tests"]
steps:
- name: checkout
uses: actions/checkout@v4
- uses: extractions/setup-just@v1
with:
just-version: "1.23.0"
- uses: actions/cache/restore@v4
name: glaredb cache
with:
path: target/debug/glaredb
key: ${{ runner.os }}-glaredb-bin-${{ github.run_id }}
fail-on-cache-miss: true
- name: run tests (slt)
env:
MINIO_ACCESS_KEY: glaredb
MINIO_SECRET_KEY: glaredb_test
TEST_BUCKET: glaredb-test-bucket
run: |
# prepare testdata
./scripts/prepare-testdata.sh
# Prepare SLT (MinIO)
./scripts/create-test-minio-store.sh
echo "-------------------------- MinIO (S3) -------------------------- "
# PG protocol
just slt-bin --location http://localhost:9100 \
--option access_key_id=$MINIO_ACCESS_KEY \
--option secret_access_key=$MINIO_SECRET_KEY \
--option bucket=$TEST_BUCKET \
'sqllogictests/*' \
'sqllogictests_native/*'
# Flight protocol
just slt-bin --location http://localhost:9100 \
--option access_key_id=$MINIO_ACCESS_KEY \
--option secret_access_key=$MINIO_SECRET_KEY \
--option bucket=$TEST_BUCKET \
'sqllogictests/*' \
'sqllogictests_native/*' \
--protocol=flightsql
echo "-------------------------- MinIO (S3) With a sub-directory -------------------------- "
# PG protocol
just slt-bin -l http://localhost:9100/$TEST_BUCKET/path/to/folder \
-o access_key_id=$MINIO_ACCESS_KEY \
-o secret_access_key=$MINIO_SECRET_KEY \
'sqllogictests/*'
# Flight protocol
just slt-bin -l http://localhost:9100/$TEST_BUCKET/path/to/folder \
-o access_key_id=$MINIO_ACCESS_KEY \
-o secret_access_key=$MINIO_SECRET_KEY \
'sqllogictests/*' \
--protocol=flightsql
service-integration-tests-snowflake:
if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.owner.login == 'GlareDB'
name: Snowflake Integration Tests (SLT::Snowflake)
runs-on: ubuntu-latest
needs: ["sql-logic-tests"]
concurrency:
group: snowflake-integration-tests
cancel-in-progress: false
steps:
- name: checkout
uses: actions/checkout@v4
- uses: extractions/setup-just@v1
with:
just-version: "1.23.0"
- uses: actions/cache/restore@v4
name: glaredb cache
with:
path: target/debug/glaredb
key: ${{ runner.os }}-glaredb-bin-${{ github.run_id }}
fail-on-cache-miss: true
- name: snowflake setup (SnowSQL)
run: |
curl -o snowsql.bash \
https://sfc-repo.snowflakecomputing.com/snowsql/bootstrap/1.2/linux_x86_64/snowsql-1.2.24-linux_x86_64.bash
mkdir -p ~/bin
SNOWSQL_DEST=~/bin SNOWSQL_LOGIN_SHELL=~/.profile bash snowsql.bash
- run: ./scripts/prepare-testdata.sh
- name: run tests (slt)
env:
SNOWFLAKE_USERNAME: ${{ secrets.SNOWFLAKE_USERNAME }}
SNOWFLAKE_PASSWORD: ${{ secrets.SNOWFLAKE_PASSWORD }}
run: |
# Prepare SLT (Snowflake)
export PATH="$HOME/bin:$PATH"
if ./scripts/files-changed-in-branch.sh \
"scripts/prepare-testdata.sh" \
"scripts/create-test-snowflake-db.sh" \
"testdata/sqllogictests_datasources_common/data" \
"testdata/sqllogictests_snowflake/data"
then
export SNOWFLAKE_DATABASE=$(./scripts/create-test-snowflake-db.sh)
else
export SNOWFLAKE_DATABASE=glaredb_test
fi
just slt-bin 'sqllogictests_snowflake/*'
just slt-bin --protocol=rpc 'sqllogictests_snowflake/*'
datasource-integration-tests:
name: Datasource Integration (${{matrix.settings.name}})
strategy:
matrix:
settings:
- name: Clickhouse
path: "sqllogictests_clickhouse/*"
prepare: |
./scripts/prepare-testdata.sh
source ./scripts/ci-install-clickhouse.sh
export CLICKHOUSE_CONN_STRING=$(./scripts/create-test-clickhouse-db.sh)
- name: Cassandra
path: "sqllogictests_cassandra/*"
prepare: |
export CASSANDRA_CONN_STRING=$(./scripts/create-test-cassandra-db.sh | tail -n 1)
- name: Mysql
path: "sqllogictests_mysql/*"
prepare: |
./scripts/prepare-testdata.sh
export MYSQL_CONN_STRING=$(./scripts/create-test-mysql-db.sh)
export MYSQL_TUNNEL_SSH_CONN_STRING=$(echo "$MYSQL_CONN_STRING" | sed -n 2p)
export MYSQL_CONN_STRING=$(echo "$MYSQL_CONN_STRING" | sed -n 1p)
- name: MongoDB
path: "sqllogictests_mongodb/*"
prepare: |
./scripts/prepare-testdata.sh
export MONGO_CONN_STRING=$(./scripts/create-test-mongo-db.sh)
- name: Sqlserver
path: "sqllogictests_sqlserver/*"
prepare: |
./scripts/prepare-testdata.sh
export SQL_SERVER_CONN_STRING=$(./scripts/create-test-sqlserver-db.sh)
runs-on: ubuntu-latest
needs: ["sql-logic-tests"]
steps:
- name: checkout
uses: actions/checkout@v4
- uses: extractions/setup-just@v1
with:
just-version: "1.23.0"
- uses: actions/cache/restore@v4
name: glaredb cache
with:
path: target/debug/glaredb
key: ${{ runner.os }}-glaredb-bin-${{ github.run_id }}
fail-on-cache-miss: true
- name: run tests (slt)
run: |
${{matrix.settings.prepare}}
just slt-bin ${{matrix.settings.path}}
just slt-bin --protocol=rpc --exclude '*/tunnels/ssh' ${{matrix.settings.path}}
# for sqlserver, skip flightsql because the suite takes 4-5
# minutes, and Sqlserver is the longest/last task to finish
[ "${{matrix.settings.name}}" = "Sqlserver" ] && exit 0
just slt-bin --protocol=flightsql --exclude '*/tunnels/ssh' ${{matrix.settings.path}}