From 8439cf648251a444bc092cd0b6f386a5cab43639 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Thu, 21 Mar 2024 01:43:46 +0100 Subject: [PATCH 01/51] remove torcharrow and update pyvelox cml --- pyvelox/CMakeLists.txt | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/pyvelox/CMakeLists.txt b/pyvelox/CMakeLists.txt index 4bffa203b7d4..1dba45c58a7f 100644 --- a/pyvelox/CMakeLists.txt +++ b/pyvelox/CMakeLists.txt @@ -14,18 +14,16 @@ if(VELOX_BUILD_PYTHON_PACKAGE) message("Creating pyvelox module") - include_directories(SYSTEM ${CMAKE_SOURCE_DIR}) - add_definitions(-DCREATE_PYVELOX_MODULE -DVELOX_DISABLE_GOOGLETEST) # Define our Python module: pybind11_add_module( pyvelox MODULE + complex.cpp + conversion.cpp pyvelox.cpp serde.cpp - signatures.cpp - complex.cpp - conversion.cpp) - # Link with Velox: + signatures.cpp) + target_link_libraries( pyvelox PRIVATE velox_type @@ -37,11 +35,8 @@ if(VELOX_BUILD_PYTHON_PACKAGE) velox_functions_prestosql velox_functions_spark) + target_include_directories(pyvelox SYSTEM + PRIVATE ${CMAKE_CURRENT_LIST_DIR}/..) + target_compile_definitions(pyvelox PRIVATE -DCREATE_PYVELOX_MODULE) install(TARGETS pyvelox LIBRARY DESTINATION .) -else() - # Torcharrow will not use pyvelox as an extension module for compatibility - # reasons. - message("Creating pyvelox library") - add_library(pyvelox pyvelox.cpp pyvelox.h) - target_link_libraries(pyvelox velox_type pybind11::module) endif() From dbdd1f0013b3632c3456b8e789e2faf54fea81d2 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Thu, 21 Mar 2024 01:44:39 +0100 Subject: [PATCH 02/51] make build velox usable by pyvelox --- CMakeLists.txt | 18 +++++++----------- Makefile | 2 +- setup.py | 13 ++++++++++--- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fa30c5fa0bd6..67bcc16f03bc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -166,23 +166,19 @@ if(${VELOX_ENABLE_EXAMPLES}) endif() if(${VELOX_BUILD_PYTHON_PACKAGE}) - set(VELOX_BUILD_TESTING OFF) + # set(VELOX_BUILD_TESTING OFF) set(VELOX_ENABLE_PRESTO_FUNCTIONS ON) set(VELOX_ENABLE_DUCKDB ON) set(VELOX_ENABLE_EXPRESSION ON) set(VELOX_ENABLE_PARSE ON) set(VELOX_ENABLE_EXEC ON) - set(VELOX_ENABLE_AGGREGATES OFF) - set(VELOX_ENABLE_HIVE_CONNECTOR OFF) - set(VELOX_ENABLE_TPCH_CONNECTOR OFF) + # set(VELOX_ENABLE_AGGREGATES OFF) set(VELOX_ENABLE_HIVE_CONNECTOR OFF) + # set(VELOX_ENABLE_TPCH_CONNECTOR OFF) set(VELOX_ENABLE_SPARK_FUNCTIONS ON) - set(VELOX_ENABLE_EXAMPLES OFF) - set(VELOX_ENABLE_S3 OFF) - set(VELOX_ENABLE_GCS OFF) - set(VELOX_ENABLE_ABFS OFF) - set(VELOX_ENABLE_SUBSTRAIT OFF) - set(VELOX_ENABLE_BENCHMARKS_BASIC OFF) - set(VELOX_ENABLE_BENCHMARKS OFF) + # set(VELOX_ENABLE_EXAMPLES OFF) set(VELOX_ENABLE_S3 OFF) set(VELOX_ENABLE_GCS + # OFF) set(VELOX_ENABLE_ABFS OFF) set(VELOX_ENABLE_SUBSTRAIT OFF) + # set(VELOX_ENABLE_CODEGEN_SUPPORT OFF) set(VELOX_ENABLE_BENCHMARKS_BASIC OFF) + # set(VELOX_ENABLE_BENCHMARKS OFF) endif() # We look for OpenSSL here to cache the result enforce the version across our diff --git a/Makefile b/Makefile index 02f385b6eb71..b3f65bd10f94 100644 --- a/Makefile +++ b/Makefile @@ -183,7 +183,7 @@ python-clean: DEBUG=1 ${PYTHON_EXECUTABLE} setup.py clean python-build: - DEBUG=1 CMAKE_BUILD_PARALLEL_LEVEL=4 ${PYTHON_EXECUTABLE} -m pip install -e .$(extras) --verbose + DEBUG=1 CMAKE_BUILD_PARALLEL_LEVEL=${NUM_THREADS} ${PYTHON_EXECUTABLE} -m pip install -e .$(extras) --verbose python-test: $(MAKE) python-build extras="[tests]" diff --git a/setup.py b/setup.py index 465e511bb519..e29a18a7865a 100644 --- a/setup.py +++ b/setup.py @@ -109,6 +109,14 @@ def run(self): def build_extension(self, ext): extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name))) + # Allow using a pre-built Velox library (for CI and development) + if "VELOX_BUILD_DIR" in os.environ: + velox_dir = os.path.abspath(os.environ["VELOX_BUILD_DIR"]) + + if not os.path.isdir(extdir): + os.symlink(velox_dir, os.path.dirname(extdir), target_is_directory=True) + + return # required for auto-detection of auxiliary "native" libs if not extdir.endswith(os.path.sep): @@ -126,7 +134,6 @@ def build_extension(self, ext): f"-DCMAKE_BUILD_TYPE={cfg}", f"-DCMAKE_INSTALL_PREFIX={extdir}", "-DCMAKE_VERBOSE_MAKEFILE=ON", - "-DVELOX_BUILD_PYTHON_PACKAGE=ON", f"-DPYTHON_EXECUTABLE={exec_path} ", ] build_args = [] @@ -148,9 +155,9 @@ def build_extension(self, ext): os.makedirs(self.build_temp) subprocess.check_call( - ["cmake", str(ROOT_DIR)] + cmake_args, cwd=self.build_temp + ["cmake", str(os.path.join(ROOT_DIR, "pyvelox"))] + cmake_args, + cwd=self.build_temp, ) - print(self.build_temp) subprocess.check_call( ["cmake", "--build", "."] + build_args, cwd=self.build_temp ) From 530177cbfb3d432c8866b0f8f1f74be8b4f1897e Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Thu, 21 Mar 2024 01:45:09 +0100 Subject: [PATCH 03/51] get or create function signatures --- .github/workflows/linux-build.yml | 71 ++++++++++++++++++++++++++++--- 1 file changed, 64 insertions(+), 7 deletions(-) diff --git a/.github/workflows/linux-build.yml b/.github/workflows/linux-build.yml index 73b37ee4c72c..542aba629664 100644 --- a/.github/workflows/linux-build.yml +++ b/.github/workflows/linux-build.yml @@ -111,20 +111,76 @@ jobs: defaults: run: shell: bash + working-directory: velox steps: - - uses: actions/checkout@v4 + - name: Get Function Signature Stash + uses: assignUser/stash/restore@v1 + id: get-sig + with: + path: /tmp/signatures + key: function-signatures + + - name: Get Ccache Stash + uses: assignUser/stash/restore@v1 + with: + path: '${{ env.CCACHE_DIR }}' + key: ccache-ubuntu-debug-default + + - name: Ensure Ccache dir exists + run: | + mkdir -p '${{ env.CCACHE_DIR }}' + + - uses: actions/setup-python@v5 + with: + python-version: '3.10' + + - name: Checkout Main + if: ${{ steps.get-sig.outputs.stash-hit != 'true' }} + uses: actions/checkout@v4 + with: + ref: 'main' + path: velox_main - name: Install Dependencies + if: ${{ steps.get-sig.outputs.stash-hit != 'true' }} + working-directory: velox_main run: | source scripts/setup-ubuntu.sh - - uses: assignUser/stash/restore@v1 + - name: Build PyVelox + if: ${{ steps.get-sig.outputs.stash-hit != 'true' }} + env: + VELOX_DEPENDENCY_SOURCE: BUNDLED + MAKEFLAGS: "NUM_THREADS=8 MAX_HIGH_MEM_JOBS=4 MAX_LINK_JOBS=4" + working-directory: velox_main + run: | + python -m venv .venv + source .venv/bin/activate + + make python-build + + - name: Create Baseline Signatures + if: ${{ steps.get-sig.outputs.stash-hit != 'true' }} + working-directory: velox_main + run: | + source .venv/bin/activate + python scripts/signature.py export --spark /tmp/signatures/spark_signatures_main.json + python scripts/signature.py export --presto /tmp/signatures/presto_signatures_main.json + + - name: Save Function Signature Stash + uses: assignUser/stash/save@v1 with: - path: '${{ env.CCACHE_DIR }}' - key: ccache-ubuntu-debug-default + path: /tmp/signatures + key: function-signatures + + - name: Checkout Contender + uses: actions/checkout@v4 + with: + path: velox - - run: | - mkdir -p .ccache + - name: Install Dependencies + run: | + source scripts/setup-ubuntu.sh - name: Clear CCache Statistics run: | @@ -135,7 +191,8 @@ jobs: VELOX_DEPENDENCY_SOURCE: BUNDLED MAKEFLAGS: "NUM_THREADS=8 MAX_HIGH_MEM_JOBS=4 MAX_LINK_JOBS=4" run: | - make debug EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_ARROW=ON" + make debug EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_ARROW=ON \ + -DVELOX_BUILD_PYTHON_PACKAGE=ON" - name: CCache after run: | From ef77d637f3d7469d0adc26194b2ca7f1239fd5ef Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Thu, 21 Mar 2024 02:28:40 +0100 Subject: [PATCH 04/51] hardcode ref instead of main to workaround broken pr --- .github/workflows/linux-build.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/linux-build.yml b/.github/workflows/linux-build.yml index 542aba629664..1d3e10c1f371 100644 --- a/.github/workflows/linux-build.yml +++ b/.github/workflows/linux-build.yml @@ -138,7 +138,8 @@ jobs: if: ${{ steps.get-sig.outputs.stash-hit != 'true' }} uses: actions/checkout@v4 with: - ref: 'main' + # hardcode ref without broken pr + ref: '43a552715e4c0c005cb73cfb6dca0f996de76f3a' path: velox_main - name: Install Dependencies From bcd14a3360602665f1bea3ffde6af1051877c384 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Thu, 21 Mar 2024 02:55:49 +0100 Subject: [PATCH 05/51] fix working dir --- .github/workflows/linux-build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/linux-build.yml b/.github/workflows/linux-build.yml index 1d3e10c1f371..a5a4d4e890c4 100644 --- a/.github/workflows/linux-build.yml +++ b/.github/workflows/linux-build.yml @@ -127,6 +127,7 @@ jobs: key: ccache-ubuntu-debug-default - name: Ensure Ccache dir exists + working-directory: ${{ github.workspace }} run: | mkdir -p '${{ env.CCACHE_DIR }}' From 465139a8736879cbbee5f93ab8502b2d2163a298 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Thu, 21 Mar 2024 03:57:33 +0100 Subject: [PATCH 06/51] Add deepdiff --- .github/workflows/linux-build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/linux-build.yml b/.github/workflows/linux-build.yml index a5a4d4e890c4..b38e5ed36709 100644 --- a/.github/workflows/linux-build.yml +++ b/.github/workflows/linux-build.yml @@ -166,6 +166,7 @@ jobs: working-directory: velox_main run: | source .venv/bin/activate + python -m pip install deepdiff python scripts/signature.py export --spark /tmp/signatures/spark_signatures_main.json python scripts/signature.py export --presto /tmp/signatures/presto_signatures_main.json From 88f45b8124912fe8862ff200aa1548dbd38a4796 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Thu, 21 Mar 2024 04:35:44 +0100 Subject: [PATCH 07/51] fix stash dir on empty stash --- .github/workflows/linux-build.yml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/linux-build.yml b/.github/workflows/linux-build.yml index b38e5ed36709..28e95a3a9842 100644 --- a/.github/workflows/linux-build.yml +++ b/.github/workflows/linux-build.yml @@ -126,10 +126,11 @@ jobs: path: '${{ env.CCACHE_DIR }}' key: ccache-ubuntu-debug-default - - name: Ensure Ccache dir exists + - name: Ensure Stash Dirs Exists working-directory: ${{ github.workspace }} run: | mkdir -p '${{ env.CCACHE_DIR }}' + mkdir -p /tmp/signatures - uses: actions/setup-python@v5 with: @@ -161,6 +162,12 @@ jobs: make python-build + - name: Save Function Signature Stash + uses: assignUser/stash/save@v1 + with: + path: /tmp/signatures + key: function-signatures + - name: Create Baseline Signatures if: ${{ steps.get-sig.outputs.stash-hit != 'true' }} working-directory: velox_main @@ -170,11 +177,6 @@ jobs: python scripts/signature.py export --spark /tmp/signatures/spark_signatures_main.json python scripts/signature.py export --presto /tmp/signatures/presto_signatures_main.json - - name: Save Function Signature Stash - uses: assignUser/stash/save@v1 - with: - path: /tmp/signatures - key: function-signatures - name: Checkout Contender uses: actions/checkout@v4 From ee3b675d1cc6b470c8b0ae0bc9288dfcc0807209 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Thu, 21 Mar 2024 05:13:25 +0100 Subject: [PATCH 08/51] correctly save signatures --- .github/workflows/linux-build.yml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/linux-build.yml b/.github/workflows/linux-build.yml index 28e95a3a9842..4922eaaa6c00 100644 --- a/.github/workflows/linux-build.yml +++ b/.github/workflows/linux-build.yml @@ -162,12 +162,6 @@ jobs: make python-build - - name: Save Function Signature Stash - uses: assignUser/stash/save@v1 - with: - path: /tmp/signatures - key: function-signatures - - name: Create Baseline Signatures if: ${{ steps.get-sig.outputs.stash-hit != 'true' }} working-directory: velox_main @@ -177,6 +171,11 @@ jobs: python scripts/signature.py export --spark /tmp/signatures/spark_signatures_main.json python scripts/signature.py export --presto /tmp/signatures/presto_signatures_main.json + - name: Save Function Signature Stash + uses: assignUser/stash/save@v1 + with: + path: /tmp/signatures + key: function-signatures - name: Checkout Contender uses: actions/checkout@v4 From 01f816dd07627b21d20844e5ec3b53aa1771905c Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Fri, 22 Mar 2024 03:56:36 +0100 Subject: [PATCH 09/51] make python options turn features on only --- CMakeLists.txt | 7 ------- setup.py | 2 ++ 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 67bcc16f03bc..e34fca1a7013 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -166,19 +166,12 @@ if(${VELOX_ENABLE_EXAMPLES}) endif() if(${VELOX_BUILD_PYTHON_PACKAGE}) - # set(VELOX_BUILD_TESTING OFF) set(VELOX_ENABLE_PRESTO_FUNCTIONS ON) set(VELOX_ENABLE_DUCKDB ON) set(VELOX_ENABLE_EXPRESSION ON) set(VELOX_ENABLE_PARSE ON) set(VELOX_ENABLE_EXEC ON) - # set(VELOX_ENABLE_AGGREGATES OFF) set(VELOX_ENABLE_HIVE_CONNECTOR OFF) - # set(VELOX_ENABLE_TPCH_CONNECTOR OFF) set(VELOX_ENABLE_SPARK_FUNCTIONS ON) - # set(VELOX_ENABLE_EXAMPLES OFF) set(VELOX_ENABLE_S3 OFF) set(VELOX_ENABLE_GCS - # OFF) set(VELOX_ENABLE_ABFS OFF) set(VELOX_ENABLE_SUBSTRAIT OFF) - # set(VELOX_ENABLE_CODEGEN_SUPPORT OFF) set(VELOX_ENABLE_BENCHMARKS_BASIC OFF) - # set(VELOX_ENABLE_BENCHMARKS OFF) endif() # We look for OpenSSL here to cache the result enforce the version across our diff --git a/setup.py b/setup.py index e29a18a7865a..63da6252c58a 100644 --- a/setup.py +++ b/setup.py @@ -134,6 +134,8 @@ def build_extension(self, ext): f"-DCMAKE_BUILD_TYPE={cfg}", f"-DCMAKE_INSTALL_PREFIX={extdir}", "-DCMAKE_VERBOSE_MAKEFILE=ON", + "-DVELOX_BUILD_MINIMAL=ON", + "-DVELOX_BUILD_PYTHON_PACKAGE=ON", f"-DPYTHON_EXECUTABLE={exec_path} ", ] build_args = [] From 63edae37bd0373c66662dd6d7bd0160e5f6205fa Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Fri, 22 Mar 2024 05:32:19 +0100 Subject: [PATCH 10/51] move bias fuzzer to scheduled.yml --- .github/workflows/linux-build.yml | 58 +--------- .github/workflows/scheduled.yml | 178 +++++++++++++++++++++++++++++- 2 files changed, 174 insertions(+), 62 deletions(-) diff --git a/.github/workflows/linux-build.yml b/.github/workflows/linux-build.yml index 4922eaaa6c00..f0ef422685aa 100644 --- a/.github/workflows/linux-build.yml +++ b/.github/workflows/linux-build.yml @@ -113,12 +113,6 @@ jobs: shell: bash working-directory: velox steps: - - name: Get Function Signature Stash - uses: assignUser/stash/restore@v1 - id: get-sig - with: - path: /tmp/signatures - key: function-signatures - name: Get Ccache Stash uses: assignUser/stash/restore@v1 @@ -130,55 +124,8 @@ jobs: working-directory: ${{ github.workspace }} run: | mkdir -p '${{ env.CCACHE_DIR }}' - mkdir -p /tmp/signatures - - - uses: actions/setup-python@v5 - with: - python-version: '3.10' - - - name: Checkout Main - if: ${{ steps.get-sig.outputs.stash-hit != 'true' }} - uses: actions/checkout@v4 - with: - # hardcode ref without broken pr - ref: '43a552715e4c0c005cb73cfb6dca0f996de76f3a' - path: velox_main - - name: Install Dependencies - if: ${{ steps.get-sig.outputs.stash-hit != 'true' }} - working-directory: velox_main - run: | - source scripts/setup-ubuntu.sh - - - name: Build PyVelox - if: ${{ steps.get-sig.outputs.stash-hit != 'true' }} - env: - VELOX_DEPENDENCY_SOURCE: BUNDLED - MAKEFLAGS: "NUM_THREADS=8 MAX_HIGH_MEM_JOBS=4 MAX_LINK_JOBS=4" - working-directory: velox_main - run: | - python -m venv .venv - source .venv/bin/activate - - make python-build - - - name: Create Baseline Signatures - if: ${{ steps.get-sig.outputs.stash-hit != 'true' }} - working-directory: velox_main - run: | - source .venv/bin/activate - python -m pip install deepdiff - python scripts/signature.py export --spark /tmp/signatures/spark_signatures_main.json - python scripts/signature.py export --presto /tmp/signatures/presto_signatures_main.json - - - name: Save Function Signature Stash - uses: assignUser/stash/save@v1 - with: - path: /tmp/signatures - key: function-signatures - - - name: Checkout Contender - uses: actions/checkout@v4 + - uses: actions/checkout@v4 with: path: velox @@ -195,8 +142,7 @@ jobs: VELOX_DEPENDENCY_SOURCE: BUNDLED MAKEFLAGS: "NUM_THREADS=8 MAX_HIGH_MEM_JOBS=4 MAX_LINK_JOBS=4" run: | - make debug EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_ARROW=ON \ - -DVELOX_BUILD_PYTHON_PACKAGE=ON" + make debug EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_ARROW=ON" - name: CCache after run: | diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index 6ea4d84d0c87..cfe96f29e9e7 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -71,21 +71,88 @@ env: jobs: compile: name: Build - runs-on: 8-core + runs-on: 16-core timeout-minutes: 120 env: CCACHE_DIR: "${{ github.workspace }}/.ccache/" CCACHE_BASEDIR: "${{ github.workspace }}" LINUX_DISTRO: "ubuntu" + defaults: + run: + shell: bash + working-directory: velox + outputs: + bias_presto: ${{ steps.sig-check.outputs.presto }} + bias_spark: ${{ steps.sig-check.outputs.spark }} + steps: + - name: Get Function Signature Stash + uses: assignUser/stash/restore@v1 + id: get-sig + with: + path: /tmp/signatures + key: function-signatures + - name: "Restore ccache" uses: assignUser/stash/restore@v1 with: path: "${{ env.CCACHE_DIR }}" key: ccache-fuzzer - - name: "Checkout Repo" + - name: Ensure Stash Dirs Exists + working-directory: ${{ github.workspace }} + run: | + mkdir -p '${{ env.CCACHE_DIR }}' + mkdir -p /tmp/signatures + + - uses: actions/setup-python@v5 + with: + python-version: '3.10' + + - name: Checkout Main + if: ${{ steps.get-sig.outputs.stash-hit != 'true' }} + uses: actions/checkout@v4 + with: + # hardcode ref without broken pr + ref: '43a552715e4c0c005cb73cfb6dca0f996de76f3a' + path: velox_main + + - name: Install Dependencies + if: ${{ steps.get-sig.outputs.stash-hit != 'true' }} + working-directory: velox_main + run: | + source scripts/setup-ubuntu.sh + + - name: Build PyVelox + if: ${{ steps.get-sig.outputs.stash-hit != 'true' }} + env: + VELOX_DEPENDENCY_SOURCE: BUNDLED + MAKEFLAGS: "NUM_THREADS=16 MAX_HIGH_MEM_JOBS=8 MAX_LINK_JOBS=8" + working-directory: velox_main + run: | + python -m venv .venv + source .venv/bin/activate + + make python-build + + - name: Create Baseline Signatures + if: ${{ steps.get-sig.outputs.stash-hit != 'true' }} + working-directory: velox_main + run: | + source .venv/bin/activate + python -m pip install deepdiff + python scripts/signature.py export --spark /tmp/signatures/spark_signatures_main.json + python scripts/signature.py export --presto /tmp/signatures/presto_signatures_main.json + + - name: Save Function Signature Stash + if: ${{ steps.get-sig.outputs.stash-hit != 'true' }} + uses: assignUser/stash/save@v1 + with: + path: /tmp/signatures + key: function-signatures + + - name: "Checkout Contender" uses: actions/checkout@v4 with: path: velox @@ -94,14 +161,17 @@ jobs: - name: "Install dependencies" run: | - cd velox source ./scripts/setup-ubuntu.sh ccache -vsz - name: Build run: | - cd velox - make debug NUM_THREADS="${{ inputs.numThreads || 8 }}" MAX_HIGH_MEM_JOBS="${{ inputs.maxHighMemJobs || 8 }}" MAX_LINK_JOBS="${{ inputs.maxLinkJobs || 4 }}" EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_ARROW=ON ${{ inputs.extraCMakeFlags }}" + make debug NUM_THREADS="${{ inputs.numThreads || 16 }}" \ + MAX_HIGH_MEM_JOBS="${{ inputs.maxHighMemJobs || 8 }}" \ + MAX_LINK_JOBS="${{ inputs.maxLinkJobs || 8 }}" \ + EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_ARROW=ON \ + -DVELOX_BUILD_PYTHON_PACKAGE=ON \ + ${{ inputs.extraCMakeFlags }}" - name: Ccache after run: ccache -vs @@ -112,6 +182,52 @@ jobs: path: "${{ env.CCACHE_DIR }}" key: ccache-fuzzer retention-days: "${{ env.RETENTION }}" + + - name: Build PyVelox + env: + VELOX_BUILD_DIR: "_build/debug" + run: | + python -m venv .venv + source .venv/bin/activate + python -m pip install -e . + + - name: Create and test new function signatures + id: sig-check + run: | + source .venv/bin/activate + python -m pip install deepdiff + python scripts/signature.py export --spark /tmp/signatures/spark_signatures_contender.json + python scripts/signature.py export --presto /tmp/signatures/presto_signatures_contender.json + + python scripts/signature.py bias /tmp/signatures/presto_signatures_main.json \ + /tmp/signatures/presto_signatures_contender.json \ + /tmp/signatures/presto_bias_functions 2>&1 > /tmp/signatures/presto-err-message \ + || echo "::notice ::Presto Signature check failed" \ + | tee /tmp/signatures/presto-err-code + if [ -f "/tmp/signatures/presto_bias_functions" ]; then + echo "presto=true" >> $GITHUB_OUTPUT + else + echo "presto=false" >> $GITHUB_OUTPUT + fi + + python scripts/signature.py bias /tmp/signatures/spark_signatures_main.json \ + /tmp/signatures/spark_signatures_contender.json \ + /tmp/signatures/spark_bias_functions \ + || echo "::notice ::Spark Signature check failed" \ + | tee /tmp/signatures/spark-err-code + + if [ -f "/tmp/signatures/spark_bias_functions" ]; then + echo "spark=true" >> $GITHUB_OUTPUT + else + echo "spark=false" >> $GITHUB_OUTPUT + fi + + - name: Upload Signature Artifacts + uses: actions/upload-artifact@v4 + with: + name: signatures + path: /tmp/signatures + retention-days: "${{ env.RETENTION }}" - name: Upload presto fuzzer uses: actions/upload-artifact@v4 @@ -274,7 +390,57 @@ jobs: path: | /tmp/spark_aggregate_fuzzer_repro - linux-spark-fuzzer-run: + spark-bias-fuzzer: + name: "Spark Fuzzer" + runs-on: ubuntu-latest + needs: compile + if: ${{ needs.compile.outputs.spark == 'true' }} + timeout-minutes: 120 + steps: + + - name: "Checkout Repo" + uses: actions/checkout@v4 + with: + ref: "${{ inputs.ref }}" + + - name: "Install dependencies" + run: source ./scripts/setup-ubuntu.sh + + - name: Download spark expression fuzzer + uses: actions/download-artifact@v4 + with: + name: spark_expression_fuzzer + + - name: Download Signatures + uses: actions/download-artifact@v4 + with: + name: signatures + path: /tmp/signatures + + - name: "Run Spark Expression Fuzzer" + run: | + ls /tmp/signatures + mkdir -p /tmp/spark_fuzzer_repro/ + chmod -R 777 /tmp/spark_fuzzer_repro + chmod +x spark_expression_fuzzer_test + ./spark_expression_fuzzer_test \ + --seed ${RANDOM} \ + --duration_sec $DURATION \ + --logtostderr=1 \ + --minloglevel=0 \ + --assign_function_tickets $(cat /tmp/signatures/spark_bias_functions) \ + --repro_persist_path=/tmp/spark_fuzzer_repro \ + && echo -e "\n\nSpark Fuzzer run finished successfully." + + - name: Archive Spark expression production artifacts + if: ${{ !cancelled() }} + uses: actions/upload-artifact@v4 + with: + name: spark-fuzzer-failure-artifacts + path: | + /tmp/spark_bias_fuzzer_repro + + spark-fuzzer: name: "Spark Fuzzer" runs-on: ubuntu-latest needs: compile From a6bb9708f085de913246803955107830d96c05d3 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Fri, 22 Mar 2024 05:41:48 +0100 Subject: [PATCH 11/51] use 8-core --- .github/workflows/scheduled.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index cfe96f29e9e7..84a61d511217 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -71,7 +71,7 @@ env: jobs: compile: name: Build - runs-on: 16-core + runs-on: 8-core timeout-minutes: 120 env: CCACHE_DIR: "${{ github.workspace }}/.ccache/" @@ -128,7 +128,7 @@ jobs: if: ${{ steps.get-sig.outputs.stash-hit != 'true' }} env: VELOX_DEPENDENCY_SOURCE: BUNDLED - MAKEFLAGS: "NUM_THREADS=16 MAX_HIGH_MEM_JOBS=8 MAX_LINK_JOBS=8" + MAKEFLAGS: "NUM_THREADS=8 MAX_HIGH_MEM_JOBS=4 MAX_LINK_JOBS=4" working-directory: velox_main run: | python -m venv .venv @@ -166,9 +166,9 @@ jobs: - name: Build run: | - make debug NUM_THREADS="${{ inputs.numThreads || 16 }}" \ - MAX_HIGH_MEM_JOBS="${{ inputs.maxHighMemJobs || 8 }}" \ - MAX_LINK_JOBS="${{ inputs.maxLinkJobs || 8 }}" \ + make debug NUM_THREADS="${{ inputs.numThreads || 8 }}" \ + MAX_HIGH_MEM_JOBS="${{ inputs.maxHighMemJobs || 4 }}" \ + MAX_LINK_JOBS="${{ inputs.maxLinkJobs || 4 }}" \ EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_ARROW=ON \ -DVELOX_BUILD_PYTHON_PACKAGE=ON \ ${{ inputs.extraCMakeFlags }}" From aaa61da80cb7c5434472afb85b4c5abf1c3b45a5 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Fri, 22 Mar 2024 05:46:28 +0100 Subject: [PATCH 12/51] add extra cmake flags to debug --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b3f65bd10f94..40905575462f 100644 --- a/Makefile +++ b/Makefile @@ -91,7 +91,7 @@ build: #: Build the software based in BUILD_DIR and BUILD_TYPE variables cmake --build $(BUILD_BASE_DIR)/$(BUILD_DIR) -j $(NUM_THREADS) debug: #: Build with debugging symbols - $(MAKE) cmake BUILD_DIR=debug BUILD_TYPE=Debug + $(MAKE) cmake BUILD_DIR=debug BUILD_TYPE=Debug EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} $(MAKE) build BUILD_DIR=debug -j ${NUM_THREADS} release: #: Build the release version From 76b971ed39d389c5b62e5e3e169cf86b163b640f Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Fri, 22 Mar 2024 05:51:48 +0100 Subject: [PATCH 13/51] fix typo --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 40905575462f..7b20cff4ddb6 100644 --- a/Makefile +++ b/Makefile @@ -91,7 +91,7 @@ build: #: Build the software based in BUILD_DIR and BUILD_TYPE variables cmake --build $(BUILD_BASE_DIR)/$(BUILD_DIR) -j $(NUM_THREADS) debug: #: Build with debugging symbols - $(MAKE) cmake BUILD_DIR=debug BUILD_TYPE=Debug EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} + $(MAKE) cmake BUILD_DIR=debug BUILD_TYPE=Debug EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS}" $(MAKE) build BUILD_DIR=debug -j ${NUM_THREADS} release: #: Build the release version From ac257ba0b0500fe741de23e4dea9fa88b54ee74a Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Fri, 22 Mar 2024 06:18:54 +0100 Subject: [PATCH 14/51] use EXTRA_CMAKE_FLAGS as intended via envvar --- .github/workflows/linux-build.yml | 3 ++- .github/workflows/scheduled.yml | 7 +++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/linux-build.yml b/.github/workflows/linux-build.yml index f0ef422685aa..0f291cbc0051 100644 --- a/.github/workflows/linux-build.yml +++ b/.github/workflows/linux-build.yml @@ -141,8 +141,9 @@ jobs: env: VELOX_DEPENDENCY_SOURCE: BUNDLED MAKEFLAGS: "NUM_THREADS=8 MAX_HIGH_MEM_JOBS=4 MAX_LINK_JOBS=4" + EXTRA_CMAKE_FLAGS: "-DVELOX_ENABLE_ARROW=ON" run: | - make debug EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_ARROW=ON" + make debug - name: CCache after run: | diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index 84a61d511217..217736756295 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -165,13 +165,12 @@ jobs: ccache -vsz - name: Build + env: + EXTRA_CMAKE_FLAGS: "-DVELOX_ENABLE_ARROW=ON -DVELOX_BUILD_PYTHON_PACKAGE=ON ${{ inputs.extraCMakeFlags }}" run: | make debug NUM_THREADS="${{ inputs.numThreads || 8 }}" \ MAX_HIGH_MEM_JOBS="${{ inputs.maxHighMemJobs || 4 }}" \ - MAX_LINK_JOBS="${{ inputs.maxLinkJobs || 4 }}" \ - EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_ARROW=ON \ - -DVELOX_BUILD_PYTHON_PACKAGE=ON \ - ${{ inputs.extraCMakeFlags }}" + MAX_LINK_JOBS="${{ inputs.maxLinkJobs || 4 }}" - name: Ccache after run: ccache -vs From 6d65226d4e1fa83ab740248e32425c44a08de10a Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 23 Mar 2024 00:36:15 +0100 Subject: [PATCH 15/51] use 16core for more ram --- .github/workflows/scheduled.yml | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index 217736756295..3dd232100eaa 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -71,12 +71,14 @@ env: jobs: compile: name: Build - runs-on: 8-core + runs-on: 16-core timeout-minutes: 120 env: CCACHE_DIR: "${{ github.workspace }}/.ccache/" CCACHE_BASEDIR: "${{ github.workspace }}" LINUX_DISTRO: "ubuntu" + MAKEFLAGS: "NUM_THREADS=${{ inputs.numThreads || 16 }} MAX_HIGH_MEM_JOBS=${{ inputs.maxHighMemJobs || 6 }} MAX_LINK_JOBS=${{ inputs.maxLinkJobs || 6 }}" + defaults: run: shell: bash @@ -126,9 +128,6 @@ jobs: - name: Build PyVelox if: ${{ steps.get-sig.outputs.stash-hit != 'true' }} - env: - VELOX_DEPENDENCY_SOURCE: BUNDLED - MAKEFLAGS: "NUM_THREADS=8 MAX_HIGH_MEM_JOBS=4 MAX_LINK_JOBS=4" working-directory: velox_main run: | python -m venv .venv @@ -162,18 +161,15 @@ jobs: - name: "Install dependencies" run: | source ./scripts/setup-ubuntu.sh - ccache -vsz + ccache -sz - name: Build env: EXTRA_CMAKE_FLAGS: "-DVELOX_ENABLE_ARROW=ON -DVELOX_BUILD_PYTHON_PACKAGE=ON ${{ inputs.extraCMakeFlags }}" run: | - make debug NUM_THREADS="${{ inputs.numThreads || 8 }}" \ - MAX_HIGH_MEM_JOBS="${{ inputs.maxHighMemJobs || 4 }}" \ - MAX_LINK_JOBS="${{ inputs.maxLinkJobs || 4 }}" - + make debug - name: Ccache after - run: ccache -vs + run: ccache -s - name: "Save ccache" uses: assignUser/stash/save@v1 From 5769a355301a98609d044d4606950747352e1dd8 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 23 Mar 2024 00:50:15 +0100 Subject: [PATCH 16/51] run in container --- .github/workflows/scheduled.yml | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index 3dd232100eaa..eb49593728c1 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -72,12 +72,13 @@ jobs: compile: name: Build runs-on: 16-core + container: ghcr.io/facebookincubator/velox-dev:centos8 timeout-minutes: 120 env: CCACHE_DIR: "${{ github.workspace }}/.ccache/" CCACHE_BASEDIR: "${{ github.workspace }}" LINUX_DISTRO: "ubuntu" - MAKEFLAGS: "NUM_THREADS=${{ inputs.numThreads || 16 }} MAX_HIGH_MEM_JOBS=${{ inputs.maxHighMemJobs || 6 }} MAX_LINK_JOBS=${{ inputs.maxLinkJobs || 6 }}" + MAKEFLAGS: "NUM_THREADS=${{ inputs.numThreads || 16 }} MAX_HIGH_MEM_JOBS=${{ inputs.maxHighMemJobs || 8 }} MAX_LINK_JOBS=${{ inputs.maxLinkJobs || 4 }}" defaults: run: @@ -100,7 +101,14 @@ jobs: uses: assignUser/stash/restore@v1 with: path: "${{ env.CCACHE_DIR }}" - key: ccache-fuzzer + key: ccache-fuzzer-centos + + - name: Fix git permissions + # Usually actions/checkout does this but as we run in a container + # it doesn't work + run: | + git config --global --add safe.directory /__w/velox/velox/velox + git config --global --add safe.directory /__w/velox/velox/velox_main - name: Ensure Stash Dirs Exists working-directory: ${{ github.workspace }} @@ -108,24 +116,14 @@ jobs: mkdir -p '${{ env.CCACHE_DIR }}' mkdir -p /tmp/signatures - - uses: actions/setup-python@v5 - with: - python-version: '3.10' - - name: Checkout Main if: ${{ steps.get-sig.outputs.stash-hit != 'true' }} uses: actions/checkout@v4 with: # hardcode ref without broken pr - ref: '43a552715e4c0c005cb73cfb6dca0f996de76f3a' + ref: 'main' path: velox_main - - name: Install Dependencies - if: ${{ steps.get-sig.outputs.stash-hit != 'true' }} - working-directory: velox_main - run: | - source scripts/setup-ubuntu.sh - - name: Build PyVelox if: ${{ steps.get-sig.outputs.stash-hit != 'true' }} working-directory: velox_main @@ -158,9 +156,8 @@ jobs: submodules: 'recursive' ref: "${{ inputs.ref }}" - - name: "Install dependencies" + - name: "Zero Ccache Statistics" run: | - source ./scripts/setup-ubuntu.sh ccache -sz - name: Build @@ -168,6 +165,7 @@ jobs: EXTRA_CMAKE_FLAGS: "-DVELOX_ENABLE_ARROW=ON -DVELOX_BUILD_PYTHON_PACKAGE=ON ${{ inputs.extraCMakeFlags }}" run: | make debug + - name: Ccache after run: ccache -s @@ -175,7 +173,7 @@ jobs: uses: assignUser/stash/save@v1 with: path: "${{ env.CCACHE_DIR }}" - key: ccache-fuzzer + key: ccache-fuzzer-centos retention-days: "${{ env.RETENTION }}" - name: Build PyVelox From 79d9313e4d3cabf84c0fccf71536eb1cb5489341 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 23 Mar 2024 00:55:17 +0100 Subject: [PATCH 17/51] fix workspace --- .github/workflows/scheduled.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index eb49593728c1..2f045b08f7f0 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -104,6 +104,7 @@ jobs: key: ccache-fuzzer-centos - name: Fix git permissions + working-directory: ${{ github.workspace }} # Usually actions/checkout does this but as we run in a container # it doesn't work run: | From 72d1540965816e69962eef88581924fe4fe9efcc Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 23 Mar 2024 01:15:27 +0100 Subject: [PATCH 18/51] install python headers --- .github/workflows/scheduled.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index 2f045b08f7f0..51ac5737efd4 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -116,6 +116,8 @@ jobs: run: | mkdir -p '${{ env.CCACHE_DIR }}' mkdir -p /tmp/signatures + # this can be removed after #8917 + dnf install -q -y python39-devel - name: Checkout Main if: ${{ steps.get-sig.outputs.stash-hit != 'true' }} From 2242e7f537855b5e69df52f32f37523ac9effe97 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 23 Mar 2024 01:25:20 +0100 Subject: [PATCH 19/51] pass correct python executable --- .github/workflows/scheduled.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index 51ac5737efd4..93393f25ebf2 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -167,6 +167,7 @@ jobs: env: EXTRA_CMAKE_FLAGS: "-DVELOX_ENABLE_ARROW=ON -DVELOX_BUILD_PYTHON_PACKAGE=ON ${{ inputs.extraCMakeFlags }}" run: | + EXTRA_CMAKE_FLAGS="-DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)")" make debug - name: Ccache after From d910a45bb798fc1f3857b1363874c3c1fa439793 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 23 Mar 2024 02:14:54 +0100 Subject: [PATCH 20/51] fix python command --- .github/workflows/scheduled.yml | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index 93393f25ebf2..872aea37bd8f 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -131,7 +131,7 @@ jobs: if: ${{ steps.get-sig.outputs.stash-hit != 'true' }} working-directory: velox_main run: | - python -m venv .venv + python3 -m venv .venv source .venv/bin/activate make python-build @@ -141,9 +141,9 @@ jobs: working-directory: velox_main run: | source .venv/bin/activate - python -m pip install deepdiff - python scripts/signature.py export --spark /tmp/signatures/spark_signatures_main.json - python scripts/signature.py export --presto /tmp/signatures/presto_signatures_main.json + python3 -m pip install deepdiff + python3 scripts/signature.py export --spark /tmp/signatures/spark_signatures_main.json + python3 scripts/signature.py export --presto /tmp/signatures/presto_signatures_main.json - name: Save Function Signature Stash if: ${{ steps.get-sig.outputs.stash-hit != 'true' }} @@ -167,7 +167,7 @@ jobs: env: EXTRA_CMAKE_FLAGS: "-DVELOX_ENABLE_ARROW=ON -DVELOX_BUILD_PYTHON_PACKAGE=ON ${{ inputs.extraCMakeFlags }}" run: | - EXTRA_CMAKE_FLAGS="-DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)")" + EXTRA_CMAKE_FLAGS="-DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)") $EXTRA_CMAKE_FLAGS" make debug - name: Ccache after @@ -184,19 +184,19 @@ jobs: env: VELOX_BUILD_DIR: "_build/debug" run: | - python -m venv .venv + python3 -m venv .venv source .venv/bin/activate - python -m pip install -e . + python3 -m pip install -e . - name: Create and test new function signatures id: sig-check run: | source .venv/bin/activate - python -m pip install deepdiff - python scripts/signature.py export --spark /tmp/signatures/spark_signatures_contender.json - python scripts/signature.py export --presto /tmp/signatures/presto_signatures_contender.json + python3 -m pip install deepdiff + python3 scripts/signature.py export --spark /tmp/signatures/spark_signatures_contender.json + python3 scripts/signature.py export --presto /tmp/signatures/presto_signatures_contender.json - python scripts/signature.py bias /tmp/signatures/presto_signatures_main.json \ + python3 scripts/signature.py bias /tmp/signatures/presto_signatures_main.json \ /tmp/signatures/presto_signatures_contender.json \ /tmp/signatures/presto_bias_functions 2>&1 > /tmp/signatures/presto-err-message \ || echo "::notice ::Presto Signature check failed" \ @@ -207,7 +207,7 @@ jobs: echo "presto=false" >> $GITHUB_OUTPUT fi - python scripts/signature.py bias /tmp/signatures/spark_signatures_main.json \ + python3 scripts/signature.py bias /tmp/signatures/spark_signatures_main.json \ /tmp/signatures/spark_signatures_contender.json \ /tmp/signatures/spark_bias_functions \ || echo "::notice ::Spark Signature check failed" \ From ac33a230525d9cf854c63bc4c5e70a299d6071ec Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 23 Mar 2024 03:23:13 +0100 Subject: [PATCH 21/51] move fuzzer runs to container as well --- .github/workflows/scheduled.yml | 54 ++++----------------------------- 1 file changed, 6 insertions(+), 48 deletions(-) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index 872aea37bd8f..bb61f320f668 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -271,18 +271,11 @@ jobs: linux-presto-fuzzer-run: name: "Presto Fuzzer" runs-on: ubuntu-latest + container: ghcr.io/facebookincubator/velox-dev:centos8 needs: compile timeout-minutes: 120 steps: - - name: "Checkout Repo" - uses: actions/checkout@v4 - with: - ref: "${{ inputs.ref }}" - - - name: "Install dependencies" - run: source ./scripts/setup-ubuntu.sh - - uses: dorny/paths-filter@v3 if: github.event_name == 'pull_request' id: changes @@ -349,18 +342,11 @@ jobs: linux-spark-aggregate-fuzzer-run: name: "Spark Aggregate Fuzzer" runs-on: ubuntu-latest + container: ghcr.io/facebookincubator/velox-dev:centos8 needs: compile timeout-minutes: 60 steps: - - name: "Checkout Repo" - uses: actions/checkout@v4 - with: - ref: "${{ inputs.ref }}" - - - name: "Install dependencies" - run: source ./scripts/setup-ubuntu.sh - - name: Download spark aggregation fuzzer uses: actions/download-artifact@v4 with: @@ -390,19 +376,12 @@ jobs: spark-bias-fuzzer: name: "Spark Fuzzer" runs-on: ubuntu-latest + container: ghcr.io/facebookincubator/velox-dev:centos8 needs: compile if: ${{ needs.compile.outputs.spark == 'true' }} timeout-minutes: 120 steps: - - name: "Checkout Repo" - uses: actions/checkout@v4 - with: - ref: "${{ inputs.ref }}" - - - name: "Install dependencies" - run: source ./scripts/setup-ubuntu.sh - - name: Download spark expression fuzzer uses: actions/download-artifact@v4 with: @@ -440,18 +419,11 @@ jobs: spark-fuzzer: name: "Spark Fuzzer" runs-on: ubuntu-latest + container: ghcr.io/facebookincubator/velox-dev:centos8 needs: compile timeout-minutes: 120 steps: - - name: "Checkout Repo" - uses: actions/checkout@v4 - with: - ref: "${{ inputs.ref }}" - - - name: "Install dependencies" - run: source ./scripts/setup-ubuntu.sh - - name: Download spark expression fuzzer uses: actions/download-artifact@v4 with: @@ -488,18 +460,11 @@ jobs: linux-aggregate-fuzzer-run: name: "Aggregate Fuzzer" runs-on: ubuntu-latest + container: ghcr.io/facebookincubator/velox-dev:centos8 needs: compile timeout-minutes: 120 steps: - - name: "Checkout Repo" - uses: actions/checkout@v4 - with: - ref: "${{ inputs.ref }}" - - - name: "Install dependencies" - run: source ./scripts/setup-ubuntu.sh - - name: Download aggregation fuzzer uses: actions/download-artifact@v4 with: @@ -530,18 +495,11 @@ jobs: linux-join-fuzzer-run: name: "Join Fuzzer" runs-on: ubuntu-latest + container: ghcr.io/facebookincubator/velox-dev:centos8 needs: compile timeout-minutes: 120 steps: - - name: "Checkout Repo" - uses: actions/checkout@v4 - with: - ref: "${{ inputs.ref }}" - - - name: "Install dependencies" - run: source ./scripts/setup-ubuntu.sh - - name: Download join fuzzer uses: actions/download-artifact@v4 with: From b3b97d93eded1ae1686c78c1c88701be511341c3 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 23 Mar 2024 03:31:42 +0100 Subject: [PATCH 22/51] add presto bias fuzzer run --- .github/workflows/scheduled.yml | 87 +++++++++++++++++++++++++++------ 1 file changed, 73 insertions(+), 14 deletions(-) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index bb61f320f668..d8cdb390cc66 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -115,7 +115,7 @@ jobs: working-directory: ${{ github.workspace }} run: | mkdir -p '${{ env.CCACHE_DIR }}' - mkdir -p /tmp/signatures + mkdir -p /tmp/signatures # this can be removed after #8917 dnf install -q -y python39-devel @@ -179,7 +179,7 @@ jobs: path: "${{ env.CCACHE_DIR }}" key: ccache-fuzzer-centos retention-days: "${{ env.RETENTION }}" - + - name: Build PyVelox env: VELOX_BUILD_DIR: "_build/debug" @@ -188,14 +188,14 @@ jobs: source .venv/bin/activate python3 -m pip install -e . - - name: Create and test new function signatures + - name: Create and test new function signatures id: sig-check run: | source .venv/bin/activate python3 -m pip install deepdiff python3 scripts/signature.py export --spark /tmp/signatures/spark_signatures_contender.json python3 scripts/signature.py export --presto /tmp/signatures/presto_signatures_contender.json - + python3 scripts/signature.py bias /tmp/signatures/presto_signatures_main.json \ /tmp/signatures/presto_signatures_contender.json \ /tmp/signatures/presto_bias_functions 2>&1 > /tmp/signatures/presto-err-message \ @@ -206,7 +206,7 @@ jobs: else echo "presto=false" >> $GITHUB_OUTPUT fi - + python3 scripts/signature.py bias /tmp/signatures/spark_signatures_main.json \ /tmp/signatures/spark_signatures_contender.json \ /tmp/signatures/spark_bias_functions \ @@ -219,11 +219,11 @@ jobs: echo "spark=false" >> $GITHUB_OUTPUT fi - - name: Upload Signature Artifacts + - name: Upload Signature Artifacts uses: actions/upload-artifact@v4 with: name: signatures - path: /tmp/signatures + path: /tmp/signatures retention-days: "${{ env.RETENTION }}" - name: Upload presto fuzzer @@ -268,7 +268,7 @@ jobs: path: velox/_build/debug//velox/exec/tests/velox_exchange_fuzzer_test retention-days: "${{ env.RETENTION }}" - linux-presto-fuzzer-run: + presto-fuzzer-run: name: "Presto Fuzzer" runs-on: ubuntu-latest container: ghcr.io/facebookincubator/velox-dev:centos8 @@ -339,7 +339,66 @@ jobs: path: | /tmp/fuzzer_repro - linux-spark-aggregate-fuzzer-run: + presto-bias-fuzzer: + name: "Presto Bias Fuzzer" + runs-on: ubuntu-latest + container: ghcr.io/facebookincubator/velox-dev:centos8 + needs: compile + if: ${{ needs.compile.outputs.spark == 'true' }} + timeout-minutes: 120 + steps: + + - name: Download presto expression fuzzer + uses: actions/download-artifact@v4 + with: + name: presto + + - name: Download Signatures + uses: actions/download-artifact@v4 + with: + name: signatures + path: /tmp/signatures + + - name: "Run Spark Expression Fuzzer" + run: | + ls /tmp/signatures + mkdir -p /tmp/presto_fuzzer_repro/ + chmod -R 777 /tmp/presto_fuzzer_repro + chmod +x velox_expression_fuzzer_test + ./velox_expression_fuzzer_test \ + --seed ${RANDOM} \ + --lazy_vector_generation_ratio 0.2 \ + --assign_function_tickets $(cat /tmp/signatures/presto_bias_functions) \ + --duration_sec 3600 \ + --enable_variadic_signatures \ + --velox_fuzzer_enable_complex_types \ + --velox_fuzzer_enable_column_reuse \ + --velox_fuzzer_enable_expression_reuse \ + --max_expression_trees_per_step 2 \ + --retry_with_try \ + --enable_dereference \ + --logtostderr=1 \ + --minloglevel=0 \ + --repro_persist_path=/tmp/presto_fuzzer_repro \ + && echo -e "\n\nPresto Fuzzer run finished successfully." + + - name: Surface fpresto function signature errors + run: | + if [ -f /tmp/signatures/presto-signature-error-code ]; then + echo "Incompatible changes have been made to function signatures:\n" + cat /tmp/signatures/presto-err-message + exit 1 + fi + + - name: Archive Spark expression production artifacts + if: ${{ !cancelled() }} + uses: actions/upload-artifact@v4 + with: + name: presto-bias-fuzzer-failure-artifacts + path: | + /tmp/presto_bias_fuzzer_repro + + spark-aggregate-fuzzer-run: name: "Spark Aggregate Fuzzer" runs-on: ubuntu-latest container: ghcr.io/facebookincubator/velox-dev:centos8 @@ -374,7 +433,7 @@ jobs: /tmp/spark_aggregate_fuzzer_repro spark-bias-fuzzer: - name: "Spark Fuzzer" + name: "Spark Bias Fuzzer" runs-on: ubuntu-latest container: ghcr.io/facebookincubator/velox-dev:centos8 needs: compile @@ -390,12 +449,12 @@ jobs: - name: Download Signatures uses: actions/download-artifact@v4 with: - name: signatures + name: signatures path: /tmp/signatures - name: "Run Spark Expression Fuzzer" run: | - ls /tmp/signatures + ls /tmp/signatures mkdir -p /tmp/spark_fuzzer_repro/ chmod -R 777 /tmp/spark_fuzzer_repro chmod +x spark_expression_fuzzer_test @@ -457,7 +516,7 @@ jobs: path: | /tmp/spark_fuzzer_repro - linux-aggregate-fuzzer-run: + aggregate-fuzzer-run: name: "Aggregate Fuzzer" runs-on: ubuntu-latest container: ghcr.io/facebookincubator/velox-dev:centos8 @@ -492,7 +551,7 @@ jobs: path: | /tmp/aggregate_fuzzer_repro - linux-join-fuzzer-run: + join-fuzzer-run: name: "Join Fuzzer" runs-on: ubuntu-latest container: ghcr.io/facebookincubator/velox-dev:centos8 From a3728ef6e28ef53c9d3b061a3c4af75777b0b278 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 23 Mar 2024 03:57:39 +0100 Subject: [PATCH 23/51] rebase exchange fuzzer --- .github/workflows/scheduled.yml | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index d8cdb390cc66..2dd30d5f465d 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -585,39 +585,35 @@ jobs: path: | /tmp/join_fuzzer_repro - linux-exchange-fuzzer-run: + exchange-fuzzer-run: runs-on: ubuntu-latest needs: compile timeout-minutes: 120 steps: - - name: Checkout Repo - uses: actions/checkout@v4 - with: - ref: "${{ inputs.ref }}" - - - name: Install dependencies - run: source ./scripts/setup-ubuntu.sh - - name: Download exchange fuzzer uses: actions/download-artifact@v4 with: name: exchange + - run: sudo sysctl -w vm.max_map_count=67108864 + - name: Run exchange Fuzzer + # max_map_count can only be set on the host so we + # have to work around it like this. + shell: docker run --rm -v ${{ github.workspace }}:/velox -w /velox ghcr.io/facebookincubator/velox-dev:centos8 /bin/bash -e {0} run: | - sudo sysctl -w vm.max_map_count=67108864 cat /proc/sys/vm/max_map_count - mkdir -p /tmp/exchange_fuzzer_repro/ - rm -rfv /tmp/exchange_fuzzer_repro/* - chmod -R 777 /tmp/exchange_fuzzer_repro + mkdir -p /velox/exchange_fuzzer_repro/ + rm -rfv /velox/exchange_fuzzer_repro/* + chmod -R 777 /velox/exchange_fuzzer_repro chmod +x velox_exchange_fuzzer_test ./velox_exchange_fuzzer_test \ --seed ${RANDOM} \ - --duration_sec $DURATION \ + --duration_sec ${{ env.DURATION }} \ --logtostderr=1 \ --minloglevel=0 \ - --repro_path=/tmp/exchange_fuzzer_repro \ + --repro_path=/velox/exchange_fuzzer_repro \ && echo -e "\n\Exchange fuzzer run finished successfully." - name: Archive Exchange production artifacts @@ -626,8 +622,7 @@ jobs: with: name: exchange-fuzzer-failure-artifacts path: | - /tmp/exchange_fuzzer_repro - + exchange_fuzzer_repro presto-java-aggregation-fuzzer-run: name: Aggregation Fuzzer with Presto as source of truth @@ -684,7 +679,7 @@ jobs: ls -lR $PRESTO_HOME/etc $PRESTO_HOME/bin/launcher run -v > /tmp/server.log 2>&1 & # Sleep for 60 seconds to allow Presto server to start. - sleep 60 + sleep 60 /opt/presto-cli --server 127.0.0.1:8080 --execute 'CREATE SCHEMA hive.tpch;' mkdir -p /tmp/aggregate_fuzzer_repro/ rm -rfv /tmp/aggregate_fuzzer_repro/* From 10f15980b16f3e69329ca25fcfbdb0aebb7f18fb Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 23 Mar 2024 04:10:24 +0100 Subject: [PATCH 24/51] try without setting max_map_count --- .github/workflows/scheduled.yml | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index 2dd30d5f465d..f6b68c2fc2c4 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -587,6 +587,7 @@ jobs: exchange-fuzzer-run: runs-on: ubuntu-latest + container: ghcr.io/facebookincubator/velox-dev:centos8 needs: compile timeout-minutes: 120 steps: @@ -596,24 +597,19 @@ jobs: with: name: exchange - - run: sudo sysctl -w vm.max_map_count=67108864 - - name: Run exchange Fuzzer - # max_map_count can only be set on the host so we - # have to work around it like this. - shell: docker run --rm -v ${{ github.workspace }}:/velox -w /velox ghcr.io/facebookincubator/velox-dev:centos8 /bin/bash -e {0} run: | cat /proc/sys/vm/max_map_count - mkdir -p /velox/exchange_fuzzer_repro/ - rm -rfv /velox/exchange_fuzzer_repro/* - chmod -R 777 /velox/exchange_fuzzer_repro + mkdir -p /tmp/exchange_fuzzer_repro/ + rm -rfv /tmp/exchange_fuzzer_repro/* + chmod -R 777 /tmp/exchange_fuzzer_repro chmod +x velox_exchange_fuzzer_test ./velox_exchange_fuzzer_test \ --seed ${RANDOM} \ --duration_sec ${{ env.DURATION }} \ --logtostderr=1 \ --minloglevel=0 \ - --repro_path=/velox/exchange_fuzzer_repro \ + --repro_path=/tmp/exchange_fuzzer_repro \ && echo -e "\n\Exchange fuzzer run finished successfully." - name: Archive Exchange production artifacts @@ -622,7 +618,7 @@ jobs: with: name: exchange-fuzzer-failure-artifacts path: | - exchange_fuzzer_repro + /tmp/exchange_fuzzer_repro presto-java-aggregation-fuzzer-run: name: Aggregation Fuzzer with Presto as source of truth From 31c4ab7e4bb21a77f50e58d89dd369a28fc5a6f2 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 23 Mar 2024 04:32:05 +0100 Subject: [PATCH 25/51] properly set ccache base dir for inside container --- .github/workflows/experimental.yml | 4 +--- .github/workflows/linux-build.yml | 13 +++++++++---- .github/workflows/scheduled.yml | 6 ++++-- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/.github/workflows/experimental.yml b/.github/workflows/experimental.yml index ac9a658f86f2..7394f0836e1a 100644 --- a/.github/workflows/experimental.yml +++ b/.github/workflows/experimental.yml @@ -51,7 +51,6 @@ jobs: timeout-minutes: 120 env: CCACHE_DIR: "${{ github.workspace }}/.ccache/" - CCACHE_BASEDIR: "${{ github.workspace }}" LINUX_DISTRO: "ubuntu" steps: @@ -110,8 +109,7 @@ jobs: container: ghcr.io/facebookincubator/velox-dev:presto-java timeout-minutes: 120 env: - CCACHE_DIR: "${{ github.workspace }}/.ccache/" - CCACHE_BASEDIR: "${{ github.workspace }}" + CCACHE_DIR: "/__w/velox/velox/.ccache/" LINUX_DISTRO: "centos" steps: diff --git a/.github/workflows/linux-build.yml b/.github/workflows/linux-build.yml index 0f291cbc0051..366681d665b4 100644 --- a/.github/workflows/linux-build.yml +++ b/.github/workflows/linux-build.yml @@ -53,8 +53,7 @@ jobs: run: shell: bash env: - CCACHE_DIR: "${{ github.workspace }}/.ccache" - CCACHE_BASEDIR: "${{ github.workspace }}" + CCACHE_DIR: "/__w/velox/velox/.ccache" VELOX_DEPENDENCY_SOURCE: SYSTEM simdjson_SOURCE: BUNDLED xsimd_SOURCE: BUNDLED @@ -71,6 +70,10 @@ jobs: path: '${{ env.CCACHE_DIR }}' key: ccache-linux-adapters + - name: "Zero Ccache Statistics" + run: | + ccache -sz + - name: Make Release Build env: MAKEFLAGS: 'NUM_THREADS=8 MAX_HIGH_MEM_JOBS=4 MAX_LINK_JOBS=4' @@ -87,6 +90,9 @@ jobs: "-DVELOX_ENABLE_REMOTE_FUNCTIONS=ON" ) make release EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS[*]}" + + - name: Ccache after + run: ccache -s - uses: assignUser/stash/save@v1 with: @@ -97,7 +103,7 @@ jobs: # Some of the adapters dependencies are in the 'adapters' conda env shell: mamba run --no-capture-output -n adapters /usr/bin/bash -e {0} env: - LIBHDFS3_CONF: "${{ github.workspace }}/.circleci/hdfs-client.xml" + LIBHDFS3_CONF: "/__w/velox/velox/.circleci/hdfs-client.xml" working-directory: _build/release run: | ctest -j 8 --output-on-failure --no-tests=error @@ -107,7 +113,6 @@ jobs: name: "Ubuntu debug with resolve_dependency" env: CCACHE_DIR: "${{ github.workspace }}/.ccache" - CCACHE_BASEDIR: "${{ github.workspace }}" defaults: run: shell: bash diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index f6b68c2fc2c4..ecc636823cb9 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -75,8 +75,7 @@ jobs: container: ghcr.io/facebookincubator/velox-dev:centos8 timeout-minutes: 120 env: - CCACHE_DIR: "${{ github.workspace }}/.ccache/" - CCACHE_BASEDIR: "${{ github.workspace }}" + CCACHE_DIR: "/__w/velox/velox/.ccache/" LINUX_DISTRO: "ubuntu" MAKEFLAGS: "NUM_THREADS=${{ inputs.numThreads || 16 }} MAX_HIGH_MEM_JOBS=${{ inputs.maxHighMemJobs || 8 }} MAX_LINK_JOBS=${{ inputs.maxLinkJobs || 4 }}" @@ -129,6 +128,8 @@ jobs: - name: Build PyVelox if: ${{ steps.get-sig.outputs.stash-hit != 'true' }} + env: + CCACHE_BASEDIR: "/__w/velox/velox/velox_main" working-directory: velox_main run: | python3 -m venv .venv @@ -165,6 +166,7 @@ jobs: - name: Build env: + CCACHE_BASEDIR: "/__w/velox/velox/velox" EXTRA_CMAKE_FLAGS: "-DVELOX_ENABLE_ARROW=ON -DVELOX_BUILD_PYTHON_PACKAGE=ON ${{ inputs.extraCMakeFlags }}" run: | EXTRA_CMAKE_FLAGS="-DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)") $EXTRA_CMAKE_FLAGS" From cdd21bfcceae941c2a265f397ee9d0f6a00a1584 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Tue, 26 Mar 2024 02:12:42 +0100 Subject: [PATCH 26/51] adapt prest based agg fuzzer to new workflow setup --- .github/workflows/scheduled.yml | 78 ++++++++++----------------------- 1 file changed, 24 insertions(+), 54 deletions(-) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index ecc636823cb9..b1f50b0d8ccc 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -24,7 +24,6 @@ on: - "third_party/**" - "scripts/setup-ubuntu.sh" - "scripts/setup-helper-functions.sh" - - ".github/workflows/linux-build.yml" - ".github/workflows/scheduled.yml" schedule: @@ -96,7 +95,7 @@ jobs: path: /tmp/signatures key: function-signatures - - name: "Restore ccache" + - name: Restore ccache uses: assignUser/stash/restore@v1 with: path: "${{ env.CCACHE_DIR }}" @@ -153,14 +152,14 @@ jobs: path: /tmp/signatures key: function-signatures - - name: "Checkout Contender" + - name: Checkout Contender uses: actions/checkout@v4 with: path: velox submodules: 'recursive' ref: "${{ inputs.ref }}" - - name: "Zero Ccache Statistics" + - name: Zero Ccache Statistics run: | ccache -sz @@ -175,7 +174,7 @@ jobs: - name: Ccache after run: ccache -s - - name: "Save ccache" + - name: Save ccache uses: assignUser/stash/save@v1 with: path: "${{ env.CCACHE_DIR }}" @@ -271,7 +270,7 @@ jobs: retention-days: "${{ env.RETENTION }}" presto-fuzzer-run: - name: "Presto Fuzzer" + name: Presto Fuzzer runs-on: ubuntu-latest container: ghcr.io/facebookincubator/velox-dev:centos8 needs: compile @@ -312,7 +311,7 @@ jobs: with: name: presto - - name: "Run Presto Fuzzer" + - name: Run Presto Fuzzer run: | mkdir -p /tmp/fuzzer_repro/ chmod -R 777 /tmp/fuzzer_repro @@ -342,7 +341,7 @@ jobs: /tmp/fuzzer_repro presto-bias-fuzzer: - name: "Presto Bias Fuzzer" + name: Presto Bias Fuzzer runs-on: ubuntu-latest container: ghcr.io/facebookincubator/velox-dev:centos8 needs: compile @@ -361,7 +360,7 @@ jobs: name: signatures path: /tmp/signatures - - name: "Run Spark Expression Fuzzer" + - name: Run Spark Expression Fuzzer run: | ls /tmp/signatures mkdir -p /tmp/presto_fuzzer_repro/ @@ -401,7 +400,7 @@ jobs: /tmp/presto_bias_fuzzer_repro spark-aggregate-fuzzer-run: - name: "Spark Aggregate Fuzzer" + name: Spark Aggregate Fuzzer runs-on: ubuntu-latest container: ghcr.io/facebookincubator/velox-dev:centos8 needs: compile @@ -413,7 +412,7 @@ jobs: with: name: spark_aggregation_fuzzer - - name: "Run Spark Aggregate Fuzzer" + - name: Run Spark Aggregate Fuzzer run: | mkdir -p /tmp/spark_aggregate_fuzzer_repro/ chmod -R 777 /tmp/spark_aggregate_fuzzer_repro @@ -435,7 +434,7 @@ jobs: /tmp/spark_aggregate_fuzzer_repro spark-bias-fuzzer: - name: "Spark Bias Fuzzer" + name: Spark Bias Fuzzer runs-on: ubuntu-latest container: ghcr.io/facebookincubator/velox-dev:centos8 needs: compile @@ -454,7 +453,7 @@ jobs: name: signatures path: /tmp/signatures - - name: "Run Spark Expression Fuzzer" + - name: Run Spark Expression Fuzzer run: | ls /tmp/signatures mkdir -p /tmp/spark_fuzzer_repro/ @@ -478,7 +477,7 @@ jobs: /tmp/spark_bias_fuzzer_repro spark-fuzzer: - name: "Spark Fuzzer" + name: Spark Fuzzer runs-on: ubuntu-latest container: ghcr.io/facebookincubator/velox-dev:centos8 needs: compile @@ -490,7 +489,7 @@ jobs: with: name: spark_expression_fuzzer - - name: "Run Spark Expression Fuzzer" + - name: Run Spark Expression Fuzzer run: | mkdir -p /tmp/spark_fuzzer_repro/ chmod -R 777 /tmp/spark_fuzzer_repro @@ -519,7 +518,7 @@ jobs: /tmp/spark_fuzzer_repro aggregate-fuzzer-run: - name: "Aggregate Fuzzer" + name: Aggregate Fuzzer runs-on: ubuntu-latest container: ghcr.io/facebookincubator/velox-dev:centos8 needs: compile @@ -531,7 +530,7 @@ jobs: with: name: aggregation - - name: "Run Aggregate Fuzzer" + - name: Run Aggregate Fuzzer run: | mkdir -p /tmp/aggregate_fuzzer_repro/ rm -rfv /tmp/aggregate_fuzzer_repro/* @@ -554,7 +553,7 @@ jobs: /tmp/aggregate_fuzzer_repro join-fuzzer-run: - name: "Join Fuzzer" + name: Join Fuzzer runs-on: ubuntu-latest container: ghcr.io/facebookincubator/velox-dev:centos8 needs: compile @@ -566,7 +565,7 @@ jobs: with: name: join - - name: "Run Join Fuzzer" + - name: Run Join Fuzzer run: | mkdir -p /tmp/join_fuzzer_repro/ rm -rfv /tmp/join_fuzzer_repro/* @@ -588,6 +587,7 @@ jobs: /tmp/join_fuzzer_repro exchange-fuzzer-run: + name: Exchange Fuzzer runs-on: ubuntu-latest container: ghcr.io/facebookincubator/velox-dev:centos8 needs: compile @@ -624,6 +624,7 @@ jobs: presto-java-aggregation-fuzzer-run: name: Aggregation Fuzzer with Presto as source of truth + needs: compile runs-on: 8-core container: ghcr.io/facebookincubator/velox-dev:presto-java timeout-minutes: 120 @@ -634,41 +635,10 @@ jobs: LINUX_DISTRO: "centos" steps: - - name: "Restore ccache" - uses: assignUser/stash/restore@v1 - with: - path: "${{ env.CCACHE_DIR }}" - key: ccache-presto-java-fuzzer - - - name: "Checkout Repo" - uses: actions/checkout@v4 - with: - path: velox - submodules: 'recursive' - ref: "${{ inputs.ref }}" - - - name: Fix git permissions - # Usually actions/checkout does this but as we run in a container - # it doesn't work - run: git config --global --add safe.directory /__w/velox/velox/velox - - - name: Zero Ccache Statistics - run: ccache -sz - - - - name: "Build" - env: - EXTRA_CMAKE_FLAGS: "-DVELOX_ENABLE_ARROW=ON ${{ inputs.extraCMakeFlags }}" - run: | - cd velox - make debug NUM_THREADS="${{ inputs.numThreads || 8 }}" MAX_HIGH_MEM_JOBS="${{ inputs.maxHighMemJobs || 8 }}" MAX_LINK_JOBS="${{ inputs.maxLinkJobs || 4 }}" EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_ARROW=ON ${{ inputs.extraCMakeFlags }}" - ccache -s - - - name: "Save ccache" - uses: assignUser/stash/save@v1 + - name: Download aggregation fuzzer + uses: actions/download-artifact@v4 with: - path: "${{ env.CCACHE_DIR }}" - key: ccache-presto-java-fuzzer + name: aggregation - name: "Run Aggregate Fuzzer" run: | @@ -682,7 +652,7 @@ jobs: mkdir -p /tmp/aggregate_fuzzer_repro/ rm -rfv /tmp/aggregate_fuzzer_repro/* chmod -R 777 /tmp/aggregate_fuzzer_repro - _build/debug/velox/functions/prestosql/fuzzer/velox_aggregation_fuzzer_test \ + velox_aggregation_fuzzer_test \ --seed ${RANDOM} \ --duration_sec $DURATION \ --logtostderr=1 \ From 45bc625d971da0331f2f92f37cd212612d30aa62 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Tue, 26 Mar 2024 02:58:10 +0100 Subject: [PATCH 27/51] run presto-java agg on ubuntu-latest --- .github/workflows/scheduled.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index b1f50b0d8ccc..3bf9e44bfc92 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -625,7 +625,7 @@ jobs: presto-java-aggregation-fuzzer-run: name: Aggregation Fuzzer with Presto as source of truth needs: compile - runs-on: 8-core + runs-on: ubuntu-latest container: ghcr.io/facebookincubator/velox-dev:presto-java timeout-minutes: 120 if: ${{ github.event_name != 'pull_request' }} From 1bc9dc86c2a840d2a0c02e9d49e5661d2de32148 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Wed, 27 Mar 2024 02:24:02 +0100 Subject: [PATCH 28/51] add messaging when velox_build_dir is used --- setup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 63da6252c58a..111ede9e5567 100644 --- a/setup.py +++ b/setup.py @@ -109,13 +109,15 @@ def run(self): def build_extension(self, ext): extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name))) - # Allow using a pre-built Velox library (for CI and development) + # Allow using a pre-built Velox library (for CI and development) e.g. 'VELOX_BUILD_DIR=_build/velox/debug' + # The build in question must have been built with 'VELOX_BUILD_PYTHON_PACKAGE=ON' and the same python version. if "VELOX_BUILD_DIR" in os.environ: velox_dir = os.path.abspath(os.environ["VELOX_BUILD_DIR"]) if not os.path.isdir(extdir): os.symlink(velox_dir, os.path.dirname(extdir), target_is_directory=True) + print(f"Using pre-built Velox library from {velox_dir}") return # required for auto-detection of auxiliary "native" libs From e893353ca69418f98ebc73f9ba68c8d13759e988 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Wed, 27 Mar 2024 02:56:57 +0100 Subject: [PATCH 29/51] review feedback --- .github/workflows/scheduled.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index 3bf9e44bfc92..1dbfd1957df3 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -168,7 +168,7 @@ jobs: CCACHE_BASEDIR: "/__w/velox/velox/velox" EXTRA_CMAKE_FLAGS: "-DVELOX_ENABLE_ARROW=ON -DVELOX_BUILD_PYTHON_PACKAGE=ON ${{ inputs.extraCMakeFlags }}" run: | - EXTRA_CMAKE_FLAGS="-DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)") $EXTRA_CMAKE_FLAGS" + EXTRA_CMAKE_FLAGS="-DPYTHON_EXECUTABLE=$(which python3) $EXTRA_CMAKE_FLAGS" make debug - name: Ccache after @@ -386,7 +386,7 @@ jobs: - name: Surface fpresto function signature errors run: | if [ -f /tmp/signatures/presto-signature-error-code ]; then - echo "Incompatible changes have been made to function signatures:\n" + echo "Incompatible changes have been made to function signatures:" cat /tmp/signatures/presto-err-message exit 1 fi From 0ef5d04660543ab27c0e6710199ccc852b788031 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Wed, 27 Mar 2024 05:26:26 +0100 Subject: [PATCH 30/51] add new function to signature.py to export and compare signatures in CI --- .github/workflows/scheduled.yml | 58 +++++++-------------- scripts/signature.py | 92 ++++++++++++++++++++++++++++++--- 2 files changed, 105 insertions(+), 45 deletions(-) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index 1dbfd1957df3..52942a67cc4e 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -83,8 +83,10 @@ jobs: shell: bash working-directory: velox outputs: - bias_presto: ${{ steps.sig-check.outputs.presto }} - bias_spark: ${{ steps.sig-check.outputs.spark }} + presto_bias: ${{ steps.sig-check.outputs.presto_functions }} + presto_erro: ${{ steps.sig-check.outputs.presto_error }} + spark_bias: ${{ steps.sig-check.outputs.spark_functions }} + spark_error: ${{ steps.sig-check.outputs.spark_error }} steps: @@ -194,31 +196,7 @@ jobs: run: | source .venv/bin/activate python3 -m pip install deepdiff - python3 scripts/signature.py export --spark /tmp/signatures/spark_signatures_contender.json - python3 scripts/signature.py export --presto /tmp/signatures/presto_signatures_contender.json - - python3 scripts/signature.py bias /tmp/signatures/presto_signatures_main.json \ - /tmp/signatures/presto_signatures_contender.json \ - /tmp/signatures/presto_bias_functions 2>&1 > /tmp/signatures/presto-err-message \ - || echo "::notice ::Presto Signature check failed" \ - | tee /tmp/signatures/presto-err-code - if [ -f "/tmp/signatures/presto_bias_functions" ]; then - echo "presto=true" >> $GITHUB_OUTPUT - else - echo "presto=false" >> $GITHUB_OUTPUT - fi - - python3 scripts/signature.py bias /tmp/signatures/spark_signatures_main.json \ - /tmp/signatures/spark_signatures_contender.json \ - /tmp/signatures/spark_bias_functions \ - || echo "::notice ::Spark Signature check failed" \ - | tee /tmp/signatures/spark-err-code - - if [ -f "/tmp/signatures/spark_bias_functions" ]; then - echo "spark=true" >> $GITHUB_OUTPUT - else - echo "spark=false" >> $GITHUB_OUTPUT - fi + python3 scripts/signature.py gh_bias_check presto spark - name: Upload Signature Artifacts uses: actions/upload-artifact@v4 @@ -345,7 +323,7 @@ jobs: runs-on: ubuntu-latest container: ghcr.io/facebookincubator/velox-dev:centos8 needs: compile - if: ${{ needs.compile.outputs.spark == 'true' }} + if: ${{ needs.compile.outputs.presto_functions == 'true' }} timeout-minutes: 120 steps: @@ -383,14 +361,6 @@ jobs: --repro_persist_path=/tmp/presto_fuzzer_repro \ && echo -e "\n\nPresto Fuzzer run finished successfully." - - name: Surface fpresto function signature errors - run: | - if [ -f /tmp/signatures/presto-signature-error-code ]; then - echo "Incompatible changes have been made to function signatures:" - cat /tmp/signatures/presto-err-message - exit 1 - fi - - name: Archive Spark expression production artifacts if: ${{ !cancelled() }} uses: actions/upload-artifact@v4 @@ -438,7 +408,7 @@ jobs: runs-on: ubuntu-latest container: ghcr.io/facebookincubator/velox-dev:centos8 needs: compile - if: ${{ needs.compile.outputs.spark == 'true' }} + if: ${{ needs.compile.outputs.spark_functions == 'true' }} timeout-minutes: 120 steps: @@ -625,7 +595,7 @@ jobs: presto-java-aggregation-fuzzer-run: name: Aggregation Fuzzer with Presto as source of truth needs: compile - runs-on: ubuntu-latest + runs-on: ubuntu-latest container: ghcr.io/facebookincubator/velox-dev:presto-java timeout-minutes: 120 if: ${{ github.event_name != 'pull_request' }} @@ -670,3 +640,15 @@ jobs: path: | /tmp/aggregate_fuzzer_repro /tmp/server.log + + surface-signature-errors: + name: Signature Changes + needs: compile + runs-on: ubuntu-latest + steps: + - name: Surface Presto function signature errors + if: ${{ needs.compile.outputs.presto_error }} + run: | + echo "Found incompatible changes in presto function signatures. Please check the summary for details." + exit 1 + diff --git a/scripts/signature.py b/scripts/signature.py index 17aa0b33c87e..313c39466e79 100644 --- a/scripts/signature.py +++ b/scripts/signature.py @@ -13,11 +13,12 @@ # limitations under the License. import argparse import json +import os import sys -import pyvelox.pyvelox as pv from deepdiff import DeepDiff +import pyvelox.pyvelox as pv # Utility to export and diff function signatures. @@ -31,8 +32,8 @@ class bcolors: BOLD = "\033[1m" -def get_error_string(error_message): - return f""" +def get_error_string(error_message, markdown=False): + return f"""{"> [!CAUTION]" if markdown else ""} Incompatible changes in function signatures have been detected. {error_message} @@ -41,6 +42,24 @@ def get_error_string(error_message): """ +def set_output(name: str, value: str): + """Sets a Github Actions output variable. Only single line values are supported.""" + if "\n" in value: + raise ValueError("Only single line values are supported.") + + with open(os.environ["GITHUB_OUTPUT"], "a") as f: + f.write(f"{name}={value}\n") + + +def show_error(error_message): + if "GITHUB_ACTIONS" in os.environ: + with open(os.environ["GITHUB_STEP_SUMMARY"], "a") as f: + f.writelines(get_error_string(error_message, markdown=True)) + + else: + print(get_error_string(error_message)) + + def export(args): """Exports Velox function signatures.""" pv.clear_signatures() @@ -82,28 +101,28 @@ def diff_signatures(base_signatures, contender_signatures): error_message += ( f"""Function '{dic_removed.get_root_key()}' has been removed.\n""" ) - print(get_error_string(error_message)) + show_error(error_message) exit_status = 1 if "values_changed" in delta: error_message = "" for value_change in delta["values_changed"]: error_message += f"""'{value_change.get_root_key()}{value_change.t1}' is changed to '{value_change.get_root_key()}{value_change.t2}'.\n""" - print(get_error_string(error_message)) + show_error(error_message) exit_status = 1 if "repetition_change" in delta: error_message = "" for rep_change in delta["repetition_change"]: error_message += f"""'{rep_change.get_root_key()}{rep_change.t1}' is repeated {rep_change.repetition['new_repeat']} times.\n""" - print(get_error_string(error_message)) + show_error(error_message) exit_status = 1 if "iterable_item_removed" in delta: error_message = "" for iter_change in delta["iterable_item_removed"]: error_message += f"""{iter_change.get_root_key()} has its function signature '{iter_change.t1}' removed.\n""" - print(get_error_string(error_message)) + show_error(error_message) exit_status = 1 else: @@ -156,6 +175,45 @@ def bias_signatures(base_signatures, contender_signatures, tickets): return "", status +def gh_bias_check(args): + """ + Exports signatures for the given group(s) and checks them for changes compared to a baseline. + Saves the results to a file and sets a Github Actions Output for each group. + """ + if not os.getenv("GITHUB_ACTIONS"): + print("This command is meant to be run in a Github Actions environment.") + return 1 + + # export signatures for each group + for group in args.group: + export_args = parse_args( + [ + "export", + f"--{group}", + os.path.join(args.signature_dir, group + args.contender_postfix), + ] + ) + export(export_args) + + # compare signatures for each group + for group in args.group: + bias_args = parse_args( + [ + "bias", + os.path.join(args.signature_dir, group + args.base_postfix), + os.path.join(args.signature_dir, group + args.contender_postfix), + os.path.join(args.signature_dir, group + args.output_postfix), + ] + ) + bias_status = bias(bias_args) + set_output(f"{group}_error", str(bias_status == 1).lower()) + + has_tickets = os.path.isfile( + os.path.join(args.signature_dir, group + args.output_postfix) + ) + set_output(f"{group}_functions", str(has_tickets).lower()) + + def get_tickets(val): tickets = int(val) if tickets < 0: @@ -188,6 +246,26 @@ def parse_args(args): bias_command_parser.add_argument( "ticket_value", type=get_tickets, default=10, nargs="?" ) + gh_command_parser = command.add_parser("gh_bias_check") + gh_command_parser.add_argument( + "group", + nargs="+", + help='One or more group names to check for changed signatures. e.g. "spark" or "presto"', + type=str, + ) + gh_command_parser.add_argument( + "--signature_dir", type=str, default="/tmp/signatures" + ) + gh_command_parser.add_argument( + "--base_postfix", type=str, default="_signatures_main.json" + ) + gh_command_parser.add_argument( + "--contender_postfix", type=str, default="_signatures_contender.json" + ) + gh_command_parser.add_argument( + "--output_postfix", type=str, default="_bias_functions" + ) + parser.set_defaults(command="help") return parser.parse_args(args) From 34b8751d93ab7bd3772431467e2fc3fdd5085b81 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Wed, 27 Mar 2024 05:59:27 +0100 Subject: [PATCH 31/51] fix stash retention days --- .github/workflows/scheduled.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index 52942a67cc4e..01adbffe00b9 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -181,7 +181,6 @@ jobs: with: path: "${{ env.CCACHE_DIR }}" key: ccache-fuzzer-centos - retention-days: "${{ env.RETENTION }}" - name: Build PyVelox env: From f1dd3a3f955aaa639099694f3ce930acafa21040 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Wed, 27 Mar 2024 18:06:22 +0100 Subject: [PATCH 32/51] don't use argsparse.FileType to better manage file open/close --- scripts/signature.py | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/scripts/signature.py b/scripts/signature.py index 313c39466e79..d19a08e8253e 100644 --- a/scripts/signature.py +++ b/scripts/signature.py @@ -78,7 +78,9 @@ def export(args): jsoned_signatures[key] = [str(value) for value in signatures[key]] # Persist to file - json.dump(jsoned_signatures, args.output_file) + with open(args.output_file, "w") as f: + json.dump(jsoned_signatures, f) + return 0 @@ -133,14 +135,21 @@ def diff_signatures(base_signatures, contender_signatures): def diff(args): """Diffs Velox function signatures.""" - base_signatures = json.load(args.base) - contender_signatures = json.load(args.contender) + with open(args.base) as f: + base_signatures = json.load(f) + + with open(args.contender) as f: + contender_signatures = json.load(f) return diff_signatures(base_signatures, contender_signatures)[1] def bias(args): - base_signatures = json.load(args.base) - contender_signatures = json.load(args.contender) + with open(args.base) as f: + base_signatures = json.load(f) + + with open(args.contender) as f: + contender_signatures = json.load(f) + tickets = args.ticket_value bias_output, status = bias_signatures( base_signatures, contender_signatures, tickets @@ -186,6 +195,7 @@ def gh_bias_check(args): # export signatures for each group for group in args.group: + print(f"Exporting {group} signatures...") export_args = parse_args( [ "export", @@ -197,6 +207,7 @@ def gh_bias_check(args): # compare signatures for each group for group in args.group: + print(f"Comparing {group} signatures...") bias_args = parse_args( [ "bias", @@ -233,16 +244,16 @@ def parse_args(args): export_command_parser = command.add_parser("export") export_command_parser.add_argument("--spark", action="store_true") export_command_parser.add_argument("--presto", action="store_true") - export_command_parser.add_argument("output_file", type=argparse.FileType("w")) + export_command_parser.add_argument("output_file", type=str) diff_command_parser = command.add_parser("diff") - diff_command_parser.add_argument("base", type=argparse.FileType("r")) - diff_command_parser.add_argument("contender", type=argparse.FileType("r")) + diff_command_parser.add_argument("base", type=str) + diff_command_parser.add_argument("contender", type=str) bias_command_parser = command.add_parser("bias") - bias_command_parser.add_argument("base", type=argparse.FileType("r")) - bias_command_parser.add_argument("contender", type=argparse.FileType("r")) - bias_command_parser.add_argument("output_path") + bias_command_parser.add_argument("base", type=str) + bias_command_parser.add_argument("contender", type=str) + bias_command_parser.add_argument("output_path", type=str) bias_command_parser.add_argument( "ticket_value", type=get_tickets, default=10, nargs="?" ) From 5f3a2af282228c36900a6e255be2a2ec17adfc2c Mon Sep 17 00:00:00 2001 From: Krishna Pai Date: Wed, 27 Mar 2024 21:07:12 -0700 Subject: [PATCH 33/51] Removing a function signature for test purposes. --- .../registration/ArithmeticFunctionsRegistration.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/velox/functions/prestosql/registration/ArithmeticFunctionsRegistration.cpp b/velox/functions/prestosql/registration/ArithmeticFunctionsRegistration.cpp index 3984366fc9ae..0c6b0b845516 100644 --- a/velox/functions/prestosql/registration/ArithmeticFunctionsRegistration.cpp +++ b/velox/functions/prestosql/registration/ArithmeticFunctionsRegistration.cpp @@ -168,8 +168,9 @@ void registerSimpleFunctions(const std::string& prefix) { {prefix + "poisson_cdf"}); registerFunction( {prefix + "gamma_cdf"}); - registerFunction( - {prefix + "laplace_cdf"}); + // Removed for TEST Purposes, do not merge !! +// registerFunction( +// {prefix + "laplace_cdf"}); registerFunction< WilsonIntervalUpperFunction, double, From d0070d6073b9b9d96afe52e1b588232c21b93432 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Fri, 29 Mar 2024 01:36:09 +0100 Subject: [PATCH 34/51] revert makefile changes --- Makefile | 4 ++-- scripts/signature.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 7b20cff4ddb6..2bab08bce1ab 100644 --- a/Makefile +++ b/Makefile @@ -91,7 +91,7 @@ build: #: Build the software based in BUILD_DIR and BUILD_TYPE variables cmake --build $(BUILD_BASE_DIR)/$(BUILD_DIR) -j $(NUM_THREADS) debug: #: Build with debugging symbols - $(MAKE) cmake BUILD_DIR=debug BUILD_TYPE=Debug EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS}" + $(MAKE) cmake BUILD_DIR=debug BUILD_TYPE=Debug $(MAKE) build BUILD_DIR=debug -j ${NUM_THREADS} release: #: Build the release version @@ -185,6 +185,6 @@ python-clean: python-build: DEBUG=1 CMAKE_BUILD_PARALLEL_LEVEL=${NUM_THREADS} ${PYTHON_EXECUTABLE} -m pip install -e .$(extras) --verbose -python-test: +python-test: $(MAKE) python-build extras="[tests]" DEBUG=1 ${PYTHON_EXECUTABLE} -m unittest -v diff --git a/scripts/signature.py b/scripts/signature.py index d19a08e8253e..9d1302625455 100644 --- a/scripts/signature.py +++ b/scripts/signature.py @@ -39,6 +39,7 @@ def get_error_string(error_message, markdown=False): {error_message} Changing or removing function signatures breaks backwards compatibility as some users may rely on function signatures that no longer exist. + """ From 15f859e7325b2c611093484b6d43e9b97df88bf6 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Fri, 29 Mar 2024 01:36:16 +0100 Subject: [PATCH 35/51] fix output names --- .github/workflows/scheduled.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index 01adbffe00b9..2d9125e83ddf 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -84,7 +84,7 @@ jobs: working-directory: velox outputs: presto_bias: ${{ steps.sig-check.outputs.presto_functions }} - presto_erro: ${{ steps.sig-check.outputs.presto_error }} + presto_error: ${{ steps.sig-check.outputs.presto_error }} spark_bias: ${{ steps.sig-check.outputs.spark_functions }} spark_error: ${{ steps.sig-check.outputs.spark_error }} @@ -322,7 +322,7 @@ jobs: runs-on: ubuntu-latest container: ghcr.io/facebookincubator/velox-dev:centos8 needs: compile - if: ${{ needs.compile.outputs.presto_functions == 'true' }} + if: ${{ needs.compile.outputs.presto_bias == 'true' }} timeout-minutes: 120 steps: @@ -407,7 +407,7 @@ jobs: runs-on: ubuntu-latest container: ghcr.io/facebookincubator/velox-dev:centos8 needs: compile - if: ${{ needs.compile.outputs.spark_functions == 'true' }} + if: ${{ needs.compile.outputs.spark_bias == 'true' }} timeout-minutes: 120 steps: @@ -646,7 +646,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Surface Presto function signature errors - if: ${{ needs.compile.outputs.presto_error }} + if: ${{ needs.compile.outputs.presto_error == 'true' }} run: | echo "Found incompatible changes in presto function signatures. Please check the summary for details." exit 1 From 44dca5c3ea0e1743f410552c4e780685bb9e395b Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Fri, 29 Mar 2024 01:43:01 +0100 Subject: [PATCH 36/51] remove ccache_basedir --- .github/workflows/scheduled.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index 2d9125e83ddf..a758922e500e 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -129,8 +129,6 @@ jobs: - name: Build PyVelox if: ${{ steps.get-sig.outputs.stash-hit != 'true' }} - env: - CCACHE_BASEDIR: "/__w/velox/velox/velox_main" working-directory: velox_main run: | python3 -m venv .venv @@ -167,7 +165,6 @@ jobs: - name: Build env: - CCACHE_BASEDIR: "/__w/velox/velox/velox" EXTRA_CMAKE_FLAGS: "-DVELOX_ENABLE_ARROW=ON -DVELOX_BUILD_PYTHON_PACKAGE=ON ${{ inputs.extraCMakeFlags }}" run: | EXTRA_CMAKE_FLAGS="-DPYTHON_EXECUTABLE=$(which python3) $EXTRA_CMAKE_FLAGS" @@ -600,7 +597,6 @@ jobs: if: ${{ github.event_name != 'pull_request' }} env: CCACHE_DIR: "${{ github.workspace }}/.ccache/" - CCACHE_BASEDIR: "${{ github.workspace }}" LINUX_DISTRO: "centos" steps: From 6c2c3e885ad0ce446d1c4e15f93ce3ec781d1a0e Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Fri, 29 Mar 2024 05:56:28 +0100 Subject: [PATCH 37/51] trigger ci From 515e7eebe97405a493070e76c1764e04f6ac5a34 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Fri, 29 Mar 2024 06:44:32 +0100 Subject: [PATCH 38/51] move errors from step summary to log --- .github/workflows/scheduled.yml | 11 +++++++++-- scripts/signature.py | 33 +++++++++++++++++---------------- 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index a758922e500e..71564b579880 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -74,7 +74,7 @@ jobs: container: ghcr.io/facebookincubator/velox-dev:centos8 timeout-minutes: 120 env: - CCACHE_DIR: "/__w/velox/velox/.ccache/" + CCACHE_DIR: "/__w/velox/velox/.ccache" LINUX_DISTRO: "ubuntu" MAKEFLAGS: "NUM_THREADS=${{ inputs.numThreads || 16 }} MAX_HIGH_MEM_JOBS=${{ inputs.maxHighMemJobs || 8 }} MAX_LINK_JOBS=${{ inputs.maxLinkJobs || 4 }}" @@ -641,9 +641,16 @@ jobs: needs: compile runs-on: ubuntu-latest steps: + - name: Download Signatures + uses: actions/download-artifact@v4 + with: + name: signatures + path: /tmp/signatures + - name: Surface Presto function signature errors if: ${{ needs.compile.outputs.presto_error == 'true' }} run: | - echo "Found incompatible changes in presto function signatures. Please check the summary for details." + echo "Found incompatible changes in presto function signatures:" + cat /tmp/signatures/presto_errors exit 1 diff --git a/scripts/signature.py b/scripts/signature.py index 9d1302625455..60f08cb7bdab 100644 --- a/scripts/signature.py +++ b/scripts/signature.py @@ -32,8 +32,8 @@ class bcolors: BOLD = "\033[1m" -def get_error_string(error_message, markdown=False): - return f"""{"> [!CAUTION]" if markdown else ""} +def get_error_string(error_message): + return f""" Incompatible changes in function signatures have been detected. {error_message} @@ -52,13 +52,12 @@ def set_output(name: str, value: str): f.write(f"{name}={value}\n") -def show_error(error_message): - if "GITHUB_ACTIONS" in os.environ: - with open(os.environ["GITHUB_STEP_SUMMARY"], "a") as f: - f.writelines(get_error_string(error_message, markdown=True)) +def show_error(error_message, error_path): + if error_path: + with open(error_path) as f: + f.writelines(get_error_string(error_message)) - else: - print(get_error_string(error_message)) + print(get_error_string(error_message)) def export(args): @@ -85,7 +84,7 @@ def export(args): return 0 -def diff_signatures(base_signatures, contender_signatures): +def diff_signatures(base_signatures, contender_signatures, error_path=""): """Diffs Velox function signatures. Returns a tuple of the delta diff and exit status""" delta = DeepDiff( @@ -104,28 +103,28 @@ def diff_signatures(base_signatures, contender_signatures): error_message += ( f"""Function '{dic_removed.get_root_key()}' has been removed.\n""" ) - show_error(error_message) + show_error(error_message, error_path) exit_status = 1 if "values_changed" in delta: error_message = "" for value_change in delta["values_changed"]: error_message += f"""'{value_change.get_root_key()}{value_change.t1}' is changed to '{value_change.get_root_key()}{value_change.t2}'.\n""" - show_error(error_message) + show_error(error_message, error_path) exit_status = 1 if "repetition_change" in delta: error_message = "" for rep_change in delta["repetition_change"]: error_message += f"""'{rep_change.get_root_key()}{rep_change.t1}' is repeated {rep_change.repetition['new_repeat']} times.\n""" - show_error(error_message) + show_error(error_message, error_path) exit_status = 1 if "iterable_item_removed" in delta: error_message = "" for iter_change in delta["iterable_item_removed"]: error_message += f"""{iter_change.get_root_key()} has its function signature '{iter_change.t1}' removed.\n""" - show_error(error_message) + show_error(error_message, error_path) exit_status = 1 else: @@ -153,7 +152,7 @@ def bias(args): tickets = args.ticket_value bias_output, status = bias_signatures( - base_signatures, contender_signatures, tickets + base_signatures, contender_signatures, tickets, args.error_path ) if bias_output: @@ -163,12 +162,12 @@ def bias(args): return status -def bias_signatures(base_signatures, contender_signatures, tickets): +def bias_signatures(base_signatures, contender_signatures, tickets, error_path): """Returns newly added functions as string and a status flag. Newly added functions are biased like so `fn_name1=,fn_name2=`. If it detects incompatible changes returns 1 in the status. """ - delta, status = diff_signatures(base_signatures, contender_signatures) + delta, status = diff_signatures(base_signatures, contender_signatures, error_path) if not delta: print(f"{bcolors.BOLD} No changes detected: Nothing to do!") @@ -215,6 +214,7 @@ def gh_bias_check(args): os.path.join(args.signature_dir, group + args.base_postfix), os.path.join(args.signature_dir, group + args.contender_postfix), os.path.join(args.signature_dir, group + args.output_postfix), + os.path.join(args.signature_dir, group + "_errors"), ] ) bias_status = bias(bias_args) @@ -258,6 +258,7 @@ def parse_args(args): bias_command_parser.add_argument( "ticket_value", type=get_tickets, default=10, nargs="?" ) + bias_command_parser.add_argument("error_path", type=str, default="") gh_command_parser = command.add_parser("gh_bias_check") gh_command_parser.add_argument( "group", From 4e3a08c6de89a9c47a83dc70bf07c05dd4d564f6 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Fri, 29 Mar 2024 07:55:44 +0100 Subject: [PATCH 39/51] fix error file permission --- scripts/signature.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/signature.py b/scripts/signature.py index 60f08cb7bdab..f31495cb62b8 100644 --- a/scripts/signature.py +++ b/scripts/signature.py @@ -54,7 +54,7 @@ def set_output(name: str, value: str): def show_error(error_message, error_path): if error_path: - with open(error_path) as f: + with open(error_path, "a+") as f: f.writelines(get_error_string(error_message)) print(get_error_string(error_message)) From 081ac530c0084ddf55987c095f030c216fc790c0 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Fri, 29 Mar 2024 07:57:32 +0100 Subject: [PATCH 40/51] debug ccache --- .github/workflows/scheduled.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index 71564b579880..f77e75d133d9 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -166,6 +166,7 @@ jobs: - name: Build env: EXTRA_CMAKE_FLAGS: "-DVELOX_ENABLE_ARROW=ON -DVELOX_BUILD_PYTHON_PACKAGE=ON ${{ inputs.extraCMakeFlags }}" + CCACHE_DEBUG: TRUE run: | EXTRA_CMAKE_FLAGS="-DPYTHON_EXECUTABLE=$(which python3) $EXTRA_CMAKE_FLAGS" make debug From bbec6e5bd967e6ac206b00dbd0df467b36e7003a Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Fri, 29 Mar 2024 18:32:52 +0100 Subject: [PATCH 41/51] reduce amount of fuzzer logs produced --- .github/workflows/scheduled.yml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index f77e75d133d9..7ec1cf57311d 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -303,7 +303,7 @@ jobs: --enable_dereference \ --duration_sec $DURATION \ --logtostderr=1 \ - --minloglevel=0 \ + --minloglevel=1 \ --repro_persist_path=/tmp/fuzzer_repro \ && echo -e "\n\nFuzzer run finished successfully." @@ -354,7 +354,7 @@ jobs: --retry_with_try \ --enable_dereference \ --logtostderr=1 \ - --minloglevel=0 \ + --minloglevel=1 \ --repro_persist_path=/tmp/presto_fuzzer_repro \ && echo -e "\n\nPresto Fuzzer run finished successfully." @@ -388,7 +388,7 @@ jobs: --seed ${RANDOM} \ --duration_sec $DURATION \ --logtostderr=1 \ - --minloglevel=0 \ + --minloglevel=1 \ --repro_persist_path=/tmp/spark_aggregate_fuzzer_repro \ && echo -e "\n\nSpark Aggregation Fuzzer run finished successfully." @@ -430,7 +430,7 @@ jobs: --seed ${RANDOM} \ --duration_sec $DURATION \ --logtostderr=1 \ - --minloglevel=0 \ + --minloglevel=1 \ --assign_function_tickets $(cat /tmp/signatures/spark_bias_functions) \ --repro_persist_path=/tmp/spark_fuzzer_repro \ && echo -e "\n\nSpark Fuzzer run finished successfully." @@ -472,7 +472,7 @@ jobs: --enable_dereference \ --duration_sec $DURATION \ --logtostderr=1 \ - --minloglevel=0 \ + --minloglevel=1 \ --repro_persist_path=/tmp/spark_fuzzer_repro \ && echo -e "\n\nSpark Fuzzer run finished successfully." @@ -507,7 +507,7 @@ jobs: --seed ${RANDOM} \ --duration_sec $DURATION \ --logtostderr=1 \ - --minloglevel=0 \ + --minloglevel=1 \ --repro_persist_path=/tmp/aggregate_fuzzer_repro \ && echo -e "\n\nAggregation fuzzer run finished successfully." @@ -542,7 +542,7 @@ jobs: --seed ${RANDOM} \ --duration_sec $DURATION \ --logtostderr=1 \ - --minloglevel=0 \ + --minloglevel=1 \ && echo -e "\n\nAggregation fuzzer run finished successfully." - name: Archive aggregate production artifacts @@ -577,7 +577,7 @@ jobs: --seed ${RANDOM} \ --duration_sec ${{ env.DURATION }} \ --logtostderr=1 \ - --minloglevel=0 \ + --minloglevel=1 \ --repro_path=/tmp/exchange_fuzzer_repro \ && echo -e "\n\Exchange fuzzer run finished successfully." @@ -622,7 +622,7 @@ jobs: --seed ${RANDOM} \ --duration_sec $DURATION \ --logtostderr=1 \ - --minloglevel=0 \ + --minloglevel=1 \ --repro_persist_path=/tmp/aggregate_fuzzer_repro \ --enable_sorted_aggregations=true \ --presto_url=http://127.0.0.1:8080 \ From d16ee3c3342a56075da97fddd264d3dbab88dde9 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Fri, 29 Mar 2024 19:58:01 +0100 Subject: [PATCH 42/51] typo --- .github/workflows/scheduled.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index 7ec1cf57311d..23365c0e91f1 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -335,7 +335,7 @@ jobs: name: signatures path: /tmp/signatures - - name: Run Spark Expression Fuzzer + - name: Run Presto Expression Fuzzer run: | ls /tmp/signatures mkdir -p /tmp/presto_fuzzer_repro/ From 5bb971cc1900e21b01393474ca896ed049125ca8 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Fri, 29 Mar 2024 19:58:04 +0100 Subject: [PATCH 43/51] Revert "Removing a function signature for test purposes." This reverts commit 9123524ff1b4556518ff69823913badc520edc14. --- .../registration/ArithmeticFunctionsRegistration.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/velox/functions/prestosql/registration/ArithmeticFunctionsRegistration.cpp b/velox/functions/prestosql/registration/ArithmeticFunctionsRegistration.cpp index 0c6b0b845516..3984366fc9ae 100644 --- a/velox/functions/prestosql/registration/ArithmeticFunctionsRegistration.cpp +++ b/velox/functions/prestosql/registration/ArithmeticFunctionsRegistration.cpp @@ -168,9 +168,8 @@ void registerSimpleFunctions(const std::string& prefix) { {prefix + "poisson_cdf"}); registerFunction( {prefix + "gamma_cdf"}); - // Removed for TEST Purposes, do not merge !! -// registerFunction( -// {prefix + "laplace_cdf"}); + registerFunction( + {prefix + "laplace_cdf"}); registerFunction< WilsonIntervalUpperFunction, double, From 8465268b24ae2cab33bca143bca03afa58d2ff2e Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Fri, 29 Mar 2024 19:58:37 +0100 Subject: [PATCH 44/51] remove redundant error call out --- .github/workflows/scheduled.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index 23365c0e91f1..16042064a446 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -651,7 +651,6 @@ jobs: - name: Surface Presto function signature errors if: ${{ needs.compile.outputs.presto_error == 'true' }} run: | - echo "Found incompatible changes in presto function signatures:" cat /tmp/signatures/presto_errors exit 1 From 3261b1749784cd60d5fa411407939be9e8db5a1b Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Fri, 29 Mar 2024 19:58:54 +0100 Subject: [PATCH 45/51] remove ccache debug --- .github/workflows/scheduled.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index 16042064a446..3a5e8d03b205 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -166,7 +166,6 @@ jobs: - name: Build env: EXTRA_CMAKE_FLAGS: "-DVELOX_ENABLE_ARROW=ON -DVELOX_BUILD_PYTHON_PACKAGE=ON ${{ inputs.extraCMakeFlags }}" - CCACHE_DEBUG: TRUE run: | EXTRA_CMAKE_FLAGS="-DPYTHON_EXECUTABLE=$(which python3) $EXTRA_CMAKE_FLAGS" make debug From c27b5069356013429f2171ca2f7923658a8f9371 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Fri, 29 Mar 2024 20:22:48 +0100 Subject: [PATCH 46/51] remove remaining CCI files --- .circleci/README.md | 89 ----- .circleci/config.yml | 44 -- .circleci/dist_compile.yml | 532 ------------------------- .github/workflows/linux-build.yml | 2 +- scripts/adapters.dockerfile | 2 +- {.circleci => scripts}/hdfs-client.xml | 0 6 files changed, 2 insertions(+), 667 deletions(-) delete mode 100644 .circleci/README.md delete mode 100644 .circleci/config.yml delete mode 100644 .circleci/dist_compile.yml rename {.circleci => scripts}/hdfs-client.xml (100%) diff --git a/.circleci/README.md b/.circleci/README.md deleted file mode 100644 index 31e80ebd3450..000000000000 --- a/.circleci/README.md +++ /dev/null @@ -1,89 +0,0 @@ -CircleCi integration is controlled by the `./circleci/config.yml` file. Our -config currently contains two workflows. One is triggered on every pull request update. -The other workflow runs nightly to verify our compatibility with prestodb internal protocol. - -The PR workflow is named `dist-compile` and has 4 jobs, 2 to build and run unit tests on linux and macos -and 2 to check code formatting and license headers: -* linux-build -* macos-build -* format-check -* header-check - -## Running locally - -The linux container based jobs can be run locally using the `circleci` cli: - -``` - circleci local execute --job JOB_NAME -``` - -For example to run unit tests use: - -``` - circleci local execute --job linux-build -``` - -A Nightly build with prestodb/master sync checks that the presto_protocol library -remains in sync with Presto Java. - -Run the nightly sync job locally: -``` - circleci local execute --job presto-sync -``` - -## Install CircleCi cli -``` - curl -fLSs https://circle.ci/cli | bash -``` - -To use containers Docker must be installed. Here are instructions to [Install -Docker on macos](https://docs.docker.com/docker-for-mac/install/). Docker deamon -must be running before issuing the `circleci` commands. - -### Macos testing - -Macos testing is done by using the CircleCi macos executor and installing -dependencies each time the job is run. This executor cannot be run locally. -The script `scripts/setup-macos.sh` contains commands that are run as part of -this job to install these dependencies. - -### Linux testing - -Linux testing uses a Docker container. The container build depends on the Velox CircleCi container. Check -velox/.circleci/config.yml to see that the base container in circleci-container.dockfile is using the latest. -The container build uses Docker and should be run on your macos or linux laptop with Docker installed and -running. - -#### Build the base container: - -* In an up-to-date clone of velox (maybe you have one?) - -``` -git clone git@github.com:facebookincubator/velox.git -cd velox -make base-container -``` -* Wait - This step takes rather a long time. It is building clang-format v8 to be compatible with fbcode -* When the base container is finished the new container name will be printed on the console. -* Push the container to DockerHub -``` -docker push prestocpp/base-container:$USER-YYYYMMDD -``` -* After the push, update `scripts/velox-container.dockfile` with the newly build base container name - -#### Build the dependencies container - -* If you have a new base-container update scripts/velox-container.dockfile to refer to it -* Build the velox container -``` -make velox-container.dockfile -``` -* Wait - This takes a few minutes, but not nearly as long as the base container. -* When the velox container is finished the new container name will be printed on the console. -* Push the container to DockerHub -``` -docker push prestocpp/velox-container:$USER-YYYYMMDD -``` -* Update `.circleci/config.yml` with the newly built circleci container name. - There are two places in the config.yml file that refer to the container, update - both. diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index 303f1356521e..000000000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -version: 2.1 - -# This allows us to use CircleCI's dynamic configuration feature -setup: true - - -# Path-filtering orb is required to continue a pipeline based on -# the path of an updated fileset -orbs: - path-filtering: circleci/path-filtering@0.1.1 - -workflows: - version: 2 - - path-filtering-workflow: - jobs: - - - path-filtering/filter: - name: check-sensitive-paths - - # Format is: - # Regex below will filter out paths with test in them. - mapping: | - velox/expression/((?!.*test).*).* run-longer-expression-fuzzer true - velox/exec/((?!.*test).*).* run-longer-expression-fuzzer true - velox/common/((?!.*test).*).* run-longer-expression-fuzzer true - velox/core/((?!.*test).*).* run-longer-expression-fuzzer true - velox/vector/((?!.*test).*).* run-longer-expression-fuzzer true - - config-path: .circleci/dist_compile.yml diff --git a/.circleci/dist_compile.yml b/.circleci/dist_compile.yml deleted file mode 100644 index 7d607d100a97..000000000000 --- a/.circleci/dist_compile.yml +++ /dev/null @@ -1,532 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -version: 2.1 - - -# Default pipeline parameters, which will be updated according to -# the results of the path-filtering orb -parameters: - run-longer-expression-fuzzer: - type: boolean - default: false - -commands: - update-submodules: - steps: - - run: - name: "Update Submodules" - command: | - git submodule sync --recursive - git submodule update --init --recursive - - setup-environment: - steps: - - run: - name: "Setup Environment" - command: | - # Calculate ccache key. - git show -s --format=%cd --date="format:%Y%m%d" $(git merge-base origin/main HEAD) | tee merge-base-date - - # Set up xml gtest output. - mkdir -p /tmp/test_xml_output/ - echo "export XML_OUTPUT_FILE=\"/tmp/test_xml_output/\"" >> $BASH_ENV - - # Set up ccache configs. - mkdir -p .ccache - echo "export CCACHE_DIR=$(realpath .ccache)" >> $BASH_ENV - ccache -sz -M 5Gi - if [ -e /opt/rh/gcc-toolset-9/enable ]; then - source /opt/rh/gcc-toolset-9/enable - fi - - restore_cache: - name: "Restore CCache Cache" - keys: - - velox-ccache-debug-{{ arch }}-{{ checksum "merge-base-date" }} - - pre-steps: - steps: - - checkout - - update-submodules - - setup-environment - - post-steps: - steps: - - save_cache: - name: "Save CCache Cache" - key: velox-ccache-debug-{{ arch }}-{{ checksum "merge-base-date" }} - paths: - - .ccache/ - - store_artifacts: - path: '_build/debug/.ninja_log' - - store_test_results: - path: '/tmp/test_xml_output/' - - build-benchmarks: - parameters: - binary_output: - type: string - benchmark_class: - type: string - steps: - - run: - name: "Build Benchmarks - << parameters.benchmark_class >>" - command: | - make benchmarks-basic-build NUM_THREADS=8 MAX_HIGH_MEM_JOBS=4 MAX_LINK_JOBS=4 - ccache -s - mkdir -p << parameters.binary_output >> - cp -r --verbose _build/release/velox/benchmarks/basic/* << parameters.binary_output >> - - fuzzer-run: - parameters: - fuzzer_repro: - type: string - fuzzer_output: - type: string - fuzzer_name: - type: string - fuzzer_exe: - type: string - fuzzer_args: - type: string - steps: - - pre-steps - - run: - name: Build - command: | - make debug NUM_THREADS=8 MAX_HIGH_MEM_JOBS=8 MAX_LINK_JOBS=4 - ccache -s - no_output_timeout: 1h - - run: - name: "Run << parameters.fuzzer_name >> Fuzzer" - command: | - eval ' << parameters.fuzzer_exe >> << parameters.fuzzer_args >> ' \ - 2>&1 | tee "<< parameters.fuzzer_output >>" || ( \ - tail -n 1000 "<< parameters.fuzzer_output >>" ; \ - echo "FAIL: << parameters.fuzzer_name >> run failed"; \ - exit 1; \ - ) - echo -e "\n << parameters.fuzzer_name >> run finished successfully." - no_output_timeout: 120m - - store_artifacts: - path: << parameters.fuzzer_output >> - - store_artifacts: - path: << parameters.fuzzer_repro >> - - post-steps - -executors: - build: - docker: - - image : ghcr.io/facebookincubator/velox-dev:circleci-avx - resource_class: 2xlarge - environment: - CC: /opt/rh/gcc-toolset-9/root/bin/gcc - CXX: /opt/rh/gcc-toolset-9/root/bin/g++ - VELOX_DEPENDENCY_SOURCE: BUNDLED - simdjson_SOURCE: BUNDLED - check: - docker: - - image : ghcr.io/facebookincubator/velox-dev:check-avx - -jobs: - linux-build: - executor: build - environment: - DuckDB_SOURCE: SYSTEM - steps: - - pre-steps - - run: - name: "Build" - command: | - make debug NUM_THREADS=8 MAX_HIGH_MEM_JOBS=4 MAX_LINK_JOBS=4 EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_ARROW=ON" - ccache -s - no_output_timeout: 1h - - run: - name: "Run Unit Tests" - command: | - cd _build/debug && ctest -j 16 -VV --output-on-failure --no-tests=error - no_output_timeout: 1h - - store_test_results: - path: /tmp/test_xml_output/ - - run: - name: "Run Fuzzer Tests" - # Run fuzzer using the built executable - we do this instead of make - # since currently make fuzzertest tends to rebuild the project. - command: | - mkdir -p /tmp/fuzzer_repro/ - chmod -R 777 /tmp/fuzzer_repro - _build/debug/velox/expression/tests/velox_expression_fuzzer_test \ - --seed ${RANDOM} \ - --enable_variadic_signatures \ - --velox_fuzzer_enable_complex_types \ - --lazy_vector_generation_ratio 0.2 \ - --velox_fuzzer_enable_column_reuse \ - --velox_fuzzer_enable_expression_reuse \ - --max_expression_trees_per_step 2 \ - --retry_with_try \ - --enable_dereference \ - --duration_sec 60 \ - --logtostderr=1 \ - --minloglevel=0 \ - --repro_persist_path=/tmp/fuzzer_repro \ - && echo -e "\n\nFuzzer run finished successfully." - no_output_timeout: 5m - - store_artifacts: - path: '/tmp/fuzzer_repro' - - run: - name: "Run Spark Fuzzer Tests" - command: | - mkdir -p /tmp/spark_fuzzer_repro/ - chmod -R 777 /tmp/spark_fuzzer_repro - _build/debug/velox/expression/tests/spark_expression_fuzzer_test \ - --seed ${RANDOM} \ - --duration_sec 60 \ - --enable_variadic_signatures \ - --lazy_vector_generation_ratio 0.2 \ - --velox_fuzzer_enable_column_reuse \ - --velox_fuzzer_enable_expression_reuse \ - --max_expression_trees_per_step 2 \ - --retry_with_try \ - --enable_dereference \ - --logtostderr=1 \ - --minloglevel=0 \ - --repro_persist_path=/tmp/spark_fuzzer_repro \ - && echo -e "\n\nSpark Fuzzer run finished successfully." - no_output_timeout: 5m - - store_artifacts: - path: '/tmp/spark_fuzzer_repro' - - run: - name: "Run Spark Aggregate Fuzzer Tests" - command: | - mkdir -p /tmp/spark_aggregate_fuzzer_repro/ - chmod -R 777 /tmp/spark_aggregate_fuzzer_repro - _build/debug/velox/functions/sparksql/fuzzer/spark_aggregation_fuzzer_test \ - --seed ${RANDOM} \ - --duration_sec 60 \ - --logtostderr=1 \ - --minloglevel=0 \ - --repro_persist_path=/tmp/spark_aggregate_fuzzer_repro \ - && echo -e "\n\nSpark Aggregation Fuzzer run finished successfully." - no_output_timeout: 5m - - store_artifacts: - path: '/tmp/spark_aggregate_fuzzer_repro' - - run: - name: "Run Aggregate Fuzzer Tests" - # Run aggregation fuzzer using the built executable. - command: | - mkdir -p /tmp/aggregate_fuzzer_repro/ - rm -rfv /tmp/aggregate_fuzzer_repro/* - chmod -R 777 /tmp/aggregate_fuzzer_repro - _build/debug/velox/functions/prestosql/fuzzer/velox_aggregation_fuzzer_test \ - --seed ${RANDOM} \ - --duration_sec 1800 \ - --logtostderr=1 \ - --minloglevel=0 \ - --repro_persist_path=/tmp/aggregate_fuzzer_repro \ - && echo -e "\n\nAggregation fuzzer run finished successfully." - no_output_timeout: 5m - - store_artifacts: - path: '/tmp/aggregate_fuzzer_repro' - - run: - name: "Run Join Fuzzer Tests" - command: | - _build/debug/velox/exec/tests/velox_join_fuzzer_test \ - --seed ${RANDOM} \ - --duration_sec 1800 \ - --logtostderr=1 \ - --minloglevel=0 \ - && echo -e "\n\nJoin fuzzer run finished successfully." - no_output_timeout: 5m - - run: - name: "Run Example Binaries" - command: | - find _build/debug/velox/examples/ -maxdepth 1 -type f -executable -exec "{}" \; - - post-steps - - linux-build-release: - executor: build - steps: - - pre-steps - - run: - name: Build - command: | - make release NUM_THREADS=16 MAX_HIGH_MEM_JOBS=8 MAX_LINK_JOBS=8 - ccache -s - no_output_timeout: 1h - - run: - name: "Run Unit Tests" - command: | - cd _build/release && ctest -j 16 -VV --output-on-failure --no-tests=error - no_output_timeout: 1h - - post-steps - - # Build with different options - linux-build-options: - executor: build - steps: - - pre-steps - - run: - name: "Build Velox Minimal" - command: | - make min_debug NUM_THREADS=16 MAX_HIGH_MEM_JOBS=8 MAX_LINK_JOBS=16 - ccache -s - no_output_timeout: 1h - - run: - name: "Build Velox With Benchmarks and Without Testing" - command: | - make benchmarks-build NUM_THREADS=8 MAX_HIGH_MEM_JOBS=4 MAX_LINK_JOBS=4 - no_output_timeout: 1h - - post-steps - - linux-adapters: - executor: build - environment: - VELOX_DEPENDENCY_SOURCE: SYSTEM - ICU_SOURCE: BUNDLED - simdjson_SOURCE: BUNDLED - xsimd_SOURCE: BUNDLED - DuckDB_SOURCE: SYSTEM - steps: - - pre-steps - - run: - name: "Install Java for Hadoop" - command: | - set -xu - yum -y install java-1.8.0-openjdk - - run: - name: Build including all Benchmarks - command: | - EXTRA_CMAKE_FLAGS=( - "-DVELOX_ENABLE_BENCHMARKS=ON" - "-DVELOX_ENABLE_ARROW=ON" - "-DVELOX_ENABLE_PARQUET=ON" - "-DVELOX_ENABLE_HDFS=ON" - "-DVELOX_ENABLE_S3=ON" - "-DVELOX_ENABLE_GCS=ON" - "-DVELOX_ENABLE_ABFS=ON" - "-DVELOX_ENABLE_SUBSTRAIT=ON" - "-DVELOX_ENABLE_REMOTE_FUNCTIONS=ON" - ) - make release EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS[*]}" NUM_THREADS=16 MAX_HIGH_MEM_JOBS=8 MAX_LINK_JOBS=8 - ccache -s - no_output_timeout: 1h - - run: - name: "Run Unit Tests" - command: | - conda init bash - source ~/.bashrc - conda create -y --name testbench python=3.7 - conda activate testbench - pip install https://github.com/googleapis/storage-testbench/archive/refs/tags/v0.36.0.tar.gz - export LC_ALL=C - export JAVA_HOME=/usr/lib/jvm/jre-1.8.0-openjdk - export HADOOP_ROOT_LOGGER="WARN,DRFA" - export LIBHDFS3_CONF=$(pwd)/.circleci/hdfs-client.xml - export HADOOP_HOME='/usr/local/hadoop' - export PATH=/usr/local/hadoop/bin:${PATH} - # The following is used to install Azurite in the CI for running Abfs Hive Connector unit tests. - # Azurite is an emulator for local Azure Storage development, and it is a required component for running Abfs Hive Connector unit tests. - # It can be installed using npm. The following is used to install Node.js and npm for Azurite installation. - curl -sL https://rpm.nodesource.com/setup_10.x | bash - - yum install -y nodejs - npm install -g azurite - cd _build/release && ctest -j 16 -VV --output-on-failure --no-tests=error - no_output_timeout: 1h - - post-steps - - linux-presto-fuzzer-run: - executor: build - environment: - VELOX_DEPENDENCY_SOURCE: SYSTEM - simdjson_SOURCE: BUNDLED - xsimd_SOURCE: BUNDLED - DuckDB_SOURCE: BUNDLED - steps: - - fuzzer-run: - fuzzer_output: "/tmp/fuzzer.log" - fuzzer_repro: "/tmp/fuzzer_repro" - fuzzer_name: "Expression" - fuzzer_exe: "_build/debug/velox/expression/tests/velox_expression_fuzzer_test" - fuzzer_args: " --seed ${RANDOM} --lazy_vector_generation_ratio 0.2 \ - --duration_sec 1800 --enable_variadic_signatures \ - --velox_fuzzer_enable_complex_types \ - --velox_fuzzer_enable_column_reuse \ - --velox_fuzzer_enable_expression_reuse \ - --max_expression_trees_per_step 2 \ - --retry_with_try \ - --enable_dereference \ - --logtostderr=1 --minloglevel=0 \ - --repro_persist_path=/tmp/fuzzer_repro" - - linux-spark-fuzzer-run: - executor: build - environment: - VELOX_DEPENDENCY_SOURCE: SYSTEM - simdjson_SOURCE: BUNDLED - steps: - - fuzzer-run: - fuzzer_output: "/tmp/spark_fuzzer.log" - fuzzer_repro: "/tmp/spark_fuzzer_repro" - fuzzer_name: "Spark" - fuzzer_exe: "_build/debug/velox/expression/tests/spark_expression_fuzzer_test" - fuzzer_args: " --seed ${RANDOM} --duration_sec 600 --logtostderr=1 --minloglevel=0 \ - --repro_persist_path=/tmp/spark_fuzzer_repro" - - linux-spark-aggregate-fuzzer-run: - executor: build - environment: - VELOX_DEPENDENCY_SOURCE: SYSTEM - simdjson_SOURCE: BUNDLED - steps: - - fuzzer-run: - fuzzer_output: "/tmp/spark_aggregate_fuzzer.log" - fuzzer_repro: "/tmp/spark_aggregate_fuzzer_repro" - fuzzer_name: "SparkAggregate" - fuzzer_exe: "_build/debug/velox/functions/sparksql/fuzzer/spark_aggregation_fuzzer_test" - fuzzer_args: " --seed ${RANDOM} --duration_sec 600 --logtostderr=1 --minloglevel=0 \ - --repro_persist_path=/tmp/spark_aggregate_fuzzer_repro" - - - linux-aggregate-fuzzer-run: - executor: build - environment: - VELOX_DEPENDENCY_SOURCE: SYSTEM - simdjson_SOURCE: BUNDLED - steps: - - fuzzer-run: - fuzzer_output: "/tmp/aggregate_fuzzer.log" - fuzzer_repro: "/tmp/aggregate_fuzzer_repro" - fuzzer_name: "Aggregate" - fuzzer_exe: "_build/debug/velox/functions/prestosql/fuzzer/velox_aggregation_fuzzer_test" - fuzzer_args: " --seed ${RANDOM} --duration_sec 3600 --logtostderr=1 --minloglevel=0 \ - --repro_persist_path=/tmp/aggregate_fuzzer_repro" - - linux-join-fuzzer-run: - executor: build - environment: - VELOX_DEPENDENCY_SOURCE: SYSTEM - simdjson_SOURCE: BUNDLED - steps: - - fuzzer-run: - fuzzer_output: "/tmp/join_fuzzer.log" - fuzzer_repro: "/tmp/join_fuzzer_repro" - fuzzer_name: "Join" - fuzzer_exe: "_build/debug/velox/exec/tests/velox_join_fuzzer_test" - fuzzer_args: " --seed ${RANDOM} --duration_sec 3600 --logtostderr=1 --minloglevel=0" - - linux-pr-fuzzer-run: - executor: build - steps: - - pre-steps - - run: - name: "Get merge base function signatures" - command: | - source ~/.bashrc - conda create -y --name pyveloxenv python=3.7 - conda activate pyveloxenv - cp ./scripts/signature.py /tmp/signature.py - pip install deepdiff - git remote add upstream https://github.com/facebookincubator/velox - git fetch upstream - merge_base=$(git merge-base 'upstream/main' `git rev-parse HEAD`) || \ - { echo "::error::Failed to find merge_base"; exit 1; } - echo "Merge Base: $merge_base" - git checkout $merge_base - git submodule update --init --recursive - LD_LIBRARY_PATH=/usr/local/lib make python-clean - LD_LIBRARY_PATH=/usr/local/lib make python-build - python /tmp/signature.py export --spark spark_merge_base_signatures.json - python /tmp/signature.py export --presto presto_merge_base_signatures.json - - checkout - - run: - name: "Build" - command: | - make debug NUM_THREADS=8 MAX_HIGH_MEM_JOBS=4 MAX_LINK_JOBS=4 EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_ARROW=ON" - ccache -s - no_output_timeout: 1h - - run: - name: "Build and test PyVelox" - command: | - conda init bash - source ~/.bashrc - conda activate pyveloxenv - LD_LIBRARY_PATH=/usr/local/lib make python-test - - run: - name: "Check and create bias function signatures" - command: | - source ~/.bashrc - conda activate pyveloxenv - pip install deepdiff - python ./scripts/signature.py export --presto presto_pr_signatures.json - python ./scripts/signature.py export --spark spark_pr_signatures.json - if python ./scripts/signature.py bias presto_merge_base_signatures.json presto_pr_signatures.json /tmp/presto_bias_functions 2>&1 > /tmp/presto-err-message; \ - then echo "Presto signature check success" ; else echo "Presto signature check failed" > /tmp/presto-signature-error-code ; fi - if python ./scripts/signature.py bias spark_merge_base_signatures.json spark_pr_signatures.json /tmp/spark_bias_functions ; \ - then echo "Spark signature check success"; else echo "Spark signature check failed" > /tmp/spark-signature-error-code ; fi - - - store_artifacts: - path: 'presto_merge_base_signatures.json' - - store_artifacts: - path: 'presto_pr_signatures.json' - - store_artifacts: - path: 'spark_merge_base_signatures.json' - - store_artifacts: - path: 'spark_pr_signatures.json' - - fuzzer-run: - fuzzer_output: "/tmp/fuzzer.log" - fuzzer_repro: "/tmp/fuzzer_repro" - fuzzer_name: "Expression Bias Run" - fuzzer_exe: "if [ -f /tmp/presto_bias_functions ]; then _build/debug/velox/expression/tests/velox_expression_fuzzer_test" - fuzzer_args: " --seed ${RANDOM} --lazy_vector_generation_ratio 0.2 \ - --assign_function_tickets $(cat /tmp/presto_bias_functions) \ - --duration_sec 3600 --enable_variadic_signatures \ - --velox_fuzzer_enable_complex_types \ - --velox_fuzzer_enable_column_reuse \ - --velox_fuzzer_enable_expression_reuse \ - --max_expression_trees_per_step 2 \ - --retry_with_try \ - --enable_dereference \ - --logtostderr=1 --minloglevel=0 \ - --repro_persist_path=/tmp/fuzzer_repro ; fi" - - - fuzzer-run: - fuzzer_output: "/tmp/spark_fuzzer.log" - fuzzer_repro: "/tmp/spark_fuzzer_repro" - fuzzer_name: "Spark Bias Run" - fuzzer_exe: "if [ -f /tmp/spark_bias_functions ]; then _build/debug/velox/expression/tests/spark_expression_fuzzer_test" - fuzzer_args: " --seed ${RANDOM} --duration_sec 3600 --logtostderr=1 --minloglevel=0 \ - --assign_function_tickets $(cat /tmp/spark_bias_functions) \ - --repro_persist_path=/tmp/spark_fuzzer_repro ; fi" - - - run: - name: "Surface only Presto function signature errors if any" - command: | - if [ -f /tmp/presto-signature-error-code ]; then \ - echo "Incompatible changes have been made to function signatures:\n"; \ - cat /tmp/presto-err-message ; \ - exit 1 ; \ - fi - - -workflows: - - longer-fuzzer: - when: << pipeline.parameters.run-longer-expression-fuzzer >> - jobs: - - linux-pr-fuzzer-run - - shorter-fuzzer: - unless: << pipeline.parameters.run-longer-expression-fuzzer >> - jobs: - - linux-pr-fuzzer-run diff --git a/.github/workflows/linux-build.yml b/.github/workflows/linux-build.yml index 366681d665b4..0c0db76f8214 100644 --- a/.github/workflows/linux-build.yml +++ b/.github/workflows/linux-build.yml @@ -103,7 +103,7 @@ jobs: # Some of the adapters dependencies are in the 'adapters' conda env shell: mamba run --no-capture-output -n adapters /usr/bin/bash -e {0} env: - LIBHDFS3_CONF: "/__w/velox/velox/.circleci/hdfs-client.xml" + LIBHDFS3_CONF: "/__w/velox/velox/scripts/hdfs-client.xml" working-directory: _build/release run: | ctest -j 8 --output-on-failure --no-tests=error diff --git a/scripts/adapters.dockerfile b/scripts/adapters.dockerfile index d7da3c088074..1b32fed7d695 100644 --- a/scripts/adapters.dockerfile +++ b/scripts/adapters.dockerfile @@ -38,7 +38,7 @@ RUN npm install -g azurite ENV HADOOP_HOME=/usr/local/hadoop \ HADOOP_ROOT_LOGGER="WARN,DRFA" \ LC_ALL=C \ - LIBHDFS3_CONF=/velox/.circleci/hdfs-client.xml \ + LIBHDFS3_CONF=/velox/scripts/hdfs-client.xml \ PATH=/usr/local/hadoop/bin:${PATH} ENTRYPOINT ["/bin/bash", "-c", "source /opt/rh/gcc-toolset-9/enable && exec \"$@\"", "--"] diff --git a/.circleci/hdfs-client.xml b/scripts/hdfs-client.xml similarity index 100% rename from .circleci/hdfs-client.xml rename to scripts/hdfs-client.xml From ee4db1468015f990ca2ea51871b241b47d6014fc Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Fri, 29 Mar 2024 23:08:08 +0100 Subject: [PATCH 47/51] improve set_output --- scripts/signature.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/scripts/signature.py b/scripts/signature.py index f31495cb62b8..27d505619313 100644 --- a/scripts/signature.py +++ b/scripts/signature.py @@ -15,6 +15,7 @@ import json import os import sys +from typing import Any from deepdiff import DeepDiff @@ -43,8 +44,11 @@ def get_error_string(error_message): """ -def set_output(name: str, value: str): - """Sets a Github Actions output variable. Only single line values are supported.""" +def set_gh_output(name: str, value: Any): + """Sets a Github Actions output variable. Only single line values are supported. + value will be converted to a lower case string.""" + value = str(value).lower() + if "\n" in value: raise ValueError("Only single line values are supported.") @@ -217,13 +221,15 @@ def gh_bias_check(args): os.path.join(args.signature_dir, group + "_errors"), ] ) + bias_status = bias(bias_args) - set_output(f"{group}_error", str(bias_status == 1).lower()) + set_gh_output(f"{group}_error", bias_status == 1) + # check if there are any changes that require the bias fuzzer to run has_tickets = os.path.isfile( os.path.join(args.signature_dir, group + args.output_postfix) ) - set_output(f"{group}_functions", str(has_tickets).lower()) + set_gh_output(f"{group}_functions", has_tickets) def get_tickets(val): From af26c2540f6110649045ca6ae7f2262fb2ce8ffb Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Fri, 29 Mar 2024 23:18:29 +0100 Subject: [PATCH 48/51] remove python dev install --- .github/workflows/scheduled.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index 3a5e8d03b205..e45e921de871 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -116,8 +116,6 @@ jobs: run: | mkdir -p '${{ env.CCACHE_DIR }}' mkdir -p /tmp/signatures - # this can be removed after #8917 - dnf install -q -y python39-devel - name: Checkout Main if: ${{ steps.get-sig.outputs.stash-hit != 'true' }} From 25e734f20c4c90f4aab04f166b5b9bb8815de720 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 30 Mar 2024 00:34:14 +0100 Subject: [PATCH 49/51] Only run bias or unbiased fuzzer not both --- .github/workflows/scheduled.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index e45e921de871..daad6cbb6113 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -243,6 +243,7 @@ jobs: presto-fuzzer-run: name: Presto Fuzzer + if: ${{ needs.compile.outputs.presto_bias != 'true' }} runs-on: ubuntu-latest container: ghcr.io/facebookincubator/velox-dev:centos8 needs: compile @@ -442,6 +443,7 @@ jobs: spark-fuzzer: name: Spark Fuzzer + if: ${{ needs.compile.outputs.spark_bias != 'true' }} runs-on: ubuntu-latest container: ghcr.io/facebookincubator/velox-dev:centos8 needs: compile From 33a31d96e9281d03901a38e377054f87de779f33 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Sat, 30 Mar 2024 00:42:12 +0100 Subject: [PATCH 50/51] format --- scripts/signature.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/signature.py b/scripts/signature.py index 27d505619313..e7acb1cd0aaa 100644 --- a/scripts/signature.py +++ b/scripts/signature.py @@ -229,7 +229,7 @@ def gh_bias_check(args): has_tickets = os.path.isfile( os.path.join(args.signature_dir, group + args.output_postfix) ) - set_gh_output(f"{group}_functions", has_tickets) + set_gh_output(f"{group}_functions", has_tickets) def get_tickets(val): From a0613c88cc19a8e59dbd4bd16dfd5478bbf61e36 Mon Sep 17 00:00:00 2001 From: Krishna Pai Date: Mon, 1 Apr 2024 11:23:05 -0700 Subject: [PATCH 51/51] Add a noop Circleci build so we can remove the error. --- .circleci/config.yml | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 .circleci/config.yml diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 000000000000..b121fa7dfd36 --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,33 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +version: 2.1 + +# This allows us to use CircleCI's dynamic configuration feature +setup: true + +jobs: + noop-build: + docker: + - image: cimg/base:2024.02 + steps: + - run: circleci-agent step halt + +workflows: + version: 2 + + path-filtering-workflow: + jobs: + - noop-build +