Fix CI
rui-mo committed Jul 16, 2024
1 parent 176bb44 commit 47f7796
Showing 18 changed files with 455 additions and 253 deletions.
45 changes: 31 additions & 14 deletions .github/workflows/experimental.yml
@@ -169,37 +169,54 @@ jobs:
/tmp/aggregate_fuzzer_repro
/tmp/server.log
linux-spark-fuzzer-run:
runs-on: ubuntu-latest
needs: compile
spark-java-aggregation-fuzzer-run:
runs-on: 8-core-ubuntu
container: ghcr.io/facebookincubator/velox-dev:spark-server
timeout-minutes: 120
env:
CCACHE_DIR: "/__w/velox/velox/.ccache/"
LINUX_DISTRO: "centos"
steps:

- name: "Restore ccache"
uses: actions/cache@v3
with:
path: "${{ env.CCACHE_DIR }}"
# We are using the benchmark ccache as it has all
# required features enabled, so no need to create a new one
key: ccache-spark-${{ github.sha }}
restore-keys: |
ccache-spark-
- name: "Checkout Repo"
uses: actions/checkout@v3
with:
path: velox
submodules: 'recursive'
ref: "${{ inputs.ref || 'main' }}"

- name: "Install dependencies"
run: source ./scripts/setup-ubuntu.sh && install_apt_deps

- name: Download spark aggregation fuzzer
uses: actions/download-artifact@v3
with:
name: spark_aggregation_fuzzer
- name: "Build"
run: |
cd velox
source /opt/rh/gcc-toolset-12/enable
make debug NUM_THREADS="${{ inputs.numThreads || 8 }}" MAX_HIGH_MEM_JOBS="${{ inputs.maxHighMemJobs || 8 }}" MAX_LINK_JOBS="${{ inputs.maxLinkJobs || 4 }}" EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_ARROW=ON ${{ inputs.extraCMakeFlags }}"
ccache -s
- name: "Run Spark Aggregate Fuzzer"
run: |
cd velox
bash /opt/start-spark.sh
# Sleep for 60 seconds to allow Spark server to start.
sleep 60
mkdir -p /tmp/spark_aggregate_fuzzer_repro/
rm -rfv /tmp/spark_aggregate_fuzzer_repro/*
chmod -R 777 /tmp/spark_aggregate_fuzzer_repro
chmod +x spark_aggregation_fuzzer_test
./spark_aggregation_fuzzer_test \
_build/debug/velox/functions/sparksql/fuzzer/spark_aggregation_fuzzer_test \
--seed ${RANDOM} \
--duration_sec 1800 \
--duration_sec 3600 \
--logtostderr=1 \
--minloglevel=0 \
--repro_persist_path=/tmp/spark_aggregate_fuzzer_repro \
--enable_sorted_aggregations=true \
&& echo -e "\n\nSpark Aggregation Fuzzer run finished successfully."
- name: Archive Spark aggregate production artifacts
7 changes: 5 additions & 2 deletions .github/workflows/scheduled.yml
@@ -454,9 +454,9 @@ jobs:
spark-aggregate-fuzzer-run:
name: Spark Aggregate Fuzzer
runs-on: ubuntu-latest
container: ghcr.io/facebookincubator/velox-dev:centos9
container: ghcr.io/facebookincubator/velox-dev:spark-server
needs: compile
timeout-minutes: 60
timeout-minutes: 120
steps:

- name: Download spark aggregation fuzzer
@@ -466,6 +466,9 @@

- name: Run Spark Aggregate Fuzzer
run: |
bash /opt/start-spark.sh
# Sleep for 60 seconds to allow Spark server to start.
sleep 60
mkdir -p /tmp/spark_aggregate_fuzzer_repro/logs/
chmod -R 777 /tmp/spark_aggregate_fuzzer_repro
chmod +x spark_aggregation_fuzzer_test
40 changes: 40 additions & 0 deletions CMake/Findc-ares.cmake
@@ -0,0 +1,40 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

find_package(c-ares CONFIG)
if(c-ares_FOUND)
if(TARGET c-ares::cares)
return()
endif()
endif()

find_path(
C_ARES_INCLUDE_DIR
NAMES ares.h
PATH_SUFFIXES c-ares)
find_library(C_ARES_LIBRARY NAMES cares c-ares)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(c-ares DEFAULT_MSG C_ARES_LIBRARY
C_ARES_INCLUDE_DIR)

if(c-ares_FOUND AND NOT TARGET c-ares::cares)
add_library(c-ares::cares UNKNOWN IMPORTED)
set_target_properties(
c-ares::cares
PROPERTIES IMPORTED_LOCATION "${C_ARES_LIBRARY}"
INTERFACE_INCLUDE_DIRECTORIES "${C_ARES_INCLUDE_DIR}")
endif()

mark_as_advanced(C_ARES_INCLUDE_DIR C_ARES_LIBRARY)
36 changes: 0 additions & 36 deletions CMake/resolve_dependency_modules/boringssl.cmake

This file was deleted.

@@ -25,13 +25,15 @@ resolve_dependency_url(CARES)
message(STATUS "Building C-ARES from source")

FetchContent_Declare(
cares
c-ares
URL ${VELOX_CARES_SOURCE_URL}
URL_HASH ${VELOX_CARES_BUILD_SHA256_CHECKSUM}
OVERRIDE_FIND_PACKAGE EXCLUDE_FROM_ALL SYSTEM)

set(CARES_STATIC ON)
set(CARES_INSTALL ON)
set(CARES_SHARED OFF)
FetchContent_MakeAvailable(cares)
add_library(cares::cares ALIAS c-ares)
FetchContent_MakeAvailable(c-ares)
if(NOT TARGET c-ares::cares)
add_library(c-ares::cares ALIAS c-ares)
endif()
4 changes: 4 additions & 0 deletions CMake/resolve_dependency_modules/curl.cmake
@@ -24,7 +24,11 @@ string(

resolve_dependency_url(CURL)

set(PREVIOUS_BUILD_TYPE ${CMAKE_BUILD_TYPE})
set(CMAKE_BUILD_TYPE Release)
FetchContent_Declare(
curl
URL ${VELOX_CURL_SOURCE_URL}
URL_HASH ${VELOX_CURL_BUILD_SHA256_CHECKSUM})
FetchContent_MakeAvailable(curl)
set(CMAKE_BUILD_TYPE ${PREVIOUS_BUILD_TYPE})
4 changes: 1 addition & 3 deletions CMake/resolve_dependency_modules/folly/CMakeLists.txt
@@ -65,6 +65,4 @@ if(${gflags_SOURCE} STREQUAL "BUNDLED")
add_dependencies(folly glog gflags_static fmt::fmt)
endif()

set(FOLLY_BENCHMARK_STATIC_LIB
${folly_BINARY_DIR}/folly/libfollybenchmark${CMAKE_STATIC_LIBRARY_SUFFIX}
PARENT_SCOPE)
add_library(Folly::follybenchmark ALIAS follybenchmark)
22 changes: 9 additions & 13 deletions CMakeLists.txt
@@ -14,6 +14,8 @@
cmake_minimum_required(VERSION 3.28)
message(STATUS "Building using CMake version: ${CMAKE_VERSION}")

set(BUILD_SHARED_LIBS OFF)

# The policy allows us to change options without caching.
cmake_policy(SET CMP0077 NEW)
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
@@ -329,15 +331,16 @@ if("${ENABLE_ALL_WARNINGS}")
-Wno-maybe-uninitialized \
-Wno-unused-result \
-Wno-format-overflow \
-Wno-strict-aliasing")
-Wno-strict-aliasing \
-Wno-mismatched-new-delete")
endif()

set(KNOWN_WARNINGS
"-Wno-unused \
-Wno-unused-parameter \
-Wno-sign-compare \
-Wno-ignored-qualifiers \
-Wnon-virtual-dtor \
-Wno-non-virtual-dtor \
${KNOWN_COMPILER_SPECIFIC_WARNINGS}")

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra ${KNOWN_WARNINGS}")
@@ -454,19 +457,16 @@ add_compile_definitions(FOLLY_HAVE_INT128_T=1)
set_source(folly)
resolve_dependency(folly)

# Spark query runner depends on absl, c-ares, grpc and boringssl.
# Spark query runner depends on absl, c-ares, grpc.
set_source(absl)
resolve_dependency(absl 20240116 EXACT)

set_source(cares)
resolve_dependency(cares 1.17.2 EXACT)
set_source(c-ares)
resolve_dependency(c-ares)

set_source(gRPC)
resolve_dependency(gRPC 1.48.1 EXACT)

set_source(boringssl)
resolve_dependency(boringssl)

if(VELOX_ENABLE_REMOTE_FUNCTIONS)
# TODO: Move this to use resolve_dependency(). For some reason, FBThrift
# requires clients to explicitly install fizz and wangle.
@@ -475,11 +475,7 @@ if(VELOX_ENABLE_REMOTE_FUNCTIONS)
find_package(FBThrift CONFIG REQUIRED)
endif()

if(DEFINED FOLLY_BENCHMARK_STATIC_LIB)
set(FOLLY_BENCHMARK ${FOLLY_BENCHMARK_STATIC_LIB})
else()
set(FOLLY_BENCHMARK Folly::follybenchmark)
endif()
set(FOLLY_BENCHMARK Folly::follybenchmark)

if(VELOX_ENABLE_GCS)
set_source(google_cloud_cpp_storage)
22 changes: 1 addition & 21 deletions scripts/setup-centos9.sh
@@ -59,7 +59,7 @@ function install_velox_deps_from_dnf
dnf_install libevent-devel \
openssl-devel re2-devel libzstd-devel lz4-devel double-conversion-devel \
libdwarf-devel elfutils-libelf-devel curl-devel libicu-devel bison flex \
libsodium-devel zlib-devel
libsodium-devel zlib-devel go

# install sphinx for doc gen
pip install sphinx sphinx-tabs breathe sphinx_rtd_theme
Expand Down Expand Up @@ -221,26 +221,6 @@ function install_cuda {
dnf install -y cuda-nvcc-$(echo $1 | tr '.' '-') cuda-cudart-devel-$(echo $1 | tr '.' '-')
}

function install_grpc {
git clone https://github.com/grpc/grpc.git --branch v1.50.0 --single-branch
(
cd grpc
git submodule update --init
mkdir -p cmake/build
cd cmake/build
cmake ../.. -DgRPC_INSTALL=ON \
-DCMAKE_BUILD_TYPE=Release \
-DgRPC_ABSL_PROVIDER=module \
-DgRPC_CARES_PROVIDER=module \
-DgRPC_PROTOBUF_PROVIDER=module \
-DgRPC_RE2_PROVIDER=package \
-DgRPC_SSL_PROVIDER=package \
-DgRPC_ZLIB_PROVIDER=package
make "-j$(nproc)"
$SUDO make install
)
}

function install_velox_deps {
run_and_time install_velox_deps_from_dnf
run_and_time install_conda
1 change: 1 addition & 0 deletions velox/docs/develop/testing.rst
@@ -10,3 +10,4 @@ Testing Tools
testing/memory-arbitration-fuzzer
testing/row-number-fuzzer
testing/writer-fuzzer
testing/spark-query-runner.rst
75 changes: 75 additions & 0 deletions velox/docs/develop/testing/spark-query-runner.rst
@@ -0,0 +1,75 @@
==================
Spark Query Runner
==================

Introduction
------------

The Spark Query Runner is a tool designed to facilitate the testing of Velox.
It achieves this by executing SQL queries on Apache Spark and comparing the results
with those produced by Velox. It helps verify the correctness of Velox's computation
against Spark and provides a method for identifying potential issues in Velox's
implementation. Currently, Spark 3.5.1 is used in Velox.

How It Works
------------

The Spark Query Runner operates by executing given SQL queries on both Spark and Velox.
The results from both systems are then compared to check for any differences.
If the results match, it indicates that Velox is producing the correct output.
If the results differ, it suggests a potential issue in Velox that needs to be
investigated.

Since Spark 3.4, Spark Connect has introduced a decoupled client-server architecture
for Spark that allows remote connectivity to Spark clusters. From the client
perspective, Spark Connect mostly behaves as any other gRPC client, which is polyglot
and cross-platform. During execution, the Spark Connect endpoint embedded on the
Spark Server receives and translates unresolved logical plans into Spark’s logical plan
operators. From there, the standard Spark execution process kicks in, ensuring that
Spark Connect leverages all of Spark’s optimizations and enhancements. Results are
streamed back to the client through gRPC as Apache Arrow-encoded row batches.

In the Spark Query Runner, we use Spark Connect to submit queries to Spark and fetch
the results back to Velox for comparison. The steps for this process are as follows:

1. Provide the Spark SQL query to be executed. The query could be generated from a Velox
   plan node or written manually.
2. Create a protobuf message ``ExecutePlanRequest`` from the SQL query. The protocols
   used by Spark Connect are defined in `Apache Spark <https://github.com/apache/spark/tree/v3.5.1/connector/connect/common/src/main/protobuf/spark/connect>`_.
3. Submit the message to ``SparkConnectService`` through the gRPC API ``ExecutePlan``.
4. Fetch Spark's results from the execution response. Results are in Arrow IPC stream format
   and can be read as Arrow ``RecordBatch`` by ``arrow::ipc::RecordBatchReader``.
5. Convert the Arrow ``RecordBatch`` to a Velox vector for comparison with Velox's results.
   A sketch of these steps is shown below.

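A minimal, illustrative C++ sketch of steps 2-5 follows. It is not the actual Velox
implementation: the generated header path and the Spark Connect protobuf/gRPC accessors
(``ExecutePlanRequest``, ``Plan.root.sql.query``, ``ExecutePlanResponse.arrow_batch``,
``SparkConnectService::ExecutePlan``) are assumptions derived from the proto definitions
linked in step 2.

.. code-block:: c++

   #include <memory>
   #include <string>
   #include <vector>
   #include <arrow/api.h>
   #include <arrow/io/memory.h>
   #include <arrow/ipc/reader.h>
   #include <grpcpp/grpcpp.h>
   // Assumed path of the stubs generated from the Spark Connect protos.
   #include "spark/connect/base.grpc.pb.h"

   std::vector<std::shared_ptr<arrow::RecordBatch>> runSparkSql(const std::string& sql) {
     // Step 2: wrap the SQL text in an ExecutePlanRequest.
     spark::connect::ExecutePlanRequest request;
     request.set_session_id("velox-spark-query-runner");
     request.mutable_plan()->mutable_root()->mutable_sql()->set_query(sql);

     // Step 3: submit the request to SparkConnectService through the ExecutePlan gRPC API.
     auto channel =
         grpc::CreateChannel("localhost:15002", grpc::InsecureChannelCredentials());
     auto stub = spark::connect::SparkConnectService::NewStub(channel);
     grpc::ClientContext context;
     auto responseReader = stub->ExecutePlan(&context, request);

     // Step 4: responses are streamed back; Arrow data arrives as IPC stream bytes.
     std::vector<std::shared_ptr<arrow::RecordBatch>> batches;
     spark::connect::ExecutePlanResponse response;
     while (responseReader->Read(&response)) {
       if (!response.has_arrow_batch()) {
         continue;
       }
       auto buffer = arrow::Buffer::FromString(response.arrow_batch().data());
       arrow::io::BufferReader source(buffer);
       auto ipcReader =
           arrow::ipc::RecordBatchStreamReader::Open(&source).ValueOrDie();
       std::shared_ptr<arrow::RecordBatch> batch;
       while (ipcReader->ReadNext(&batch).ok() && batch != nullptr) {
         batches.push_back(batch);
       }
     }
     // Step 5: each RecordBatch can then be converted to a Velox RowVector
     // (for example through the Arrow C data interface) and compared with Velox's results.
     return batches;
   }

The aggregation fuzzer that exercises this runner is built under
``velox/functions/sparksql/fuzzer`` (see the CI workflow changes above).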
Usage
-----

To use the Spark Query Runner, you will need to deploy an executable Spark and start the
Spark Connect server with the command below.

.. code-block:: shell

   "$SPARK_HOME"/sbin/start-connect-server.sh --jars "$JAR_PATH"/spark-connect_2.12-3.5.1.jar

The jar for Spark Connect can be downloaded from the `maven repository <https://repo1.maven.org/maven2/org/apache/spark/spark-connect_2.12/3.5.1/>`_.
If the Spark Connect server starts successfully, you will see a log like the one below. The
server will be listening at ``localhost:15002``.

.. code-block::

   INFO SparkConnectServer: Spark Connect server started at: 0:0:0:0:0:0:0:0%0:15002

You can then provide the Spark Query Runner with the SQL query and the data to run the
query on. The tool will execute the query on Spark and return the results as Velox vectors.

To run fuzzer tests with the Spark Query Runner, you can download the spark-server Docker
image ``ghcr.io/facebookincubator/velox-dev:spark-server`` and run the command below to start
the Spark Connect server inside it.

.. code-block:: shell

   bash /opt/start-spark.sh

Currently, using Spark as the reference DB is only supported in the aggregation fuzzer test. You
can trigger the test as described in :doc:`Fuzzer <fuzzer>`, and the results will be verified
against Spark.
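For example, a local run of the aggregation fuzzer against a running Spark Connect server could
look like the following; the binary path and flags mirror the CI invocation above, and the build
directory should be adjusted to your setup.

.. code-block:: shell

   bash /opt/start-spark.sh
   # Allow the Spark Connect server some time to start.
   sleep 60
   mkdir -p /tmp/spark_aggregate_fuzzer_repro/
   _build/debug/velox/functions/sparksql/fuzzer/spark_aggregation_fuzzer_test \
       --seed ${RANDOM} \
       --duration_sec 3600 \
       --logtostderr=1 \
       --minloglevel=0 \
       --repro_persist_path=/tmp/spark_aggregate_fuzzer_repro \
       --enable_sorted_aggregations=true

Failures, if any, are persisted under ``/tmp/spark_aggregate_fuzzer_repro`` for reproduction.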
4 changes: 1 addition & 3 deletions velox/dwio/parquet/writer/arrow/Encoding.cpp
@@ -505,9 +505,7 @@ class DictEncoderImpl : public EncoderImpl, virtual public DictEncoder<DType> {
dict_encoded_size_(0),
memo_table_(pool, kInitialHashTableSize) {}

~DictEncoderImpl() override {
DCHECK(buffered_indices_.empty());
}
~DictEncoderImpl() = default;

int dict_encoded_size() const override {
return dict_encoded_size_;