Fix CI
rui-mo committed Jul 16, 2024
1 parent 176bb44 commit 47f7796
Showing 18 changed files with 455 additions and 253 deletions.
45 changes: 31 additions & 14 deletions .github/workflows/experimental.yml
@@ -169,37 +169,54 @@ jobs:
/tmp/aggregate_fuzzer_repro
/tmp/server.log
linux-spark-fuzzer-run:
runs-on: ubuntu-latest
needs: compile
spark-java-aggregation-fuzzer-run:
runs-on: 8-core-ubuntu
container: ghcr.io/facebookincubator/velox-dev:spark-server
timeout-minutes: 120
env:
CCACHE_DIR: "/__w/velox/velox/.ccache/"
LINUX_DISTRO: "centos"
steps:

- name: "Restore ccache"
uses: actions/cache@v3
with:
path: "${{ env.CCACHE_DIR }}"
# We are using the benchmark ccache as it has all
# required features enabled, so no need to create a new one
key: ccache-spark-${{ github.sha }}
restore-keys: |
ccache-spark-
- name: "Checkout Repo"
uses: actions/checkout@v3
with:
path: velox
submodules: 'recursive'
ref: "${{ inputs.ref || 'main' }}"

- name: "Install dependencies"
run: source ./scripts/setup-ubuntu.sh && install_apt_deps

- name: Download spark aggregation fuzzer
uses: actions/download-artifact@v3
with:
name: spark_aggregation_fuzzer
- name: "Build"
run: |
cd velox
source /opt/rh/gcc-toolset-12/enable
make debug NUM_THREADS="${{ inputs.numThreads || 8 }}" MAX_HIGH_MEM_JOBS="${{ inputs.maxHighMemJobs || 8 }}" MAX_LINK_JOBS="${{ inputs.maxLinkJobs || 4 }}" EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_ARROW=ON ${{ inputs.extraCMakeFlags }}"
ccache -s
- name: "Run Spark Aggregate Fuzzer"
run: |
cd velox
bash /opt/start-spark.sh
# Sleep for 60 seconds to allow Spark server to start.
sleep 60
mkdir -p /tmp/spark_aggregate_fuzzer_repro/
rm -rfv /tmp/spark_aggregate_fuzzer_repro/*
chmod -R 777 /tmp/spark_aggregate_fuzzer_repro
chmod +x spark_aggregation_fuzzer_test
./spark_aggregation_fuzzer_test \
_build/debug/velox/functions/sparksql/fuzzer/spark_aggregation_fuzzer_test \
--seed ${RANDOM} \
--duration_sec 1800 \
--duration_sec 3600 \
--logtostderr=1 \
--minloglevel=0 \
--repro_persist_path=/tmp/spark_aggregate_fuzzer_repro \
--enable_sorted_aggregations=true \
&& echo -e "\n\nSpark Aggregation Fuzzer run finished successfully."
- name: Archive Spark aggregate production artifacts
7 changes: 5 additions & 2 deletions .github/workflows/scheduled.yml
@@ -454,9 +454,9 @@ jobs:
spark-aggregate-fuzzer-run:
name: Spark Aggregate Fuzzer
runs-on: ubuntu-latest
container: ghcr.io/facebookincubator/velox-dev:centos9
container: ghcr.io/facebookincubator/velox-dev:spark-server
needs: compile
timeout-minutes: 60
timeout-minutes: 120
steps:

- name: Download spark aggregation fuzzer
@@ -466,6 +466,9 @@

- name: Run Spark Aggregate Fuzzer
run: |
bash /opt/start-spark.sh
# Sleep for 60 seconds to allow Spark server to start.
sleep 60
mkdir -p /tmp/spark_aggregate_fuzzer_repro/logs/
chmod -R 777 /tmp/spark_aggregate_fuzzer_repro
chmod +x spark_aggregation_fuzzer_test
40 changes: 40 additions & 0 deletions CMake/Findc-ares.cmake
@@ -0,0 +1,40 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

find_package(c-ares CONFIG)
if(c-ares_FOUND)
if(TARGET c-ares::cares)
return()
endif()
endif()

find_path(
C_ARES_INCLUDE_DIR
NAMES ares.h
PATH_SUFFIXES c-ares)
find_library(C_ARES_LIBRARY NAMES cares c-ares)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(c-ares DEFAULT_MSG C_ARES_LIBRARY
C_ARES_INCLUDE_DIR)

if(c-ares_FOUND AND NOT TARGET c-ares::cares)
add_library(c-ares::cares UNKNOWN IMPORTED)
set_target_properties(
c-ares::cares
PROPERTIES IMPORTED_LOCATION "${C_ARES_LIBRARY}"
INTERFACE_INCLUDE_DIRECTORIES "${C_ARES_INCLUDE_DIR}")
endif()

mark_as_advanced(C_ARES_INCLUDE_DIR C_ARES_LIBRARY)
36 changes: 0 additions & 36 deletions CMake/resolve_dependency_modules/boringssl.cmake

This file was deleted.

@@ -25,13 +25,15 @@ resolve_dependency_url(CARES)
message(STATUS "Building C-ARES from source")

FetchContent_Declare(
cares
c-ares
URL ${VELOX_CARES_SOURCE_URL}
URL_HASH ${VELOX_CARES_BUILD_SHA256_CHECKSUM}
OVERRIDE_FIND_PACKAGE EXCLUDE_FROM_ALL SYSTEM)

set(CARES_STATIC ON)
set(CARES_INSTALL ON)
set(CARES_SHARED OFF)
FetchContent_MakeAvailable(cares)
add_library(cares::cares ALIAS c-ares)
FetchContent_MakeAvailable(c-ares)
if(NOT TARGET c-ares::cares)
add_library(c-ares::cares ALIAS c-ares)
endif()
4 changes: 4 additions & 0 deletions CMake/resolve_dependency_modules/curl.cmake
@@ -24,7 +24,11 @@ string(

resolve_dependency_url(CURL)

set(PREVIOUS_BUILD_TYPE ${CMAKE_BUILD_TYPE})
set(CMAKE_BUILD_TYPE Release)
FetchContent_Declare(
curl
URL ${VELOX_CURL_SOURCE_URL}
URL_HASH ${VELOX_CURL_BUILD_SHA256_CHECKSUM})
FetchContent_MakeAvailable(curl)
set(CMAKE_BUILD_TYPE ${PREVIOUS_BUILD_TYPE})
4 changes: 1 addition & 3 deletions CMake/resolve_dependency_modules/folly/CMakeLists.txt
@@ -65,6 +65,4 @@ if(${gflags_SOURCE} STREQUAL "BUNDLED")
add_dependencies(folly glog gflags_static fmt::fmt)
endif()

set(FOLLY_BENCHMARK_STATIC_LIB
${folly_BINARY_DIR}/folly/libfollybenchmark${CMAKE_STATIC_LIBRARY_SUFFIX}
PARENT_SCOPE)
add_library(Folly::follybenchmark ALIAS follybenchmark)
22 changes: 9 additions & 13 deletions CMakeLists.txt
@@ -14,6 +14,8 @@
cmake_minimum_required(VERSION 3.28)
message(STATUS "Building using CMake version: ${CMAKE_VERSION}")

set(BUILD_SHARED_LIBS OFF)

# The policy allows us to change options without caching.
cmake_policy(SET CMP0077 NEW)
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
@@ -329,15 +331,16 @@ if("${ENABLE_ALL_WARNINGS}")
-Wno-maybe-uninitialized \
-Wno-unused-result \
-Wno-format-overflow \
-Wno-strict-aliasing")
-Wno-strict-aliasing \
-Wno-mismatched-new-delete")
endif()

set(KNOWN_WARNINGS
"-Wno-unused \
-Wno-unused-parameter \
-Wno-sign-compare \
-Wno-ignored-qualifiers \
-Wnon-virtual-dtor \
-Wno-non-virtual-dtor \
${KNOWN_COMPILER_SPECIFIC_WARNINGS}")

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra ${KNOWN_WARNINGS}")
@@ -454,19 +457,16 @@ add_compile_definitions(FOLLY_HAVE_INT128_T=1)
set_source(folly)
resolve_dependency(folly)

# Spark query runner depends on absl, c-ares, grpc and boringssl.
# Spark query runner depends on absl, c-ares, grpc.
set_source(absl)
resolve_dependency(absl 20240116 EXACT)

set_source(cares)
resolve_dependency(cares 1.17.2 EXACT)
set_source(c-ares)
resolve_dependency(c-ares)

set_source(gRPC)
resolve_dependency(gRPC 1.48.1 EXACT)

set_source(boringssl)
resolve_dependency(boringssl)

if(VELOX_ENABLE_REMOTE_FUNCTIONS)
# TODO: Move this to use resolve_dependency(). For some reason, FBThrift
# requires clients to explicitly install fizz and wangle.
@@ -475,11 +475,7 @@ if(VELOX_ENABLE_REMOTE_FUNCTIONS)
find_package(FBThrift CONFIG REQUIRED)
endif()

if(DEFINED FOLLY_BENCHMARK_STATIC_LIB)
set(FOLLY_BENCHMARK ${FOLLY_BENCHMARK_STATIC_LIB})
else()
set(FOLLY_BENCHMARK Folly::follybenchmark)
endif()
set(FOLLY_BENCHMARK Folly::follybenchmark)

if(VELOX_ENABLE_GCS)
set_source(google_cloud_cpp_storage)
22 changes: 1 addition & 21 deletions scripts/setup-centos9.sh
@@ -59,7 +59,7 @@ function install_velox_deps_from_dnf
dnf_install libevent-devel \
openssl-devel re2-devel libzstd-devel lz4-devel double-conversion-devel \
libdwarf-devel elfutils-libelf-devel curl-devel libicu-devel bison flex \
libsodium-devel zlib-devel
libsodium-devel zlib-devel go

# install sphinx for doc gen
pip install sphinx sphinx-tabs breathe sphinx_rtd_theme
Expand Down Expand Up @@ -221,26 +221,6 @@ function install_cuda {
dnf install -y cuda-nvcc-$(echo $1 | tr '.' '-') cuda-cudart-devel-$(echo $1 | tr '.' '-')
}

function install_grpc {
git clone https://github.com/grpc/grpc.git --branch v1.50.0 --single-branch
(
cd grpc
git submodule update --init
mkdir -p cmake/build
cd cmake/build
cmake ../.. -DgRPC_INSTALL=ON \
-DCMAKE_BUILD_TYPE=Release \
-DgRPC_ABSL_PROVIDER=module \
-DgRPC_CARES_PROVIDER=module \
-DgRPC_PROTOBUF_PROVIDER=module \
-DgRPC_RE2_PROVIDER=package \
-DgRPC_SSL_PROVIDER=package \
-DgRPC_ZLIB_PROVIDER=package
make "-j$(nproc)"
$SUDO make install
)
}

function install_velox_deps {
run_and_time install_velox_deps_from_dnf
run_and_time install_conda
1 change: 1 addition & 0 deletions velox/docs/develop/testing.rst
@@ -10,3 +10,4 @@ Testing Tools
testing/memory-arbitration-fuzzer
testing/row-number-fuzzer
testing/writer-fuzzer
testing/spark-query-runner.rst
75 changes: 75 additions & 0 deletions velox/docs/develop/testing/spark-query-runner.rst
@@ -0,0 +1,75 @@
==================
Spark Query Runner
==================

Introduction
------------

The Spark Query Runner is a tool designed to facilitate the testing of Velox.
It achieves this by executing SQL queries on Apache Spark and comparing the results
with those produced by Velox. It helps verify the correctness of Velox's computation
against Spark and provides a method for identifying potential issues in Velox's
implementation. Currently, Spark 3.5.1 is used in Velox.

How It Works
------------

The Spark Query Runner operates by executing given SQL queries on both Spark and Velox.
The results from both systems are then compared to check for any differences.
If the results match, it indicates that Velox is producing the correct output.
If the results differ, it suggests a potential issue in Velox that needs to be
investigated.

Since Spark 3.4, Spark Connect has introduced a decoupled client-server architecture
for Spark that allows remote connectivity to Spark clusters. From the client
perspective, Spark Connect mostly behaves as any other gRPC client, which is polyglot
and cross-platform. During execution, the Spark Connect endpoint embedded on the
Spark Server receives and translates unresolved logical plans into Spark’s logical plan
operators. From there, the standard Spark execution process kicks in, ensuring that
Spark Connect leverages all of Spark’s optimizations and enhancements. Results are
streamed back to the client through gRPC as Apache Arrow-encoded row batches.

In the Spark Query Runner, we use Spark Connect to submit queries to Spark and fetch
the results back to Velox for comparison. The steps for this process are as follows:

1. Provide the Spark SQL query to be executed. The query could be generated from a Velox
   plan node or written manually.
2. Create a protobuf message ``ExecutePlanRequest`` from the SQL query. The protocols
   used by Spark Connect are defined in `Apache Spark <https://github.com/apache/spark/tree/v3.5.1/connector/connect/common/src/main/protobuf/spark/connect>`_.
3. Submit the message to ``SparkConnectService`` through the gRPC API ``ExecutePlan``.
4. Fetch Spark's results from the execution response. Results are in Arrow IPC stream format
   and can be read as Arrow ``RecordBatch`` by ``arrow::ipc::RecordBatchReader``.
5. Convert the Arrow ``RecordBatch`` to a Velox vector for comparison with Velox's results.
   A sketch of these steps is shown below.

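A minimal, illustrative C++ sketch of steps 2-5 follows. It is not the actual Velox
implementation: the generated header path and the Spark Connect protobuf/gRPC accessors
(``ExecutePlanRequest``, ``Plan.root.sql.query``, ``ExecutePlanResponse.arrow_batch``,
``SparkConnectService::ExecutePlan``) are assumptions derived from the proto definitions
linked in step 2.

.. code-block:: c++

   #include <memory>
   #include <string>
   #include <vector>
   #include <arrow/api.h>
   #include <arrow/io/memory.h>
   #include <arrow/ipc/reader.h>
   #include <grpcpp/grpcpp.h>
   // Assumed path of the stubs generated from the Spark Connect protos.
   #include "spark/connect/base.grpc.pb.h"

   std::vector<std::shared_ptr<arrow::RecordBatch>> runSparkSql(const std::string& sql) {
     // Step 2: wrap the SQL text in an ExecutePlanRequest.
     spark::connect::ExecutePlanRequest request;
     request.set_session_id("velox-spark-query-runner");
     request.mutable_plan()->mutable_root()->mutable_sql()->set_query(sql);

     // Step 3: submit the request to SparkConnectService through the ExecutePlan gRPC API.
     auto channel =
         grpc::CreateChannel("localhost:15002", grpc::InsecureChannelCredentials());
     auto stub = spark::connect::SparkConnectService::NewStub(channel);
     grpc::ClientContext context;
     auto responseReader = stub->ExecutePlan(&context, request);

     // Step 4: responses are streamed back; Arrow data arrives as IPC stream bytes.
     std::vector<std::shared_ptr<arrow::RecordBatch>> batches;
     spark::connect::ExecutePlanResponse response;
     while (responseReader->Read(&response)) {
       if (!response.has_arrow_batch()) {
         continue;
       }
       auto buffer = arrow::Buffer::FromString(response.arrow_batch().data());
       arrow::io::BufferReader source(buffer);
       auto ipcReader =
           arrow::ipc::RecordBatchStreamReader::Open(&source).ValueOrDie();
       std::shared_ptr<arrow::RecordBatch> batch;
       while (ipcReader->ReadNext(&batch).ok() && batch != nullptr) {
         batches.push_back(batch);
       }
     }
     // Step 5: each RecordBatch can then be converted to a Velox RowVector
     // (for example through the Arrow C data interface) and compared with Velox's results.
     return batches;
   }

The aggregation fuzzer that exercises this runner is built under
``velox/functions/sparksql/fuzzer`` (see the CI workflow changes above).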
Usage
-----

To use the Spark Query Runner, you will need to deploy an executable Spark and start the
Spark Connect server with the command below.

.. code-block:: shell

   "$SPARK_HOME"/sbin/start-connect-server.sh --jars "$JAR_PATH"/spark-connect_2.12-3.5.1.jar

The jar for Spark Connect can be downloaded from the `maven repository <https://repo1.maven.org/maven2/org/apache/spark/spark-connect_2.12/3.5.1/>`_.
If the Spark Connect server starts successfully, you will see a log like the one below. The
server will be listening at ``localhost:15002``.

.. code-block::

   INFO SparkConnectServer: Spark Connect server started at: 0:0:0:0:0:0:0:0%0:15002

You can then provide the Spark Query Runner with the SQL query and the data to run the
query on. The tool will execute the query on Spark and return the results as Velox vectors.

To run fuzzer tests with the Spark Query Runner, you can download the spark-server Docker
image ``ghcr.io/facebookincubator/velox-dev:spark-server`` and run the command below to start
the Spark Connect server inside it.

.. code-block:: shell

   bash /opt/start-spark.sh

Currently, using Spark as the reference DB is only supported in the aggregation fuzzer test. You
can trigger the test as described in :doc:`Fuzzer <fuzzer>`, and the results will be verified
against Spark.
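For example, a local run of the aggregation fuzzer against a running Spark Connect server could
look like the following; the binary path and flags mirror the CI invocation above, and the build
directory should be adjusted to your setup.

.. code-block:: shell

   bash /opt/start-spark.sh
   # Allow the Spark Connect server some time to start.
   sleep 60
   mkdir -p /tmp/spark_aggregate_fuzzer_repro/
   _build/debug/velox/functions/sparksql/fuzzer/spark_aggregation_fuzzer_test \
       --seed ${RANDOM} \
       --duration_sec 3600 \
       --logtostderr=1 \
       --minloglevel=0 \
       --repro_persist_path=/tmp/spark_aggregate_fuzzer_repro \
       --enable_sorted_aggregations=true

Failures, if any, are persisted under ``/tmp/spark_aggregate_fuzzer_repro`` for reproduction.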
4 changes: 1 addition & 3 deletions velox/dwio/parquet/writer/arrow/Encoding.cpp
@@ -505,9 +505,7 @@ class DictEncoderImpl : public EncoderImpl, virtual public DictEncoder<DType> {
dict_encoded_size_(0),
memo_table_(pool, kInitialHashTableSize) {}

~DictEncoderImpl() override {
DCHECK(buffered_indices_.empty());
}
~DictEncoderImpl() = default;

int dict_encoded_size() const override {
return dict_encoded_size_;