Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DNM] adding param allowPrecisionLoss #394

Open
wants to merge 17 commits into
base: update
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 69 additions & 0 deletions .github/workflows/unittest.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# CI workflow run on every pull request: builds Velox inside an Ubuntu 22.04
# container, runs the C++ unit tests with ctest, and runs a clang-format check.
name: Velox Unit Tests Suite

on:
  pull_request

# One run per repository/branch/workflow; a newer push cancels the older run.
concurrency:
  group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
  cancel-in-progress: true

jobs:

  velox-test:
    runs-on: self-hosted
    container: ubuntu:22.04
    steps:
      - uses: actions/checkout@v2
      # CA certificates are installed first because the next step switches apt to an https mirror.
      - run: apt-get update && apt-get install ca-certificates -y && update-ca-certificates
      - run: sed -i 's/http\:\/\/archive.ubuntu.com/https\:\/\/mirrors.ustc.edu.cn/g' /etc/apt/sources.list
      - run: apt-get update
      - run: apt-get install -y cmake ccache build-essential ninja-build sudo
      - run: apt-get install -y libboost-all-dev libcurl4-openssl-dev
      # The thrift pattern is quoted so the runner shell hands it to apt-get
      # unchanged instead of trying to expand it against files in the workspace.
      - run: apt-get install -y libssl-dev flex libfl-dev git openjdk-8-jdk axel '*thrift*' libkrb5-dev libgsasl7-dev libuuid1 uuid-dev
      - run: apt-get install -y libz-dev
      # Build protobuf 21.4 from source as position-independent code and install it.
      - run: |
          axel https://github.com/protocolbuffers/protobuf/releases/download/v21.4/protobuf-all-21.4.tar.gz
          tar xf protobuf-all-21.4.tar.gz
          cd protobuf-21.4/cmake
          CFLAGS=-fPIC CXXFLAGS=-fPIC cmake .. && make -j && make install
      # MinIO server package.
      - run: |
          axel https://dl.min.io/server/minio/release/linux-amd64/archive/minio_20220526054841.0.0_amd64.deb
          dpkg -i minio_20220526054841.0.0_amd64.deb
          rm minio_20220526054841.0.0_amd64.deb
      # Hadoop 2.10.1 unpacked under /usr/local (referenced by HADOOP_HOME below).
      - run: |
          axel https://archive.apache.org/dist/hadoop/core/hadoop-2.10.1/hadoop-2.10.1.tar.gz
          tar xf hadoop-2.10.1.tar.gz -C /usr/local/
      - name: Compile C++ unit tests
        run: |
          git submodule sync --recursive && git submodule update --init --recursive
          # The container already runs the steps without sudo, so strip it from the setup script.
          sed -i 's/sudo apt/apt/g' ./scripts/setup-ubuntu.sh
          sed -i 's/sudo --preserve-env apt/apt/g' ./scripts/setup-ubuntu.sh
          # Configure the time zone before running the setup script. The zone name
          # is assigned on its own line: with a `VAR=value cmd $VAR` prefix
          # assignment the shell expands `$VAR` before the assignment takes effect,
          # which would leave the link target and /etc/timezone empty.
          TIMEZONE_NAME=Asia/Shanghai
          ln -snf "/usr/share/zoneinfo/${TIMEZONE_NAME}" /etc/localtime && echo "${TIMEZONE_NAME}" > /etc/timezone && ./scripts/setup-ubuntu.sh
          mkdir -p ~/adapter-deps/install
          DEPENDENCY_DIR=~/adapter-deps PROMPT_ALWAYS_RESPOND=n ./scripts/setup-adapters.sh gcs aws hdfs
          #make debug EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_PARQUET=ON -DVELOX_BUILD_TESTING=ON -DVELOX_BUILD_TEST_UTILS=ON -DVELOX_ENABLE_HDFS=ON -DVELOX_ENABLE_S3=ON -DVELOX_ENABLE_GCS=ON" AWSSDK_ROOT_DIR=~/adapter-deps/install
          #make debug EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_PARQUET=ON -DVELOX_BUILD_TESTING=ON -DVELOX_BUILD_TEST_UTILS=ON"
          make EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_PARQUET=ON -DVELOX_BUILD_TESTING=ON -DVELOX_BUILD_TEST_UTILS=ON"
          # Environment for the test run.
          export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-amd64/
          export HADOOP_ROOT_LOGGER="WARN,DRFA"
          export LIBHDFS3_CONF=$(pwd)/.circleci/hdfs-client.xml
          export HADOOP_HOME='/usr/local/hadoop-2.10.1'
          export PATH=~/adapter-deps/install/bin:/usr/local/hadoop-2.10.1/bin:${PATH}
          cd _build/release && ctest -j32 -VV --output-on-failure

  formatting-check:
    name: Formatting Check
    runs-on: ubuntu-latest
    strategy:
      matrix:
        path:
          - check: 'velox'
            exclude: 'external'
    steps:
      - uses: actions/checkout@v2
      - name: Run clang-format style check for C/C++ programs.
        # NOTE(review): the action reference below looks mangled by e-mail
        # obfuscation in the captured page; it presumably reads
        # jidicula/clang-format-action@<version> - restore the pinned version
        # from the repository before using this file.
        uses: jidicula/[email protected]
        with:
          clang-format-version: '12'
          check-path: ${{ matrix.path['check'] }}
          exclude-regex: ${{ matrix.path['exclude'] }}
272 changes: 272 additions & 0 deletions scripts/setup-centos7.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,272 @@
#!/bin/bash
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Shell options: -e exit on the first failing command, -f disable pathname
# expansion (wildcards in later commands are passed through literally),
# -x trace every command, pipefail makes a pipeline fail if any stage fails.
set -efx -o pipefail
# Some of the packages must be built with the same compiler flags
# so that some low level types are the same size. Also, disable warnings.
SCRIPTDIR=$(dirname "${BASH_SOURCE[0]}")
# Helper functions are sourced from the script's own directory; get_cxx_flags,
# github_checkout and cmake_install used below are not defined in this file and
# presumably come from there.
source $SCRIPTDIR/setup-helper-functions.sh
# Directory where dependency sources are downloaded and built; overridable from the environment.
DEPENDENCY_DIR=${DEPENDENCY_DIR:-/tmp/velox-deps}
# Target passed to get_cxx_flags; overridable from the environment.
CPU_TARGET="${CPU_TARGET:-avx}"
# Number of processors currently online.
NPROC=$(getconf _NPROCESSORS_ONLN)
# NOTE(review): presumably the fmt release to build - confirm it matches the version install_fmt fetches.
FMT_VERSION=10.1.1
export CFLAGS=$(get_cxx_flags $CPU_TARGET) # Used by LZO.
export CXXFLAGS=$CFLAGS # Used by boost.
export CPPFLAGS=$CFLAGS # Used by LZO.
# Search the /usr/local and /usr pkg-config directories ahead of any inherited path.
export PKG_CONFIG_PATH=/usr/local/lib64/pkgconfig:/usr/local/lib/pkgconfig:/usr/lib64/pkgconfig:/usr/lib/pkgconfig:$PKG_CONFIG_PATH
# Git ref passed to github_checkout for facebook/folly.
FB_OS_VERSION=v2024.02.26.00

# Privilege-escalation prefix used by the install steps; it is expanded unquoted
# so that "sudo" and "-E" become separate words.
# shellcheck disable=SC2037
SUDO="sudo -E"

# Runs the given command (with its arguments) under the shell's `time` keyword
# and then prints a "+ Finished running ..." line naming it.
function run_and_time {
time "$@"
# The group's stderr is discarded so the xtrace (`set -x`) echo of this line is
# hidden and the message appears only once, on stdout.
{ echo "+ Finished running $*"; } 2> /dev/null
}

# Installs the packages given as arguments with dnf: assume "yes", quiet
# output, and weak dependencies switched off.
function dnf_install {
  $SUDO dnf install \
    -y \
    -q \
    --setopt=install_weak_deps=False \
    "$@"
}

# Installs the packages given as arguments with yum, answering "yes" to prompts.
function yum_install {
  $SUDO yum install \
    -y \
    "$@"
}

# Streams a gzip-compressed tarball from a URL straight into a directory.
#   $1 - URL of the .tar.gz archive
#   $2 - destination directory (created if missing); the archive's top-level
#        directory component is stripped while extracting
function wget_and_untar {
  local archive_url=$1
  local target_dir=$2
  mkdir -p "${target_dir}"
  wget --quiet --max-redirect 3 --output-document=- "${archive_url}" \
    | tar --extract --gzip --directory="${target_dir}" --strip-components=1
}

# Builds CMake 3.25.1 from the upstream source tarball, installs it under
# /usr/local and prints the version of the cmake found on PATH afterwards.
function install_cmake {
  local source_dir=cmake-3
  cd "${DEPENDENCY_DIR}"
  wget_and_untar https://cmake.org/files/v3.25/cmake-3.25.1.tar.gz "${source_dir}"
  cd "${source_dir}"
  ./bootstrap --prefix=/usr/local
  make "-j$(nproc)"
  $SUDO make install
  cmake --version
}

# Builds Ninja v1.11.1 and copies the resulting binary to /usr/local/bin.
function install_ninja {
  cd "${DEPENDENCY_DIR}"
  # NOTE(review): github_checkout is an external helper; the relative paths
  # below assume it leaves the shell inside the checkout - confirm.
  github_checkout ninja-build/ninja v1.11.1
  # Bootstrap build (produces ./ninja), followed by a CMake build in build-cmake.
  ./configure.py --bootstrap
  cmake -B build-cmake
  cmake --build build-cmake
  # The binary copied is the one in the source root.
  $SUDO cp ninja /usr/local/bin/
}

# Checks out facebook/folly at ${FB_OS_VERSION} and builds/installs it through
# the cmake_install helper with tests off and FOLLY_HAVE_INT128_T on.
function install_folly {
  cd "${DEPENDENCY_DIR}"
  github_checkout facebook/folly "${FB_OS_VERSION}"
  cmake_install \
    -DBUILD_TESTS=OFF \
    -DFOLLY_HAVE_INT128_T=ON
}

# Downloads the latest Miniconda3 installer for Linux x86_64 and installs it
# non-interactively into /opt/miniconda-for-velox.
function install_conda {
  local INSTALLER=Miniconda3-latest-Linux-x86_64.sh
  # Left global (not `local`), as before, in case later steps read it.
  MINICONDA_PATH=/opt/miniconda-for-velox
  cd "${DEPENDENCY_DIR}"
  mkdir -p conda
  cd conda
  # -O overwrites a previous download; without it a rerun would save a ".1"
  # copy and then execute the stale installer.
  wget -O "${INSTALLER}" "https://repo.anaconda.com/miniconda/${INSTALLER}"
  # -b: batch mode, -u: update an existing installation, -p: install prefix.
  # The prefix must be passed with -p; a bare trailing path is not an installer
  # option, so the target directory would otherwise not be honoured.
  # $SUDO is used because the prefix lives under /opt.
  $SUDO bash "${INSTALLER}" -b -u -p "${MINICONDA_PATH}"
}

# Builds OpenSSL 1.1.1s from the GitHub source archive as static libraries
# only (no-shared) and installs it.
function install_openssl {
  local source_dir=openssl
  cd "${DEPENDENCY_DIR}"
  wget_and_untar \
    https://github.com/openssl/openssl/archive/refs/tags/OpenSSL_1_1_1s.tar.gz \
    "${source_dir}"
  cd "${source_dir}"
  ./config no-shared
  make depend
  make
  $SUDO make install
}

# Builds gflags v2.2.2 (shared and static libraries) and installs it under
# /usr/local with a lib64 library directory suffix.
function install_gflags {
  local source_dir=gflags
  cd "${DEPENDENCY_DIR}"
  wget_and_untar https://github.com/gflags/gflags/archive/v2.2.2.tar.gz "${source_dir}"
  cd "${source_dir}"
  cmake_install \
    -DBUILD_SHARED_LIBS=ON \
    -DBUILD_STATIC_LIBS=ON \
    -DBUILD_gflags_LIB=ON \
    -DLIB_SUFFIX=64 \
    -DCMAKE_INSTALL_PREFIX:PATH=/usr/local
}

# Builds glog v0.5.0 (shared and static libraries requested) and installs it
# under /usr/local.
function install_glog {
  local source_dir=glog
  cd "${DEPENDENCY_DIR}"
  wget_and_untar https://github.com/google/glog/archive/v0.5.0.tar.gz "${source_dir}"
  cd "${source_dir}"
  cmake_install \
    -DBUILD_SHARED_LIBS=ON \
    -DBUILD_STATIC_LIBS=ON \
    -DCMAKE_INSTALL_PREFIX:PATH=/usr/local
}

# Builds snappy 1.1.8 without its tests and installs it via cmake_install.
function install_snappy {
  local source_dir=snappy
  cd "${DEPENDENCY_DIR}"
  wget_and_untar https://github.com/google/snappy/archive/1.1.8.tar.gz "${source_dir}"
  cd "${source_dir}"
  cmake_install -DSNAPPY_BUILD_TESTS=OFF
}

# Builds libdwarf (snapshot tag 20210528) as a static-only library, runs its
# test suite and installs it.
function install_dwarf {
  local source_dir=dwarf
  cd "${DEPENDENCY_DIR}"
  wget_and_untar \
    https://github.com/davea42/libdwarf-code/archive/refs/tags/20210528.tar.gz \
    "${source_dir}"
  cd "${source_dir}"
  # Disabled alternative: build from the libdwarf 0.5.0 release tarball.
  #local URL=https://github.com/davea42/libdwarf-code/releases/download/v0.5.0/libdwarf-0.5.0.tar.xz
  #local DIR=dwarf
  #mkdir -p "${DIR}"
  #wget -q --max-redirect 3 "${URL}"
  #tar -xf libdwarf-0.5.0.tar.xz -C "${DIR}"
  #cd dwarf/libdwarf-0.5.0
  ./configure --disable-shared
  make
  make check
  $SUDO make install
}

# Downloads RE2 release 2023-03-01 and installs it with its Makefile's
# `install` target.
function install_re2 {
  local source_dir=re2
  cd "${DEPENDENCY_DIR}"
  wget_and_untar \
    https://github.com/google/re2/archive/refs/tags/2023-03-01.tar.gz \
    "${source_dir}"
  cd "${source_dir}"
  $SUDO make install
}

# Builds flex 2.6.4 from the release tarball (regenerating the build system
# with autogen.sh first) and installs it.
function install_flex {
  local source_dir=flex
  cd "${DEPENDENCY_DIR}"
  wget_and_untar \
    https://github.com/westes/flex/releases/download/v2.6.4/flex-2.6.4.tar.gz \
    "${source_dir}"
  cd "${source_dir}"
  ./autogen.sh
  ./configure
  $SUDO make install
}

# Builds LZO 2.10 as a shared-only library and installs it under /usr/local.
function install_lzo {
  local source_dir=lzo
  cd "${DEPENDENCY_DIR}"
  wget_and_untar \
    http://www.oberhumer.com/opensource/lzo/download/lzo-2.10.tar.gz \
    "${source_dir}"
  cd "${source_dir}"
  ./configure \
    --prefix=/usr/local \
    --enable-shared \
    --disable-static \
    --docdir=/usr/local/share/doc/lzo-2.10
  make "-j$(nproc)"
  $SUDO make install
}

# Removes previously installed Boost files, then builds Boost 1.84.0 (without
# the python library) and installs it under /usr/local.
function install_boost {
  # Remove old version.
  # This script runs with `set -f` (pathname expansion disabled), so a literal
  # `libboost_*` argument handed to rm would never match any file. Let find do
  # the pattern matching instead; directories that do not exist are skipped so
  # find cannot fail the script under `set -e`.
  local lib_dir
  for lib_dir in /usr/local/lib /usr/lib64 /opt/rh/devtoolset-9/root/usr/lib64/dyninst; do
    if [ -d "${lib_dir}" ]; then
      $SUDO find "${lib_dir}" -maxdepth 1 -name 'libboost_*' ! -type d -delete
    fi
  done
  # Clear the old source tree (in the configured dependency directory rather
  # than a hard-coded /tmp path), headers and CMake package files.
  $SUDO rm -rf "${DEPENDENCY_DIR}/boost" /usr/local/include/boost/ /usr/local/lib/cmake/Boost-1.72.0/
  cd "${DEPENDENCY_DIR}"
  wget_and_untar https://github.com/boostorg/boost/releases/download/boost-1.84.0/boost-1.84.0.tar.gz boost
  cd boost
  ./bootstrap.sh --prefix=/usr/local --with-python=/usr/bin/python3 --with-python-root=/usr/lib/python3.6 --without-libraries=python
  # -d0 suppresses b2's per-action output.
  $SUDO ./b2 "-j$(nproc)" -d0 install threading=multi
}

# Builds and installs libhdfs3 from the apache/hawq master branch after
# patching a few of its source and build files in place.
function install_libhdfs3 {
cd "${DEPENDENCY_DIR}"
# NOTE(review): github_checkout is an external helper; the relative cd below
# assumes it leaves the shell inside the checkout - confirm.
github_checkout apache/hawq master
cd depends/libhdfs3
# Delete the line that requires the GoogleTest package from the CMake project.
sed -i "/FIND_PACKAGE(GoogleTest REQUIRED)/d" ./CMakeLists.txt
# Replace "dumpversion" with "dumpfullversion" in the platform CMake script.
sed -i "s/dumpversion/dumpfullversion/" ./CMake/Platform.cmake
# Change the empty string paired with "dfs.domain.socket.path" to
# "/var/lib/hadoop-hdfs/dn_socket".
sed -i "s/dfs.domain.socket.path\", \"\"/dfs.domain.socket.path\", \"\/var\/lib\/hadoop-hdfs\/dn_socket\"/g" src/common/SessionConfig.cpp
# Extend the condition `pos < endOfCurBlock` with `&& pos - cursor <= 128 * 1024`.
sed -i "s/pos < endOfCurBlock/pos \< endOfCurBlock \&\& pos \- cursor \<\= 128 \* 1024/g" src/client/InputStreamImpl.cpp
cmake_install
}

# Builds protobuf 21.4 from the "all" release tarball as position-independent
# code and installs it under /usr/local.
function install_protobuf {
  local tarball=protobuf-all-21.4.tar.gz
  cd "${DEPENDENCY_DIR}"
  wget "https://github.com/protocolbuffers/protobuf/releases/download/v21.4/${tarball}"
  tar -xzf "${tarball}"
  cd protobuf-21.4
  ./configure \
    CXXFLAGS="-fPIC" \
    --prefix=/usr/local
  make "-j$(nproc)"
  $SUDO make install
}

# Checks out aws-sdk-cpp 1.9.379 (shallow, with submodules) and builds a
# static Release build limited to the s3 and identity-management components.
function install_awssdk {
  cd "${DEPENDENCY_DIR}"
  github_checkout aws/aws-sdk-cpp 1.9.379 --depth 1 --recurse-submodules
  cmake_install \
    -DCMAKE_BUILD_TYPE=Release \
    -DBUILD_SHARED_LIBS:BOOL=OFF \
    -DMINIMIZE_SIZE:BOOL=ON \
    -DENABLE_TESTING:BOOL=OFF \
    -DBUILD_ONLY:STRING="s3;identity-management"
}

# Builds googletest release-1.12.1 (gtest and gmock, shared libraries) in a
# separate build directory and installs it.
function install_gtest {
  local tarball=release-1.12.1.tar.gz
  cd "${DEPENDENCY_DIR}"
  wget "https://github.com/google/googletest/archive/refs/tags/${tarball}"
  tar -xzf "${tarball}"
  cd googletest-release-1.12.1
  # Kept as a single && chain so failure handling matches the other steps.
  mkdir -p build \
    && cd build \
    && cmake \
      -DBUILD_GTEST=ON \
      -DBUILD_GMOCK=ON \
      -DINSTALL_GTEST=ON \
      -DINSTALL_GMOCK=ON \
      -DBUILD_SHARED_LIBS=ON \
      ..
  make "-j$(nproc)"
  $SUDO make install
}

# Removes any previously installed fmt, then builds fmt ${FMT_VERSION} without
# its tests and installs it through cmake_install.
function install_fmt {
  # Previously installed library, CMake package files and headers live under
  # /usr/local, so remove them with the same privileges used to install them.
  $SUDO rm -rf /usr/local/lib64/libfmt.a
  $SUDO rm -rf /usr/local/lib64/cmake/fmt
  $SUDO rm -rf /usr/local/include/fmt
  # Work from the dependency directory like every other install step instead
  # of whatever directory the previous step happened to leave behind.
  cd "${DEPENDENCY_DIR}"
  rm -rf fmt
  wget_and_untar "https://github.com/fmtlib/fmt/archive/${FMT_VERSION}.tar.gz" fmt
  cd fmt
  cmake_install -DFMT_TEST=OFF
}

# Builds the base third-party libraries from source, in a fixed order, timing
# each step with run_and_time.
function install_prerequisites {
  local step
  for step in \
    install_lzo \
    install_boost \
    install_re2 \
    install_flex \
    install_openssl \
    install_gflags \
    install_glog \
    install_snappy \
    install_dwarf; do
    run_and_time "${step}"
  done
}

# Builds the remaining dependencies (fmt, folly, conda), in that order, timing
# each step with run_and_time.
function install_velox_deps {
  local step
  for step in install_fmt install_folly install_conda; do
    run_and_time "${step}"
  done
}

# ---- Package-manager phase -------------------------------------------------
$SUDO dnf makecache

# dnf install dependency libraries
# epel-release / dnf-plugins-core: for ccache, ninja.
dnf_install epel-release dnf-plugins-core
# PowerTools only works on CentOS8
# dnf config-manager --set-enabled powertools
dnf_install \
  ccache git wget which libevent-devel \
  openssl-devel libzstd-devel lz4-devel double-conversion-devel \
  curl-devel libxml2-devel libgsasl-devel libuuid-devel patch

# Drop the packaged gflags (presumably so it cannot clash with the copy built
# from source below).
$SUDO dnf remove -y gflags

# Required for Thrift
dnf_install autoconf automake libtool bison python3 python3-devel

# Required to build flex
dnf_install gettext-devel texinfo help2man

# dnf_install conda

# ---- Compiler --------------------------------------------------------------
# Activate gcc9; enable errors on unset variables afterwards.
# GCC9 is installed via yum and devtoolset, because
# `dnf install gcc-toolset-9` only works on CentOS8.
$SUDO yum makecache
yum_install centos-release-scl
yum_install devtoolset-9
source /opt/rh/devtoolset-9/enable || exit 1
gcc --version
set -u

# ---- Build from source -----------------------------------------------------
mkdir -p "$DEPENDENCY_DIR"

run_and_time install_cmake
run_and_time install_ninja

install_prerequisites
install_velox_deps
1 change: 1 addition & 0 deletions velox/connectors/hive/HiveConnector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ std::unique_ptr<core::PartitionFunction> HivePartitionFunctionSpec::create(
void HiveConnectorFactory::initialize() {
[[maybe_unused]] static bool once = []() {
dwio::common::registerFileSinks();
dwrf::registerOrcReaderFactory();
dwrf::registerDwrfReaderFactory();
dwrf::registerDwrfWriterFactory();
// Meta's buck build system needs this check.
Expand Down
15 changes: 9 additions & 6 deletions velox/connectors/hive/HiveDataSource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,15 @@ HiveDataSource::HiveDataSource(
filters.emplace(k.clone(), v->clone());
}
double sampleRate = 1;
auto remainingFilter = extractFiltersFromRemainingFilter(
hiveTableHandle_->remainingFilter(),
expressionEvaluator_,
false,
filters,
sampleRate);
auto remainingFilter = hiveTableHandle_->remainingFilter();
if (hiveTableHandle_->isFilterPushdownEnabled()) {
remainingFilter = extractFiltersFromRemainingFilter(
hiveTableHandle_->remainingFilter(),
expressionEvaluator_,
false,
filters,
sampleRate);
}
if (sampleRate != 1) {
randomSkip_ = std::make_shared<random::RandomSkipTracker>(sampleRate);
}
Expand Down
Loading
Loading