Skip to content

Commit

Permalink
Merge branch 'main' of github.com:facebookincubator/velox
Browse files Browse the repository at this point in the history
  • Loading branch information
kgpai committed Nov 30, 2023
2 parents 35ac6ef + 2dc97de commit 57a35c0
Show file tree
Hide file tree
Showing 115 changed files with 4,313 additions and 1,287 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/scheduled.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ jobs:

- name: "Build"
run: |
make debug NUM_THREADS=16 MAX_HIGH_MEM_JOBS=8 MAX_LINK_JOBS=4 EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_ARROW=ON"
make debug NUM_THREADS=16 MAX_HIGH_MEM_JOBS=8 MAX_LINK_JOBS=2 EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_ARROW=ON"
ccache -s
- name: "Run Presto Fuzzer"
Expand Down
2 changes: 1 addition & 1 deletion CMake/resolve_dependency_modules/cpr.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,5 @@ FetchContent_Declare(
PATCH_COMMAND git apply
${CMAKE_CURRENT_LIST_DIR}/cpr/cpr-libcurl-compatible.patch)
set(BUILD_SHARED_LIBS OFF)
set(CPR_USE_SYSTEM_CURL ON)
set(CPR_USE_SYSTEM_CURL OFF)
FetchContent_MakeAvailable(cpr)
10 changes: 5 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,11 @@ else()
endif()
resolve_dependency(glog)

if(${VELOX_ENABLE_DUCKDB})
set_source(DuckDB)
resolve_dependency(DuckDB)
endif()

set_source(fmt)
resolve_dependency(fmt)

Expand Down Expand Up @@ -528,11 +533,6 @@ endif()
find_package(BISON 3.0.4 REQUIRED)
find_package(FLEX 2.5.13 REQUIRED)

if(${VELOX_ENABLE_DUCKDB})
set_source(DuckDB)
resolve_dependency(DuckDB)
endif()

include_directories(SYSTEM velox)
include_directories(SYSTEM velox/external)

Expand Down
2 changes: 1 addition & 1 deletion build/deps/github_hashes/facebook/folly-rev.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
Subproject commit fb047caf8418b9e9480374673ac60e0abdc20888
Subproject commit 94e741ce8779b850afe60f8c8058701c47fb79cb
20 changes: 14 additions & 6 deletions build/fbcode_builder/CMake/RustStaticLibrary.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ endfunction()
# `${TARGET}` CMake library target.
#
# ```cmake
# rust_cxx_bridge(<TARGET> <CXX_BRIDGE_FILE> [CRATE <CRATE_NAME>])
# rust_cxx_bridge(<TARGET> <CXX_BRIDGE_FILE> [CRATE <CRATE_NAME>] [LIBS <LIBNAMES>])
# ```
#
# Parameters:
Expand All @@ -374,9 +374,11 @@ endfunction()
# - CRATE_NAME:
# Name of the crate. This parameter is optional. If unspecified, it will
# fallback to `${TARGET}`.
# - LIBS <lib1> [<lib2> ...]:
# A list of libraries that this library depends on.
#
function(rust_cxx_bridge TARGET CXX_BRIDGE_FILE)
fb_cmake_parse_args(ARG "" "CRATE" "" "${ARGN}")
fb_cmake_parse_args(ARG "" "CRATE" "LIBS" "${ARGN}")

if(DEFINED ARG_CRATE)
set(crate_name "${ARG_CRATE}")
Expand Down Expand Up @@ -476,6 +478,11 @@ function(rust_cxx_bridge TARGET CXX_BRIDGE_FILE)
$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
$<INSTALL_INTERFACE:include>
)
target_link_libraries(
${crate_name}
PUBLIC
${ARG_LIBS}
)

file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/rust")
add_custom_command(
Expand Down Expand Up @@ -517,10 +524,11 @@ function(rust_cxx_bridge TARGET CXX_BRIDGE_FILE)
COMMENT "Generating cxx bindings for crate ${crate_name}"
)

target_sources(${crate_name}
target_sources(
${crate_name}
PRIVATE
"${CMAKE_CURRENT_BINARY_DIR}/${cxx_header}"
"${CMAKE_CURRENT_BINARY_DIR}/rust/cxx.h"
"${CMAKE_CURRENT_BINARY_DIR}/${cxx_source}"
"${CMAKE_CURRENT_BINARY_DIR}/${cxx_header}"
"${CMAKE_CURRENT_BINARY_DIR}/rust/cxx.h"
"${CMAKE_CURRENT_BINARY_DIR}/${cxx_source}"
)
endfunction()
18 changes: 18 additions & 0 deletions velox/common/base/Counters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,24 @@ void registerVeloxCounters() {
// and P100.
REPORT_ADD_HISTOGRAM_EXPORT_PERCENTILE(
kCounterCacheShrinkTimeMs, 10, 0, 100'000, 50, 90, 99, 100);

// Track memory reclaim exec time in range of [0, 600s] and reports
// P50, P90, P99, and P100.
REPORT_ADD_HISTOGRAM_EXPORT_PERCENTILE(
kCounterMemoryReclaimExecTimeMs, 20, 0, 600'000, 50, 90, 99, 100);

// Track memory reclaim task wait time in range of [0, 60s] and reports
// P50, P90, P99, and P100.
REPORT_ADD_HISTOGRAM_EXPORT_PERCENTILE(
kCounterMemoryReclaimWaitTimeMs, 10, 0, 60'000, 50, 90, 99, 100);

// Track memory reclaim bytes.
REPORT_ADD_STAT_EXPORT_TYPE(
kCounterMemoryReclaimedBytes, facebook::velox::StatType::SUM);

// Track the number of times that the memory reclaim wait timeouts.
REPORT_ADD_STAT_EXPORT_TYPE(
kCounterMemoryReclaimWaitTimeoutCount, facebook::velox::StatType::SUM);
}

} // namespace facebook::velox
14 changes: 13 additions & 1 deletion velox/common/base/Counters.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

namespace facebook::velox {

// Velox Counter Registration
/// Velox Counter Registration
void registerVeloxCounters();

constexpr folly::StringPiece kCounterHiveFileHandleGenerateLatencyMs{
Expand All @@ -30,4 +30,16 @@ constexpr folly::StringPiece kCounterCacheShrinkCount{
"velox.cache_shrink_count"};

constexpr folly::StringPiece kCounterCacheShrinkTimeMs{"velox.cache_shrink_ms"};

constexpr folly::StringPiece kCounterMemoryReclaimExecTimeMs{
"velox.memory_reclaim_exec_ms"};

constexpr folly::StringPiece kCounterMemoryReclaimedBytes{
"velox.memory_reclaim_bytes"};

constexpr folly::StringPiece kCounterMemoryReclaimWaitTimeMs{
"velox.memory_reclaim_wait_ms"};

constexpr folly::StringPiece kCounterMemoryReclaimWaitTimeoutCount{
"velox.memory_reclaim_wait_timeout_count"};
} // namespace facebook::velox
159 changes: 159 additions & 0 deletions velox/common/base/Scratch.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include "velox/common/base/RawVector.h"

/// A utility for reusable scoped temporary scratch areas.
namespace facebook::velox {

/// A collection of temporary reusable scratch vectors. The vectors are accessed
/// via the ScratchPtr scoped lease. The vectors are padded so that their last
/// element can be written at full SIMD width, as with raw_vector.
class Scratch {
public:
using Item = raw_vector<char>;

Scratch() = default;
Scratch(const Scratch& other) = delete;

~Scratch() {
reserve(0);
::free(items_);
items_ = nullptr;
capacity_ = 0;
fill_ = 0;
}
void operator=(const Scratch& other) = delete;

/// Returns the next reusable scratch vector or makes a new one.
Item get() {
if (fill_ == 0) {
return Item();
}
auto temp = std::move(items_[fill_ - 1]);
--fill_;
retainedSize_ -= temp.capacity();
return temp;
}

void release(Item&& item) {
retainedSize_ += item.capacity();
if (fill_ == capacity_) {
reserve(std::max(16, 2 * capacity_));
}
items_[fill_++] = std::move(item);
}

void trim() {
reserve(0);
retainedSize_ = 0;
}

size_t retainedSize() {
return retainedSize_;
}

private:
void reserve(int32_t newCapacity) {
VELOX_CHECK_LE(fill_, capacity_);
// Delete the items above the new capacity.
for (auto i = newCapacity; i < fill_; ++i) {
std::destroy_at(&items_[i]);
}
if (newCapacity > capacity_) {
Item* newItems =
reinterpret_cast<Item*>(::malloc(sizeof(Item) * newCapacity));
if (fill_ > 0) {
memcpy(newItems, items_, fill_ * sizeof(Item));
}
memset(newItems + fill_, 0, (newCapacity - fill_) * sizeof(Item));
free(items_);
items_ = newItems;
capacity_ = newCapacity;
}
fill_ = std::min(fill_, newCapacity);
}

Item* items_{nullptr};
int32_t fill_{0};
int32_t capacity_{0};
// The total size held. If too large from outlier use cases, 'this' should be
// trimmed.
int64_t retainedSize_{0};
};

/// A scoped lease for a scratch area of T. For scratch areas <=
/// 'inlineSize' the scratch area is inlined, typically on stack, and
/// no allocation will ever take place. The inline storage is padded
/// with a trailer of simd::kPadding bytes to allow writing at full
/// SIMD width at the end of the area.
template <typename T, int32_t inlineSize = 0>
class ScratchPtr {
public:
ScratchPtr(Scratch& scratch) : scratch_(&scratch) {}

ScratchPtr(const ScratchPtr& other) = delete;
ScratchPtr(ScratchPtr&& other) = delete;

inline ~ScratchPtr() {
if (data_.data()) {
scratch_->release(std::move(data_));
}
}

void operator=(ScratchPtr&& other) = delete;
void operator=(const ScratchPtr& other) = delete;

/// Returns a writable pointer to at least 'size' uninitialized
/// elements of T. The last element is followed by simd::kPadding
/// bytes to allow a full width SIMD store for any element. This may
/// be called once per lifetime.
T* get(int32_t size) {
VELOX_CHECK_NULL(ptr_);
size_ = size;
if (size <= inlineSize) {
ptr_ = inline_;
return ptr_;
}
data_ = scratch_->get();
data_.resize(size * sizeof(T));
ptr_ = reinterpret_cast<T*>(data_.data());
return ptr_;
}

/// Returns the pointer returned by a previous get(int32_t).
T* get() const {
VELOX_DCHECK_NOT_NULL(ptr_);
return ptr_;
}

/// Returns the size of the previous get(int32_t).
int32_t size() const {
return size_;
}

private:
Scratch* scratch_{nullptr};
raw_vector<char> data_;
T* ptr_{nullptr};
int32_t size_{0};
T inline_[inlineSize];
char padding_[inlineSize == 0 ? 0 : simd::kPadding];
};

} // namespace facebook::velox
71 changes: 62 additions & 9 deletions velox/common/base/SimdUtil-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -242,19 +242,72 @@ int32_t indicesOfSetBits(
return result - originalResult;
}

namespace detail {

template <typename T, typename A>
xsimd::batch_bool<T, A> leadingMask(int n, const A&) {
constexpr int N = xsimd::batch_bool<T, A>::size;
static const auto kMemo = ({
std::array<xsimd::batch_bool<T, A>, N> memo;
struct LeadingMask {
LeadingMask() {
bool tmp[N]{};
for (int i = 0; i < N; ++i) {
memo[i] = xsimd::batch_bool<T, A>::load_unaligned(tmp);
memo_[i] = xsimd::batch_bool<T, A>::load_unaligned(tmp);
tmp[i] = true;
}
memo;
});
return LIKELY(n >= N) ? xsimd::batch_bool<T, A>(true) : kMemo[n];
memo_[N] = xsimd::batch_bool<T, A>::load_unaligned(tmp);
}

xsimd::batch_bool<T, A> operator[](size_t i) const {
return memo_[i];
}

private:
static constexpr int N = xsimd::batch_bool<T, A>::size;
xsimd::batch_bool<T, A> memo_[N + 1];
};

extern const LeadingMask<int32_t, xsimd::default_arch> leadingMask32;
extern const LeadingMask<int64_t, xsimd::default_arch> leadingMask64;

template <typename T, typename A>
xsimd::batch_bool<T, xsimd::default_arch> leadingMask(int i, const A&);

template <>
inline xsimd::batch_bool<int32_t, xsimd::default_arch> leadingMask(
int i,
const xsimd::default_arch&) {
return leadingMask32[i];
}

template <>
inline xsimd::batch_bool<float, xsimd::default_arch> leadingMask(
int i,
const xsimd::default_arch&) {
return reinterpret_cast<
xsimd::batch_bool<float, xsimd::default_arch>::register_type>(
leadingMask32[i].data);
}

template <>
inline xsimd::batch_bool<int64_t, xsimd::default_arch> leadingMask(
int i,
const xsimd::default_arch&) {
return leadingMask64[i];
}

template <>
inline xsimd::batch_bool<double, xsimd::default_arch> leadingMask(
int i,
const xsimd::default_arch&) {
return reinterpret_cast<
xsimd::batch_bool<double, xsimd::default_arch>::register_type>(
leadingMask64[i].data);
}

} // namespace detail

template <typename T, typename A>
xsimd::batch_bool<T, A> leadingMask(int n, const A& arch) {
constexpr int N = xsimd::batch_bool<T, A>::size;
return detail::leadingMask<T, A>(std::min(n, N), arch);
}

namespace detail {
Expand Down Expand Up @@ -294,7 +347,7 @@ inline bool copyNextWord(void*& to, const void*& from, int32_t& bytes) {
} // namespace detail

template <typename A>
void memcpy(void* to, const void* from, int32_t bytes, const A& arch) {
inline void memcpy(void* to, const void* from, int32_t bytes, const A& arch) {
while (bytes >= batchByteSize(arch)) {
if (!detail::copyNextWord<xsimd::batch<int8_t, A>, A>(to, from, bytes)) {
return;
Expand Down
Loading

0 comments on commit 57a35c0

Please sign in to comment.