Skip to content

Commit

Permalink
Add more size classes
Browse files Browse the repository at this point in the history
Experiment to see dynamics with 2, 4, 8, 16 MB classes added.
  • Loading branch information
Orri Erling committed Jun 11, 2024
1 parent 65bd5de commit 64d34e3
Show file tree
Hide file tree
Showing 146 changed files with 1,785 additions and 4,112 deletions.
3 changes: 0 additions & 3 deletions .github/workflows/build-metrics.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,6 @@ jobs:
python3 -m pip install -r scripts/benchmark-requirements.txt
- name: "Upload Metrics"
# This disables the upload and report generation on fork PRs but allows it for forks from within the main repo.
if: ${{ github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == 'facebookincubator/velox' }}
env:
CONBENCH_URL: "https://velox-conbench.voltrondata.run/"
CONBENCH_MACHINE_INFO_NAME: "GitHub-runner-${{ matrix.runner }}"
Expand All @@ -120,7 +118,6 @@ jobs:
"/tmp/metrics"
upload-report:
if: ${{ github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == 'facebookincubator/velox' }}
permissions:
contents: write
runs-on: ubuntu-latest
Expand Down
5 changes: 1 addition & 4 deletions .github/workflows/scheduled.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,7 @@ permissions:
contents: read

concurrency:
# This will not cancel fuzzer runs on main (regardless of which trigger)
# by making the commit sha part of the group but will use the branch
# name in PRs to cancel on going runs on a new commit.
group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.event_name == 'pull_request' && github.head_ref || github.sha }}
group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.head_ref || github.ref }}
cancel-in-progress: true

env:
Expand Down
1 change: 0 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -530,7 +530,6 @@ if(CMAKE_HOST_SYSTEM_NAME MATCHES "Darwin")
endif()
find_package(BISON 3.0.4 REQUIRED)
find_package(FLEX 2.5.13 REQUIRED)
find_package(double-conversion 3.1.5 REQUIRED)

include_directories(SYSTEM velox)
include_directories(SYSTEM velox/external)
Expand Down
64 changes: 17 additions & 47 deletions velox/benchmarks/basic/CastBenchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,24 +30,9 @@ int main(int argc, char** argv) {
ExpressionBenchmarkBuilder benchmarkBuilder;
const vector_size_t vectorSize = 1000;
auto vectorMaker = benchmarkBuilder.vectorMaker();
auto emptyInput = vectorMaker.flatVector<std::string>(
vectorSize, [](auto /*row*/) { return ""; });
auto validInput = vectorMaker.flatVector<std::string>(
vectorSize, [](auto row) { return std::to_string(row); });
auto invalidInput = vectorMaker.flatVector<std::string>(
vectorSize, [](auto /*row*/) { return "$"; });
auto validDoubleStringInput = vectorMaker.flatVector<std::string>(
vectorSize, [](auto row) { return fmt::format("{}.12345678910", row); });
auto validNaNInput = vectorMaker.flatVector<std::string>(
vectorSize, [](auto /*row*/) { return "NaN"; });
auto validInfinityInput = vectorMaker.flatVector<std::string>(
vectorSize, [](auto /*row*/) { return "Infinity"; });
auto invalidNaNInput = vectorMaker.flatVector<std::string>(
vectorSize, [](auto /*row*/) { return "nan"; });
auto invalidInfinityInput = vectorMaker.flatVector<std::string>(
vectorSize, [](auto /*row*/) { return "infinity"; });
auto spaceInput = vectorMaker.flatVector<std::string>(
vectorSize, [](auto /*row*/) { return " "; });
auto emptyInput = vectorMaker.flatVector<facebook::velox::StringView>({""});
auto validInput = vectorMaker.flatVector<facebook::velox::StringView>({""});
auto nanInput = vectorMaker.flatVector<facebook::velox::StringView>({""});
auto decimalInput = vectorMaker.flatVector<int64_t>(
vectorSize, [&](auto j) { return 12345 * j; }, nullptr, DECIMAL(9, 2));
auto shortDecimalInput = vectorMaker.flatVector<int64_t>(
Expand Down Expand Up @@ -80,6 +65,16 @@ int main(int argc, char** argv) {
auto invalidDateStrings = vectorMaker.flatVector<std::string>(
vectorSize, [](auto row) { return fmt::format("2024-05...{}", row); });

emptyInput->resize(vectorSize);
validInput->resize(vectorSize);
nanInput->resize(vectorSize);

for (int i = 0; i < vectorSize; i++) {
nanInput->set(i, "$"_sv);
emptyInput->set(i, StringView::makeInline(std::string("")));
validInput->set(i, StringView::makeInline(std::to_string(i)));
}

benchmarkBuilder
.addBenchmarkSet(
"cast_varhar_as_date",
Expand Down Expand Up @@ -117,37 +112,13 @@ int main(int argc, char** argv) {
vectorMaker.rowVector({"timestamp"}, {timestampInput}))
.addExpression("cast", "cast (timestamp as varchar)");

benchmarkBuilder
.addBenchmarkSet(
"cast_varchar_as_double",
vectorMaker.rowVector(
{"valid",
"valid_nan",
"valid_infinity",
"invalid_nan",
"invalid_infinity",
"space"},
{validDoubleStringInput,
validNaNInput,
validInfinityInput,
invalidNaNInput,
invalidInfinityInput,
spaceInput}))
.addExpression("cast_valid", "cast (valid as double)")
.addExpression("cast_valid_nan", "cast (valid_nan as double)")
.addExpression("cast_valid_infinity", "cast (valid_infinity as double)")
.addExpression("try_cast_invalid_nan", "try_cast (invalid_nan as double)")
.addExpression(
"try_cast_invalid_infinity", "try_cast (invalid_infinity as double)")
.addExpression("try_cast_space", "try_cast (space as double)");

benchmarkBuilder
.addBenchmarkSet(
"cast",
vectorMaker.rowVector(
{"valid",
"empty",
"invalid",
"nan",
"decimal",
"short_decimal",
"long_decimal",
Expand All @@ -157,7 +128,7 @@ int main(int argc, char** argv) {
"large_double"},
{validInput,
emptyInput,
invalidInput,
nanInput,
decimalInput,
shortDecimalInput,
longDecimalInput,
Expand All @@ -168,9 +139,8 @@ int main(int argc, char** argv) {
.addExpression("try_cast_invalid_empty_input", "try_cast (empty as int) ")
.addExpression(
"tryexpr_cast_invalid_empty_input", "try (cast (empty as int))")
.addExpression("try_cast_invalid_number", "try_cast (invalid as int)")
.addExpression(
"tryexpr_cast_invalid_number", "try (cast (invalid as int))")
.addExpression("try_cast_invalid_nan", "try_cast (nan as int)")
.addExpression("tryexpr_cast_invalid_nan", "try (cast (nan as int))")
.addExpression("try_cast_valid", "try_cast (valid as int)")
.addExpression("tryexpr_cast_valid", "try (cast (valid as int))")
.addExpression("cast_valid", "cast(valid as int)")
Expand Down
7 changes: 0 additions & 7 deletions velox/benchmarks/tpch/TpchBenchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -263,8 +263,6 @@ class TpchBenchmark {
configurationValues
[connector::hive::HiveConfig::kMaxCoalescedDistanceBytes] =
std::to_string(FLAGS_max_coalesced_distance_bytes);
configurationValues[connector::hive::HiveConfig::kPrefetchRowGroups] =
std::to_string(FLAGS_parquet_prefetch_rowgroups);
auto properties =
std::make_shared<const core::MemConfig>(configurationValues);

Expand Down Expand Up @@ -496,11 +494,6 @@ BENCHMARK(q1) {
benchmark.run(planContext);
}

BENCHMARK(q2) {
const auto planContext = queryBuilder->getQueryPlan(2);
benchmark.run(planContext);
}

BENCHMARK(q3) {
const auto planContext = queryBuilder->getQueryPlan(3);
benchmark.run(planContext);
Expand Down
2 changes: 0 additions & 2 deletions velox/common/base/SpillConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ SpillConfig::SpillConfig(
std::string _fileNamePrefix,
uint64_t _maxFileSize,
uint64_t _writeBufferSize,
uint64_t _readBufferSize,
folly::Executor* _executor,
int32_t _minSpillableReservationPct,
int32_t _spillableReservationGrowthPct,
Expand All @@ -42,7 +41,6 @@ SpillConfig::SpillConfig(
_maxFileSize == 0 ? std::numeric_limits<int64_t>::max()
: _maxFileSize),
writeBufferSize(_writeBufferSize),
readBufferSize(_readBufferSize),
executor(_executor),
minSpillableReservationPct(_minSpillableReservationPct),
spillableReservationGrowthPct(_spillableReservationGrowthPct),
Expand Down
6 changes: 0 additions & 6 deletions velox/common/base/SpillConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ struct SpillConfig {
std::string _filePath,
uint64_t _maxFileSize,
uint64_t _writeBufferSize,
uint64_t _readBufferSize,
folly::Executor* _executor,
int32_t _minSpillableReservationPct,
int32_t _spillableReservationGrowthPct,
Expand Down Expand Up @@ -94,11 +93,6 @@ struct SpillConfig {
/// storage system for io efficiency.
uint64_t writeBufferSize;

/// Specifies the buffer size to read from one spilled file. If the underlying
/// filesystem supports async read, we do read-ahead with double buffering,
/// which doubles the buffer used to read from each spill file.
uint64_t readBufferSize;

/// Executor for spilling. If nullptr spilling writes on the Driver's thread.
folly::Executor* executor; // Not owned.

Expand Down
19 changes: 0 additions & 19 deletions velox/common/base/StatsReporter.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,27 +60,8 @@ enum class StatType {
RATE,
/// Tracks the count of inserted values.
COUNT,
/// Tracks the histogram of inserted values.
HISTOGRAM,
};

inline std::string statTypeString(StatType stat) {
switch (stat) {
case StatType::AVG:
return "Avg";
case StatType::SUM:
return "Sum";
case StatType::RATE:
return "Rate";
case StatType::COUNT:
return "Count";
case StatType::HISTOGRAM:
return "Histogram";
default:
return fmt::format("UNKNOWN: {}", static_cast<int>(stat));
}
}

/// This is the base stats reporter interface that should be extended by
/// different implementations.
class BaseStatsReporter {
Expand Down
3 changes: 0 additions & 3 deletions velox/common/base/tests/SpillConfigTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ TEST(SpillConfig, spillLevel) {
"fakeSpillPath",
0,
0,
0,
nullptr,
0,
0,
Expand Down Expand Up @@ -116,7 +115,6 @@ TEST(SpillConfig, spillLevelLimit) {
"fakeSpillPath",
0,
0,
0,
nullptr,
0,
0,
Expand Down Expand Up @@ -162,7 +160,6 @@ TEST(SpillConfig, spillableReservationPercentages) {
"spillableReservationPercentages",
0,
0,
0,
nullptr,
testData.minPct,
testData.growthPct,
Expand Down
16 changes: 9 additions & 7 deletions velox/common/file/File.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,11 +80,11 @@ class ReadFile {
folly::Range<const common::Region*> regions,
folly::Range<folly::IOBuf*> iobufs) const;

/// Like preadv but may execute asynchronously and returns the read size or
/// exception via SemiFuture. Use hasPreadvAsync() to check if the
/// implementation is in fact asynchronous.
///
/// This method should be thread safe.
// Like preadv but may execute asynchronously and returns the read
// size or exception via SemiFuture. Use hasPreadvAsync() to check
// if the implementation is in fact asynchronous.
//
// This method should be thread safe.
virtual folly::SemiFuture<uint64_t> preadvAsync(
uint64_t offset,
const std::vector<folly::Range<char*>>& buffers) const {
Expand Down Expand Up @@ -124,8 +124,10 @@ class ReadFile {

virtual std::string getName() const = 0;

/// Gets the natural size for reads. Returns the number of bytes that should
/// be read at once.
//
// Get the natural size for reads.
// @return the number of bytes that should be read at once
//
virtual uint64_t getNaturalReadSize() const = 0;

protected:
Expand Down
25 changes: 2 additions & 23 deletions velox/common/file/tests/FaultyFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,10 @@ std::string FaultFileOperation::typeString(Type type) {
FaultyReadFile::FaultyReadFile(
const std::string& path,
std::shared_ptr<ReadFile> delegatedFile,
FileFaultInjectionHook injectionHook,
folly::Executor* executor)
FileFaultInjectionHook injectionHook)
: path_(path),
delegatedFile_(std::move(delegatedFile)),
injectionHook_(std::move(injectionHook)),
executor_(executor) {
injectionHook_(std::move(injectionHook)) {
VELOX_CHECK_NOT_NULL(delegatedFile_);
}

Expand Down Expand Up @@ -68,25 +66,6 @@ uint64_t FaultyReadFile::preadv(
return delegatedFile_->preadv(offset, buffers);
}

folly::SemiFuture<uint64_t> FaultyReadFile::preadvAsync(
uint64_t offset,
const std::vector<folly::Range<char*>>& buffers) const {
// TODO: add fault injection for async read later.
if (delegatedFile_->hasPreadvAsync() || executor_ == nullptr) {
return delegatedFile_->preadvAsync(offset, buffers);
}
auto promise = std::make_unique<folly::Promise<uint64_t>>();
folly::SemiFuture<uint64_t> future = promise->getSemiFuture();
executor_->add([this,
_promise = std::move(promise),
_offset = offset,
_buffers = buffers]() {
auto delegateFuture = delegatedFile_->preadvAsync(_offset, _buffers);
_promise->setValue(delegateFuture.wait().value());
});
return future;
}

FaultyWriteFile::FaultyWriteFile(
const std::string& path,
std::shared_ptr<WriteFile> delegatedFile,
Expand Down
15 changes: 1 addition & 14 deletions velox/common/file/tests/FaultyFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,7 @@ class FaultyReadFile : public ReadFile {
FaultyReadFile(
const std::string& path,
std::shared_ptr<ReadFile> delegatedFile,
FileFaultInjectionHook injectionHook,
folly::Executor* executor);
FileFaultInjectionHook injectionHook);

~FaultyReadFile() override{};

Expand Down Expand Up @@ -136,22 +135,10 @@ class FaultyReadFile : public ReadFile {
return delegatedFile_->getNaturalReadSize();
}

bool hasPreadvAsync() const override {
if (executor_ != nullptr) {
return true;
}
return delegatedFile_->hasPreadvAsync();
}

folly::SemiFuture<uint64_t> preadvAsync(
uint64_t offset,
const std::vector<folly::Range<char*>>& buffers) const override;

private:
const std::string path_;
const std::shared_ptr<ReadFile> delegatedFile_;
const FileFaultInjectionHook injectionHook_;
folly::Executor* const executor_;
};

class FaultyWriteFile : public WriteFile {
Expand Down
7 changes: 3 additions & 4 deletions velox/common/file/tests/FaultyFileSystem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,9 @@ std::unique_ptr<ReadFile> FaultyFileSystem::openFileForRead(
auto delegatedFile = getFileSystem(delegatedPath, config_)
->openFileForRead(delegatedPath, options);
return std::make_unique<FaultyReadFile>(
std::string(path),
std::move(delegatedFile),
[&](FaultFileOperation* op) { maybeInjectFileFault(op); },
executor_);
std::string(path), std::move(delegatedFile), [&](FaultFileOperation* op) {
maybeInjectFileFault(op);
});
}

std::unique_ptr<WriteFile> FaultyFileSystem::openFileForWrite(
Expand Down
8 changes: 0 additions & 8 deletions velox/common/file/tests/FaultyFileSystem.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
#include <memory>
#include <string_view>
#include "velox/common/file/tests/FaultyFile.h"
#include "velox/common/file/tests/FaultyFileSystem.h"

namespace facebook::velox::tests::utils {

Expand Down Expand Up @@ -76,12 +75,6 @@ class FaultyFileSystem : public FileSystem {

void rmdir(std::string_view path) override;

/// Sets executor for async read execution.
void setExecutor(folly::Executor* executor) {
std::lock_guard<std::mutex> l(mu_);
executor_ = executor;
}

/// Setups hook for file fault injection.
void setFileInjectionHook(FileFaultInjectionHook hook);

Expand Down Expand Up @@ -136,7 +129,6 @@ class FaultyFileSystem : public FileSystem {

mutable std::mutex mu_;
std::optional<FileInjections> fileInjections_;
folly::Executor* executor_;
};

/// Registers the faulty filesystem.
Expand Down
Loading

0 comments on commit 64d34e3

Please sign in to comment.