diff --git a/velox/exec/fuzzer/WriterFuzzer.cpp b/velox/exec/fuzzer/WriterFuzzer.cpp index d3d60e49aaf2..9fd59ece22ea 100644 --- a/velox/exec/fuzzer/WriterFuzzer.cpp +++ b/velox/exec/fuzzer/WriterFuzzer.cpp @@ -17,6 +17,7 @@ #include +#include #include #include #include "velox/common/base/Fs.h" @@ -114,7 +115,18 @@ class WriterFuzzer { const std::vector& dataTypes, int32_t maxDepth, std::vector& names, - std::vector& types); + std::vector& types, + int32_t offset = 0); + + // Generates at least one and up to maxNumColumns columns + // with a random number of those columns overlapping as bucket by columns. + // Returns the name of the overlapping/gererated columns + // and the sort column offset due to overlapping with bucket columns. + std::tuple, int> generateSortColumns( + int32_t maxNumColumns, + std::vector& names, + std::vector& types, + std::vector& bucketColumns); // Generates input data for table write. std::vector generateInputData( @@ -342,12 +354,11 @@ void WriterFuzzer::go() { bucketCount = boost::random::uniform_int_distribution(1, 3)(rng_); - // TODO: sort columns can overlap as bucket columns // 50% of times test ordered write. if (vectorFuzzer_.coinToss(0.5)) { sortColumnOffset = names.size(); - auto sortColumns = generateColumns( - 3, "s", kSupportedSortColumnTypes_, 1, names, types); + auto [sortColumns, offset] = generateSortColumns(3, names, types, bucketColumns); + sortColumnOffset -= offset; sortBy.reserve(sortColumns.size()); for (const auto& sortByColumn : sortColumns) { sortBy.push_back(std::make_shared( @@ -392,11 +403,12 @@ std::vector WriterFuzzer::generateColumns( const std::vector& dataTypes, int32_t maxDepth, std::vector& names, - std::vector& types) { + std::vector& types, + const int32_t offset) { const auto numColumns = boost::random::uniform_int_distribution(1, maxNumColumns)(rng_); std::vector columns; - for (auto i = 0; i < numColumns; ++i) { + for (auto i = offset; i < numColumns; ++i) { columns.push_back(fmt::format("{}{}", prefix, i)); // Pick random, possibly complex, type. @@ -406,6 +418,33 @@ std::vector WriterFuzzer::generateColumns( return columns; } +std::tuple, int> WriterFuzzer::generateSortColumns( + int32_t maxNumColumns, + std::vector& names, + std::vector& types, + std::vector& bucketColumns) { + // A random number of sort columns will overlap as bucket columns, which are already generated + const auto maxOverlapColumns = std::min(maxNumColumns, bucketColumns.size()); + const auto numOverlapColumns = + boost::random::uniform_int_distribution(0, maxOverlapColumns)(rng_); + + auto overlapOffset = bucketColumns.size() - numOverlapColumns; + std::vector columns; + for (auto i = 0; i < numOverlapColumns; ++i) { + columns.push_back(bucketColumns[overlapOffset + i]); + } + + // Remaining columns which do not overlap as bucket by columns are added as new columns with prefix "s" + auto remainingColumns = maxNumColumns - numOverlapColumns; + if (remainingColumns > 0) { + auto nonOverlapColumns = generateColumns( + remainingColumns, "s", kSupportedSortColumnTypes_, 1, names, types, numOverlapColumns); + columns.insert(columns.end(), nonOverlapColumns.begin(), nonOverlapColumns.end()); + } + + return {columns, numOverlapColumns}; +} + std::vector WriterFuzzer::generateInputData( std::vector names, std::vector types,