Skip to content

Commit

Permalink
[Core] Shuffle the configured local directories for local spills (#3525)
Browse files Browse the repository at this point in the history
  • Loading branch information
marin-ma authored Oct 26, 2023
1 parent dd7eab4 commit 9cd88de
Showing 1 changed file with 6 additions and 0 deletions.
6 changes: 6 additions & 0 deletions cpp/core/shuffle/LocalPartitionWriter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/

#include "shuffle/LocalPartitionWriter.h"
#include <random>
#include <thread>
#include "shuffle/Utils.h"
#include "utils/DebugOut.h"
Expand All @@ -32,6 +33,11 @@ std::string LocalPartitionWriterBase::nextSpilledFileDir() {

arrow::Status LocalPartitionWriterBase::setLocalDirs() {
ARROW_ASSIGN_OR_RAISE(configuredDirs_, getConfiguredLocalDirs());
// Shuffle the configured local directories into a random order. This makes it unlikely that every task starts with
// the same directory for its spilled files.
std::random_device rd;
std::default_random_engine engine(rd());
std::shuffle(configuredDirs_.begin(), configuredDirs_.end(), engine);

subDirSelection_.assign(configuredDirs_.size(), 0);

// Both data_file and shuffle_index_file should be set through jni.
Expand Down

0 comments on commit 9cd88de

Please sign in to comment.