Skip to content

Commit

Permalink
Fix a deadlock
Browse files Browse the repository at this point in the history
This deadlock was introduced when sendrecv_test.cu was updated to handle
the Connection being returned in a future.
  • Loading branch information
olsaarik committed Aug 31, 2023
1 parent 60d54b9 commit 87a034a
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions test/mscclpp-test/sendrecv_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -157,16 +157,16 @@ void SendRecvTestEngine::setupConnections() {

auto sendConnFuture =
comm_->connectOnSetup(sendToRank, 0, getTransport(args_.rank, sendToRank, args_.nRanksPerNode, ibDevice));
comm_->setup();
smSemaphores.push_back(std::make_shared<mscclpp::SmDevice2DeviceSemaphore>(*comm_, sendConnFuture.get()));
if (recvFromRank != sendToRank) {
auto recvConnFuture =
comm_->connectOnSetup(recvFromRank, 0, getTransport(args_.rank, recvFromRank, args_.nRanksPerNode, ibDevice));
comm_->setup();
smSemaphores.push_back(std::make_shared<mscclpp::SmDevice2DeviceSemaphore>(*comm_, sendConnFuture.get()));
smSemaphores.push_back(std::make_shared<mscclpp::SmDevice2DeviceSemaphore>(*comm_, recvConnFuture.get()));
} else {
// reuse the send channel if worldSize is 2
smSemaphores.push_back(smSemaphores[0]);
comm_->setup();
smSemaphores.push_back(std::make_shared<mscclpp::SmDevice2DeviceSemaphore>(*comm_, sendConnFuture.get()));
smSemaphores.push_back(smSemaphores[0]); // reuse the send channel if worldSize is 2
}
comm_->setup();

Expand Down

0 comments on commit 87a034a

Please sign in to comment.