Skip to content

Commit

Permalink
Shorten collecting phase + auto transition to reporting phase
Browse files Browse the repository at this point in the history
  • Loading branch information
bboston7 committed May 17, 2024
1 parent f9f2698 commit 160d1f8
Show file tree
Hide file tree
Showing 7 changed files with 73 additions and 44 deletions.
4 changes: 3 additions & 1 deletion src/overlay/OverlayManagerImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -716,7 +716,9 @@ OverlayManagerImpl::tick()
}

// Check and update the overlay survey state
mSurveyManager->updateSurveyPhase();
mSurveyManager->updateSurveyPhase(getInboundAuthenticatedPeers(),
getOutboundAuthenticatedPeers(),
mApp.getConfig());

auto availablePendingSlots = availableOutboundPendingSlots();
if (availablePendingSlots == 0)
Expand Down
70 changes: 40 additions & 30 deletions src/overlay/SurveyDataManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ namespace stellar
{
namespace
{
// Collecting phase is limited to 2 hours. If 2 hours pass without receiving
// a StopSurveyCollecting message the `SurveyDataManager` will reset all data
// and transition to the `INACTIVE` phase.
constexpr std::chrono::hours COLLECTING_PHASE_MAX_DURATION{2};
// Collecting phase is limited to 30 minutes. If 30 minutes pass without
// receiving a StopSurveyCollecting message the `SurveyDataManager` will
// automatically transition to the reporting phase.
constexpr std::chrono::minutes COLLECTING_PHASE_MAX_DURATION{30};

// Reporting phase is limited to 3 hours, after which the
// `SurveyDataManager` will reset all data and transition to the `INACTIVE`
Expand Down Expand Up @@ -149,6 +149,35 @@ SurveyDataManager::startSurveyCollecting(
return false;
}

bool
SurveyDataManager::startReportingPhase(
std::map<NodeID, Peer::pointer> const& inboundPeers,
std::map<NodeID, Peer::pointer> const& outboundPeers, Config const& config)
{
if (mPhase != SurveyPhase::COLLECTING || !mFinalInboundPeerData.empty() ||
!mFinalOutboundPeerData.empty())
{
emitInconsistencyError("startReportingPhase");
return false;
}

mPhase = SurveyPhase::REPORTING;
mCollectEndTime = mGetNow();

// Finalize peer and node data
finalizePeerData(inboundPeers, mCollectingInboundPeerData,
mFinalInboundPeerData);
finalizePeerData(outboundPeers, mCollectingOutboundPeerData,
mFinalOutboundPeerData);
finalizeNodeData(config);

// Clear collecting data
mCollectingInboundPeerData.clear();
mCollectingOutboundPeerData.clear();

return true;
}

bool
SurveyDataManager::stopSurveyCollecting(
TimeSlicedSurveyStopCollectingMessage const& msg,
Expand All @@ -162,27 +191,7 @@ SurveyDataManager::stopSurveyCollecting(
mSurveyor == msg.surveyorID)
{
CLOG_TRACE(Overlay, "Stopping survey collecting with nonce {}", nonce);
mPhase = SurveyPhase::REPORTING;
mCollectEndTime = mGetNow();

if (!mFinalInboundPeerData.empty() || !mFinalOutboundPeerData.empty())
{
emitInconsistencyError("stopSurveyCollecting");
return false;
}

// Finalize peer and node data
finalizePeerData(inboundPeers, mCollectingInboundPeerData,
mFinalInboundPeerData);
finalizePeerData(outboundPeers, mCollectingOutboundPeerData,
mFinalOutboundPeerData);
finalizeNodeData(config);

// Clear collecting data
mCollectingInboundPeerData.clear();
mCollectingOutboundPeerData.clear();

return true;
return startReportingPhase(inboundPeers, outboundPeers, config);
}
CLOG_TRACE(Overlay,
"Ignoring request to stop survey collecting with nonce {} "
Expand Down Expand Up @@ -311,7 +320,9 @@ SurveyDataManager::setPhaseMaxDurationsForTesting(
#endif

void
SurveyDataManager::updateSurveyPhase()
SurveyDataManager::updateSurveyPhase(
std::map<NodeID, Peer::pointer> const& inboundPeers,
std::map<NodeID, Peer::pointer> const& outboundPeers, Config const& config)
{
switch (mPhase)
{
Expand All @@ -324,10 +335,9 @@ SurveyDataManager::updateSurveyPhase()
if (mGetNow() >
mCollectStartTime.value() + getCollectingPhaseMaxDuration())
{
CLOG_TRACE(
Overlay,
"Survey collecting phase has expired. Resetting survey data.");
reset();
CLOG_TRACE(Overlay, "Survey collecting phase has expired. "
"Advancing to reporting phase.");
startReportingPhase(inboundPeers, outboundPeers, config);
}
break;
case SurveyPhase::REPORTING:
Expand Down
21 changes: 16 additions & 5 deletions src/overlay/SurveyDataManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,12 @@ class SurveyDataManager : public NonMovableOrCopyable
bool surveyIsActive() const;

// Checks and updates the phase of the survey if necessary. Resets the
// survey state upon transition to `SurveyPhase::INACTIVE`.
void updateSurveyPhase();
// survey state upon transition to `SurveyPhase::INACTIVE`. Takes peer and
// config info in case the collecting phase times out and the survey
// automatically transitions to the reporting phase.
void updateSurveyPhase(std::map<NodeID, Peer::pointer> const& inboundPeers,
std::map<NodeID, Peer::pointer> const& outboundPeers,
Config const& config);

#ifdef BUILD_TESTS
// Call to use the provided duration as max for both collecting and
Expand Down Expand Up @@ -188,9 +192,16 @@ class SurveyDataManager : public NonMovableOrCopyable
// Reset survey data. Intended to be called when survey data expires.
void reset();

// Function to call when the impossible occurs. Logs an error and resets the
// survey. Use instead of `releaseAssert` as an overlay survey failure is
// not important enough to crash the program.
// Transition to the reporting phase. Should only be called from the
// collecting phase. Returns `false` if transition fails.
bool
startReportingPhase(std::map<NodeID, Peer::pointer> const& inboundPeers,
std::map<NodeID, Peer::pointer> const& outboundPeers,
Config const& config);

// Function to call when the impossible occurs. Logs an error and resets
// the survey. Use instead of `releaseAssert` as an overlay survey
// failure is not important enough to crash the program.
void emitInconsistencyError(std::string const& where);

// Finalize node data into `mFinalNodeData`. Should only be called after
Expand Down
6 changes: 4 additions & 2 deletions src/overlay/SurveyManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1142,9 +1142,11 @@ SurveyManager::recordDroppedPeer(Peer const& peer)
}

void
SurveyManager::updateSurveyPhase()
SurveyManager::updateSurveyPhase(
std::map<NodeID, Peer::pointer> const& inboundPeers,
std::map<NodeID, Peer::pointer> const& outboundPeers, Config const& config)
{
mSurveyDataManager.updateSurveyPhase();
mSurveyDataManager.updateSurveyPhase(inboundPeers, outboundPeers, config);
}

bool
Expand Down
4 changes: 3 additions & 1 deletion src/overlay/SurveyManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,9 @@ class SurveyManager : public std::enable_shared_from_this<SurveyManager>,
void modifyPeerData(Peer const& peer,
std::function<void(CollectingPeerData&)> f);
void recordDroppedPeer(Peer const& peer);
void updateSurveyPhase();
void updateSurveyPhase(std::map<NodeID, Peer::pointer> const& inboundPeers,
std::map<NodeID, Peer::pointer> const& outboundPeers,
Config const& config);

#ifdef BUILD_TESTS
// Get a reference to the internal `SurveyDataManager` (for testing only)
Expand Down
7 changes: 3 additions & 4 deletions src/overlay/test/SurveyManagerTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -669,15 +669,14 @@ TEST_CASE("Time sliced static topology survey", "[overlay][survey][topology]")
// Wait for collecting phase to time out
simulation->crankForAtLeast(phaseDuration, false);

// Check that all surveys are inactive
// Surveys should have automatically transitioned to reporting phase
for (int i = A; i <= E; ++i)
{
auto& surveyDataManager = simulation->getNode(keyList[i])
->getOverlayManager()
.getSurveyManager()
.getSurveyDataManagerForTesting();
REQUIRE(!surveyDataManager.surveyIsActive());
REQUIRE(!surveyDataManager.getNonce().has_value());
REQUIRE(surveyDataManager.nonceIsReporting(nonce));
}
}
}
Expand Down Expand Up @@ -891,7 +890,7 @@ TEST_CASE("Time sliced dynamic topology survey", "[overlay][survey][topology]")
}

// Advance survey
simulation->crankForAtLeast(phaseDuration, false);
simulation->crankForAtLeast(phaseDuration * 2, false);

// All surveys should now be inactive
checkSurveyState(/*expectedNonce*/ std::nullopt, /*isReporting*/ false,
Expand Down
5 changes: 4 additions & 1 deletion src/simulation/Simulation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,10 @@ Simulation::crankNode(NodeID const& id, VirtualClock::time_point timeout)
}

// Update network survey phase
app->getOverlayManager().getSurveyManager().updateSurveyPhase();
OverlayManager& om = app->getOverlayManager();
om.getSurveyManager().updateSurveyPhase(om.getInboundAuthenticatedPeers(),
om.getOutboundAuthenticatedPeers(),
app->getConfig());

return count - quantumClicks;
}
Expand Down

0 comments on commit 160d1f8

Please sign in to comment.