From 160d1f810654ba54f056032cff7d9f51306f2c46 Mon Sep 17 00:00:00 2001 From: Brett Boston Date: Fri, 17 May 2024 13:36:43 -0700 Subject: [PATCH] Shorten collecting phase + auto transition to reporting phase --- src/overlay/OverlayManagerImpl.cpp | 4 +- src/overlay/SurveyDataManager.cpp | 70 ++++++++++++++----------- src/overlay/SurveyDataManager.h | 21 ++++++-- src/overlay/SurveyManager.cpp | 6 ++- src/overlay/SurveyManager.h | 4 +- src/overlay/test/SurveyManagerTests.cpp | 7 ++- src/simulation/Simulation.cpp | 5 +- 7 files changed, 73 insertions(+), 44 deletions(-) diff --git a/src/overlay/OverlayManagerImpl.cpp b/src/overlay/OverlayManagerImpl.cpp index 731b99d298..9378f10f9e 100644 --- a/src/overlay/OverlayManagerImpl.cpp +++ b/src/overlay/OverlayManagerImpl.cpp @@ -716,7 +716,9 @@ OverlayManagerImpl::tick() } // Check and update the overlay survey state - mSurveyManager->updateSurveyPhase(); + mSurveyManager->updateSurveyPhase(getInboundAuthenticatedPeers(), + getOutboundAuthenticatedPeers(), + mApp.getConfig()); auto availablePendingSlots = availableOutboundPendingSlots(); if (availablePendingSlots == 0) diff --git a/src/overlay/SurveyDataManager.cpp b/src/overlay/SurveyDataManager.cpp index 41323d8e48..e0ef62d2a8 100644 --- a/src/overlay/SurveyDataManager.cpp +++ b/src/overlay/SurveyDataManager.cpp @@ -17,10 +17,10 @@ namespace stellar { namespace { -// Collecting phase is limited to 2 hours. If 2 hours pass without receiving -// a StopSurveyCollecting message the `SurveyDataManager` will reset all data -// and transition to the `INACTIVE` phase. -constexpr std::chrono::hours COLLECTING_PHASE_MAX_DURATION{2}; +// Collecting phase is limited to 30 minutes. If 30 minutes pass without +// receiving a StopSurveyCollecting message the `SurveyDataManager` will +// automatically transition to the reporting phase. +constexpr std::chrono::minutes COLLECTING_PHASE_MAX_DURATION{30}; // Reporting phase is limited to 3 hours, after which the // `SurveyDataManager` will reset all data and transition to the `INACTIVE` @@ -149,6 +149,35 @@ SurveyDataManager::startSurveyCollecting( return false; } +bool +SurveyDataManager::startReportingPhase( + std::map const& inboundPeers, + std::map const& outboundPeers, Config const& config) +{ + if (mPhase != SurveyPhase::COLLECTING || !mFinalInboundPeerData.empty() || + !mFinalOutboundPeerData.empty()) + { + emitInconsistencyError("startReportingPhase"); + return false; + } + + mPhase = SurveyPhase::REPORTING; + mCollectEndTime = mGetNow(); + + // Finalize peer and node data + finalizePeerData(inboundPeers, mCollectingInboundPeerData, + mFinalInboundPeerData); + finalizePeerData(outboundPeers, mCollectingOutboundPeerData, + mFinalOutboundPeerData); + finalizeNodeData(config); + + // Clear collecting data + mCollectingInboundPeerData.clear(); + mCollectingOutboundPeerData.clear(); + + return true; +} + bool SurveyDataManager::stopSurveyCollecting( TimeSlicedSurveyStopCollectingMessage const& msg, @@ -162,27 +191,7 @@ SurveyDataManager::stopSurveyCollecting( mSurveyor == msg.surveyorID) { CLOG_TRACE(Overlay, "Stopping survey collecting with nonce {}", nonce); - mPhase = SurveyPhase::REPORTING; - mCollectEndTime = mGetNow(); - - if (!mFinalInboundPeerData.empty() || !mFinalOutboundPeerData.empty()) - { - emitInconsistencyError("stopSurveyCollecting"); - return false; - } - - // Finalize peer and node data - finalizePeerData(inboundPeers, mCollectingInboundPeerData, - mFinalInboundPeerData); - finalizePeerData(outboundPeers, mCollectingOutboundPeerData, - mFinalOutboundPeerData); - finalizeNodeData(config); - - // Clear collecting data - mCollectingInboundPeerData.clear(); - mCollectingOutboundPeerData.clear(); - - return true; + return startReportingPhase(inboundPeers, outboundPeers, config); } CLOG_TRACE(Overlay, "Ignoring request to stop survey collecting with nonce {} " @@ -311,7 +320,9 @@ SurveyDataManager::setPhaseMaxDurationsForTesting( #endif void -SurveyDataManager::updateSurveyPhase() +SurveyDataManager::updateSurveyPhase( + std::map const& inboundPeers, + std::map const& outboundPeers, Config const& config) { switch (mPhase) { @@ -324,10 +335,9 @@ SurveyDataManager::updateSurveyPhase() if (mGetNow() > mCollectStartTime.value() + getCollectingPhaseMaxDuration()) { - CLOG_TRACE( - Overlay, - "Survey collecting phase has expired. Resetting survey data."); - reset(); + CLOG_TRACE(Overlay, "Survey collecting phase has expired. " + "Advancing to reporting phase."); + startReportingPhase(inboundPeers, outboundPeers, config); } break; case SurveyPhase::REPORTING: diff --git a/src/overlay/SurveyDataManager.h b/src/overlay/SurveyDataManager.h index d311018e0f..8f152c452e 100644 --- a/src/overlay/SurveyDataManager.h +++ b/src/overlay/SurveyDataManager.h @@ -136,8 +136,12 @@ class SurveyDataManager : public NonMovableOrCopyable bool surveyIsActive() const; // Checks and updates the phase of the survey if necessary. Resets the - // survey state upon transition to `SurveyPhase::INACTIVE`. - void updateSurveyPhase(); + // survey state upon transition to `SurveyPhase::INACTIVE`. Takes peer and + // config info in case the collecting phase times out and the survey + // automatically transitions to the reporting phase. + void updateSurveyPhase(std::map const& inboundPeers, + std::map const& outboundPeers, + Config const& config); #ifdef BUILD_TESTS // Call to use the provided duration as max for both collecting and @@ -188,9 +192,16 @@ class SurveyDataManager : public NonMovableOrCopyable // Reset survey data. Intended to be called when survey data expires. void reset(); - // Function to call when the impossible occurs. Logs an error and resets the - // survey. Use instead of `releaseAssert` as an overlay survey failure is - // not important enough to crash the program. + // Transition to the reporting phase. Should only be called from the + // collecting phase. Returns `false` if transition fails. + bool + startReportingPhase(std::map const& inboundPeers, + std::map const& outboundPeers, + Config const& config); + + // Function to call when the impossible occurs. Logs an error and resets + // the survey. Use instead of `releaseAssert` as an overlay survey + // failure is not important enough to crash the program. void emitInconsistencyError(std::string const& where); // Finalize node data into `mFinalNodeData`. Should only be called after diff --git a/src/overlay/SurveyManager.cpp b/src/overlay/SurveyManager.cpp index 2ab8eb6d7c..ec02d99a30 100644 --- a/src/overlay/SurveyManager.cpp +++ b/src/overlay/SurveyManager.cpp @@ -1142,9 +1142,11 @@ SurveyManager::recordDroppedPeer(Peer const& peer) } void -SurveyManager::updateSurveyPhase() +SurveyManager::updateSurveyPhase( + std::map const& inboundPeers, + std::map const& outboundPeers, Config const& config) { - mSurveyDataManager.updateSurveyPhase(); + mSurveyDataManager.updateSurveyPhase(inboundPeers, outboundPeers, config); } bool diff --git a/src/overlay/SurveyManager.h b/src/overlay/SurveyManager.h index 46b1817b9d..429cac12d6 100644 --- a/src/overlay/SurveyManager.h +++ b/src/overlay/SurveyManager.h @@ -81,7 +81,9 @@ class SurveyManager : public std::enable_shared_from_this, void modifyPeerData(Peer const& peer, std::function f); void recordDroppedPeer(Peer const& peer); - void updateSurveyPhase(); + void updateSurveyPhase(std::map const& inboundPeers, + std::map const& outboundPeers, + Config const& config); #ifdef BUILD_TESTS // Get a reference to the internal `SurveyDataManager` (for testing only) diff --git a/src/overlay/test/SurveyManagerTests.cpp b/src/overlay/test/SurveyManagerTests.cpp index 3f57e9816a..a27ccf18ab 100644 --- a/src/overlay/test/SurveyManagerTests.cpp +++ b/src/overlay/test/SurveyManagerTests.cpp @@ -669,15 +669,14 @@ TEST_CASE("Time sliced static topology survey", "[overlay][survey][topology]") // Wait for collecting phase to time out simulation->crankForAtLeast(phaseDuration, false); - // Check that all surveys are inactive + // Surveys should have automatically transitioned to reporting phase for (int i = A; i <= E; ++i) { auto& surveyDataManager = simulation->getNode(keyList[i]) ->getOverlayManager() .getSurveyManager() .getSurveyDataManagerForTesting(); - REQUIRE(!surveyDataManager.surveyIsActive()); - REQUIRE(!surveyDataManager.getNonce().has_value()); + REQUIRE(surveyDataManager.nonceIsReporting(nonce)); } } } @@ -891,7 +890,7 @@ TEST_CASE("Time sliced dynamic topology survey", "[overlay][survey][topology]") } // Advance survey - simulation->crankForAtLeast(phaseDuration, false); + simulation->crankForAtLeast(phaseDuration * 2, false); // All surveys should now be inactive checkSurveyState(/*expectedNonce*/ std::nullopt, /*isReporting*/ false, diff --git a/src/simulation/Simulation.cpp b/src/simulation/Simulation.cpp index 3311cd1f96..32c1262b3e 100644 --- a/src/simulation/Simulation.cpp +++ b/src/simulation/Simulation.cpp @@ -403,7 +403,10 @@ Simulation::crankNode(NodeID const& id, VirtualClock::time_point timeout) } // Update network survey phase - app->getOverlayManager().getSurveyManager().updateSurveyPhase(); + OverlayManager& om = app->getOverlayManager(); + om.getSurveyManager().updateSurveyPhase(om.getInboundAuthenticatedPeers(), + om.getOutboundAuthenticatedPeers(), + app->getConfig()); return count - quantumClicks; }