From d9f90c84c0297293ac21d277ebbe38be72c54c97 Mon Sep 17 00:00:00 2001 From: Ed Hennis Date: Fri, 12 Jan 2024 12:42:33 -0500 Subject: [PATCH] Improve lifetime management of ledger objects (`SLE`s) to prevent runaway memory usage: (#4822) * Add logging for Application.cpp sweep() * Improve lifetime management of ledger objects (`SLE`s) * Only store SLE digest in CachedView; get SLEs from CachedSLEs * Also force release of last ledger used for path finding if there are no path finding requests to process * Count more ST objects (derive from `CountedObject`) * Track CachedView stats in CountedObjects * Rename the CachedView counters * Fix the scope of the digest lookup lock Before this patch, if you asked "is it caching?" It was always caching. --- src/ripple/app/ledger/InboundLedgers.h | 3 + src/ripple/app/ledger/LedgerReplayer.h | 21 +++ src/ripple/app/ledger/impl/InboundLedgers.cpp | 7 + src/ripple/app/ledger/impl/LedgerMaster.cpp | 5 + src/ripple/app/main/Application.cpp | 172 +++++++++++++++++- src/ripple/app/paths/PathRequests.cpp | 4 + src/ripple/consensus/Validations.h | 28 +++ src/ripple/ledger/CachedView.h | 5 +- src/ripple/ledger/impl/CachedView.cpp | 43 +++-- src/ripple/net/InfoSub.h | 2 +- src/ripple/protocol/STAccount.h | 4 +- src/ripple/protocol/STBitString.h | 3 +- src/ripple/protocol/STBlob.h | 6 +- src/ripple/protocol/STInteger.h | 3 +- src/ripple/protocol/STIssue.h | 2 +- src/ripple/protocol/STVector256.h | 3 +- src/ripple/protocol/STXChainBridge.h | 3 +- src/test/app/LedgerReplay_test.cpp | 6 + src/test/jtx/Env.h | 1 - 19 files changed, 283 insertions(+), 38 deletions(-) diff --git a/src/ripple/app/ledger/InboundLedgers.h b/src/ripple/app/ledger/InboundLedgers.h index dca4a80c54b..b12760153e2 100644 --- a/src/ripple/app/ledger/InboundLedgers.h +++ b/src/ripple/app/ledger/InboundLedgers.h @@ -83,6 +83,9 @@ class InboundLedgers virtual void stop() = 0; + + virtual std::size_t + cacheSize() = 0; }; std::unique_ptr diff --git a/src/ripple/app/ledger/LedgerReplayer.h b/src/ripple/app/ledger/LedgerReplayer.h index 6866250485d..b06dd2cc858 100644 --- a/src/ripple/app/ledger/LedgerReplayer.h +++ b/src/ripple/app/ledger/LedgerReplayer.h @@ -125,6 +125,27 @@ class LedgerReplayer final void stop(); + std::size_t + tasksSize() const + { + std::lock_guard lock(mtx_); + return tasks_.size(); + } + + std::size_t + deltasSize() const + { + std::lock_guard lock(mtx_); + return deltas_.size(); + } + + std::size_t + skipListsSize() const + { + std::lock_guard lock(mtx_); + return skipLists_.size(); + } + private: mutable std::mutex mtx_; std::vector> tasks_; diff --git a/src/ripple/app/ledger/impl/InboundLedgers.cpp b/src/ripple/app/ledger/impl/InboundLedgers.cpp index 7ee49b4547a..0bff434edbc 100644 --- a/src/ripple/app/ledger/impl/InboundLedgers.cpp +++ b/src/ripple/app/ledger/impl/InboundLedgers.cpp @@ -411,6 +411,13 @@ class InboundLedgersImp : public InboundLedgers mRecentFailures.clear(); } + std::size_t + cacheSize() override + { + ScopedLockType lock(mLock); + return mLedgers.size(); + } + private: clock_type& m_clock; diff --git a/src/ripple/app/ledger/impl/LedgerMaster.cpp b/src/ripple/app/ledger/impl/LedgerMaster.cpp index 0006d6b0dbd..9388a3005ba 100644 --- a/src/ripple/app/ledger/impl/LedgerMaster.cpp +++ b/src/ripple/app/ledger/impl/LedgerMaster.cpp @@ -1543,6 +1543,7 @@ LedgerMaster::updatePaths() if (app_.getOPs().isNeedNetworkLedger()) { --mPathFindThread; + mPathLedger.reset(); JLOG(m_journal.debug()) << "Need network ledger for updating paths"; return; } @@ -1568,6 +1569,7 @@ LedgerMaster::updatePaths() else { // Nothing to do --mPathFindThread; + mPathLedger.reset(); JLOG(m_journal.debug()) << "Nothing to do for updating paths"; return; } @@ -1584,6 +1586,7 @@ LedgerMaster::updatePaths() << "Published ledger too old for updating paths"; std::lock_guard ml(m_mutex); --mPathFindThread; + mPathLedger.reset(); return; } } @@ -1596,6 +1599,7 @@ LedgerMaster::updatePaths() if (!pathRequests.requestsPending()) { --mPathFindThread; + mPathLedger.reset(); JLOG(m_journal.debug()) << "No path requests found. Nothing to do for updating " "paths. " @@ -1613,6 +1617,7 @@ LedgerMaster::updatePaths() << "No path requests left. No need for further updating " "paths"; --mPathFindThread; + mPathLedger.reset(); return; } } diff --git a/src/ripple/app/main/Application.cpp b/src/ripple/app/main/Application.cpp index 871daffec32..9aad155d876 100644 --- a/src/ripple/app/main/Application.cpp +++ b/src/ripple/app/main/Application.cpp @@ -1065,20 +1065,172 @@ class ApplicationImp : public Application, public BasicApp // VFALCO TODO fix the dependency inversion using an observer, // have listeners register for "onSweep ()" notification. - nodeFamily_.sweep(); + { + std::shared_ptr const fullBelowCache = + nodeFamily_.getFullBelowCache(0); + + std::shared_ptr const treeNodeCache = + nodeFamily_.getTreeNodeCache(0); + + std::size_t const oldFullBelowSize = fullBelowCache->size(); + std::size_t const oldTreeNodeSize = treeNodeCache->size(); + + nodeFamily_.sweep(); + + JLOG(m_journal.debug()) + << "NodeFamily::FullBelowCache sweep. Size before: " + << oldFullBelowSize + << "; size after: " << fullBelowCache->size(); + + JLOG(m_journal.debug()) + << "NodeFamily::TreeNodeCache sweep. Size before: " + << oldTreeNodeSize << "; size after: " << treeNodeCache->size(); + } if (shardFamily_) + { + std::size_t const oldFullBelowSize = + shardFamily_->getFullBelowCacheSize(); + std::size_t const oldTreeNodeSize = + shardFamily_->getTreeNodeCacheSize().second; + shardFamily_->sweep(); - getMasterTransaction().sweep(); - getNodeStore().sweep(); + + JLOG(m_journal.debug()) + << "ShardFamily::FullBelowCache sweep. Size before: " + << oldFullBelowSize + << "; size after: " << shardFamily_->getFullBelowCacheSize(); + + JLOG(m_journal.debug()) + << "ShardFamily::TreeNodeCache sweep. Size before: " + << oldTreeNodeSize << "; size after: " + << shardFamily_->getTreeNodeCacheSize().second; + } + { + TaggedCache const& masterTxCache = + getMasterTransaction().getCache(); + + std::size_t const oldMasterTxSize = masterTxCache.size(); + + getMasterTransaction().sweep(); + + JLOG(m_journal.debug()) + << "MasterTransaction sweep. Size before: " << oldMasterTxSize + << "; size after: " << masterTxCache.size(); + } + { + // Does not appear to have an associated cache. + getNodeStore().sweep(); + } if (shardStore_) + { + // Does not appear to have an associated cache. shardStore_->sweep(); - getLedgerMaster().sweep(); - getTempNodeCache().sweep(); - getValidations().expire(m_journal); - getInboundLedgers().sweep(); - getLedgerReplayer().sweep(); - m_acceptedLedgerCache.sweep(); - cachedSLEs_.sweep(); + } + { + std::size_t const oldLedgerMasterCacheSize = + getLedgerMaster().getFetchPackCacheSize(); + + getLedgerMaster().sweep(); + + JLOG(m_journal.debug()) + << "LedgerMaster sweep. Size before: " + << oldLedgerMasterCacheSize << "; size after: " + << getLedgerMaster().getFetchPackCacheSize(); + } + { + // NodeCache == TaggedCache + std::size_t const oldTempNodeCacheSize = getTempNodeCache().size(); + + getTempNodeCache().sweep(); + + JLOG(m_journal.debug()) + << "TempNodeCache sweep. Size before: " << oldTempNodeCacheSize + << "; size after: " << getTempNodeCache().size(); + } + { + std::size_t const oldCurrentCacheSize = + getValidations().sizeOfCurrentCache(); + std::size_t const oldSizeSeqEnforcesSize = + getValidations().sizeOfSeqEnforcersCache(); + std::size_t const oldByLedgerSize = + getValidations().sizeOfByLedgerCache(); + std::size_t const oldBySequenceSize = + getValidations().sizeOfBySequenceCache(); + + getValidations().expire(m_journal); + + JLOG(m_journal.debug()) + << "Validations Current expire. Size before: " + << oldCurrentCacheSize + << "; size after: " << getValidations().sizeOfCurrentCache(); + + JLOG(m_journal.debug()) + << "Validations SeqEnforcer expire. Size before: " + << oldSizeSeqEnforcesSize << "; size after: " + << getValidations().sizeOfSeqEnforcersCache(); + + JLOG(m_journal.debug()) + << "Validations ByLedger expire. Size before: " + << oldByLedgerSize + << "; size after: " << getValidations().sizeOfByLedgerCache(); + + JLOG(m_journal.debug()) + << "Validations BySequence expire. Size before: " + << oldBySequenceSize + << "; size after: " << getValidations().sizeOfBySequenceCache(); + } + { + std::size_t const oldInboundLedgersSize = + getInboundLedgers().cacheSize(); + + getInboundLedgers().sweep(); + + JLOG(m_journal.debug()) + << "InboundLedgers sweep. Size before: " + << oldInboundLedgersSize + << "; size after: " << getInboundLedgers().cacheSize(); + } + { + size_t const oldTasksSize = getLedgerReplayer().tasksSize(); + size_t const oldDeltasSize = getLedgerReplayer().deltasSize(); + size_t const oldSkipListsSize = getLedgerReplayer().skipListsSize(); + + getLedgerReplayer().sweep(); + + JLOG(m_journal.debug()) + << "LedgerReplayer tasks sweep. Size before: " << oldTasksSize + << "; size after: " << getLedgerReplayer().tasksSize(); + + JLOG(m_journal.debug()) + << "LedgerReplayer deltas sweep. Size before: " + << oldDeltasSize + << "; size after: " << getLedgerReplayer().deltasSize(); + + JLOG(m_journal.debug()) + << "LedgerReplayer skipLists sweep. Size before: " + << oldSkipListsSize + << "; size after: " << getLedgerReplayer().skipListsSize(); + } + { + std::size_t const oldAcceptedLedgerSize = + m_acceptedLedgerCache.size(); + + m_acceptedLedgerCache.sweep(); + + JLOG(m_journal.debug()) + << "AcceptedLedgerCache sweep. Size before: " + << oldAcceptedLedgerSize + << "; size after: " << m_acceptedLedgerCache.size(); + } + { + std::size_t const oldCachedSLEsSize = cachedSLEs_.size(); + + cachedSLEs_.sweep(); + + JLOG(m_journal.debug()) + << "CachedSLEs sweep. Size before: " << oldCachedSLEsSize + << "; size after: " << cachedSLEs_.size(); + } #ifdef RIPPLED_REPORTING if (auto pg = dynamic_cast(&*mRelationalDatabase)) diff --git a/src/ripple/app/paths/PathRequests.cpp b/src/ripple/app/paths/PathRequests.cpp index 951f55dc800..ac051e5f0c5 100644 --- a/src/ripple/app/paths/PathRequests.cpp +++ b/src/ripple/app/paths/PathRequests.cpp @@ -200,6 +200,9 @@ PathRequests::updateAll(std::shared_ptr const& inLedger) break; } + // Hold on to the line cache until after the lock is released, so it can + // be destroyed outside of the lock + std::shared_ptr lastCache; { // Get the latest requests, cache, and ledger for next pass std::lock_guard sl(mLock); @@ -207,6 +210,7 @@ PathRequests::updateAll(std::shared_ptr const& inLedger) if (requests_.empty()) break; requests = requests_; + lastCache = cache; cache = getLineCache(cache->getLedger(), false); } } while (!app_.getJobQueue().isStopping()); diff --git a/src/ripple/consensus/Validations.h b/src/ripple/consensus/Validations.h index a9dbd5585e2..1bef76d961c 100644 --- a/src/ripple/consensus/Validations.h +++ b/src/ripple/consensus/Validations.h @@ -1142,6 +1142,34 @@ class Validations return laggards; } + + std::size_t + sizeOfCurrentCache() const + { + std::lock_guard lock{mutex_}; + return current_.size(); + } + + std::size_t + sizeOfSeqEnforcersCache() const + { + std::lock_guard lock{mutex_}; + return seqEnforcers_.size(); + } + + std::size_t + sizeOfByLedgerCache() const + { + std::lock_guard lock{mutex_}; + return byLedger_.size(); + } + + std::size_t + sizeOfBySequenceCache() const + { + std::lock_guard lock{mutex_}; + return bySequence_.size(); + } }; } // namespace ripple diff --git a/src/ripple/ledger/CachedView.h b/src/ripple/ledger/CachedView.h index e827d23c8c3..da483b34948 100644 --- a/src/ripple/ledger/CachedView.h +++ b/src/ripple/ledger/CachedView.h @@ -38,10 +38,7 @@ class CachedViewImpl : public DigestAwareReadView DigestAwareReadView const& base_; CachedSLEs& cache_; std::mutex mutable mutex_; - std::unordered_map< - key_type, - std::shared_ptr, - hardened_hash<>> mutable map_; + std::unordered_map> mutable map_; public: CachedViewImpl() = delete; diff --git a/src/ripple/ledger/impl/CachedView.cpp b/src/ripple/ledger/impl/CachedView.cpp index 3ad8a812b01..210031346d3 100644 --- a/src/ripple/ledger/impl/CachedView.cpp +++ b/src/ripple/ledger/impl/CachedView.cpp @@ -33,25 +33,40 @@ CachedViewImpl::exists(Keylet const& k) const std::shared_ptr CachedViewImpl::read(Keylet const& k) const { - { - std::lock_guard lock(mutex_); - auto const iter = map_.find(k.key); - if (iter != map_.end()) + static CountedObjects::Counter hits{"CachedView::hit"}; + static CountedObjects::Counter hitsexpired{"CachedView::hitExpired"}; + static CountedObjects::Counter misses{"CachedView::miss"}; + bool cacheHit = false; + bool baseRead = false; + + auto const digest = [&]() -> std::optional { { - if (!iter->second || !k.check(*iter->second)) - return nullptr; - return iter->second; + std::lock_guard lock(mutex_); + auto const iter = map_.find(k.key); + if (iter != map_.end()) + { + cacheHit = true; + return iter->second; + } } - } - auto const digest = base_.digest(k.key); + return base_.digest(k.key); + }(); if (!digest) return nullptr; - auto sle = cache_.fetch(*digest, [&]() { return base_.read(k); }); + auto sle = cache_.fetch(*digest, [&]() { + baseRead = true; + return base_.read(k); + }); + if (cacheHit && baseRead) + hitsexpired.increment(); + else if (cacheHit) + hits.increment(); + else + misses.increment(); std::lock_guard lock(mutex_); - auto const er = map_.emplace(k.key, sle); - auto const& iter = er.first; + auto const er = map_.emplace(k.key, *digest); bool const inserted = er.second; - if (iter->second && !k.check(*iter->second)) + if (sle && !k.check(*sle)) { if (!inserted) { @@ -62,7 +77,7 @@ CachedViewImpl::read(Keylet const& k) const } return nullptr; } - return iter->second; + return sle; } } // namespace detail diff --git a/src/ripple/net/InfoSub.h b/src/ripple/net/InfoSub.h index 092ba0c0035..33441dde632 100644 --- a/src/ripple/net/InfoSub.h +++ b/src/ripple/net/InfoSub.h @@ -33,7 +33,7 @@ namespace ripple { // Operations that clients may wish to perform against the network // Master operational handler, server sequencer, network tracker -class InfoSubRequest +class InfoSubRequest : public CountedObject { public: using pointer = std::shared_ptr; diff --git a/src/ripple/protocol/STAccount.h b/src/ripple/protocol/STAccount.h index c622a7c5eef..c44327fe566 100644 --- a/src/ripple/protocol/STAccount.h +++ b/src/ripple/protocol/STAccount.h @@ -20,13 +20,15 @@ #ifndef RIPPLE_PROTOCOL_STACCOUNT_H_INCLUDED #define RIPPLE_PROTOCOL_STACCOUNT_H_INCLUDED +#include #include #include + #include namespace ripple { -class STAccount final : public STBase +class STAccount final : public STBase, public CountedObject { private: // The original implementation of STAccount kept the value in an STBlob. diff --git a/src/ripple/protocol/STBitString.h b/src/ripple/protocol/STBitString.h index 45d1a3d6f05..decdfa64861 100644 --- a/src/ripple/protocol/STBitString.h +++ b/src/ripple/protocol/STBitString.h @@ -20,6 +20,7 @@ #ifndef RIPPLE_PROTOCOL_STBITSTRING_H_INCLUDED #define RIPPLE_PROTOCOL_STBITSTRING_H_INCLUDED +#include #include #include @@ -30,7 +31,7 @@ namespace ripple { // information of a template parameterized by an unsigned type. This RTTI // information is needed to write gdb pretty printers. template -class STBitString final : public STBase +class STBitString final : public STBase, public CountedObject> { static_assert(Bits > 0, "Number of bits must be positive"); diff --git a/src/ripple/protocol/STBlob.h b/src/ripple/protocol/STBlob.h index 58a61206b1a..3b2731be7f0 100644 --- a/src/ripple/protocol/STBlob.h +++ b/src/ripple/protocol/STBlob.h @@ -21,8 +21,10 @@ #define RIPPLE_PROTOCOL_STBLOB_H_INCLUDED #include +#include #include #include + #include #include #include @@ -30,7 +32,7 @@ namespace ripple { // variable length byte string -class STBlob : public STBase +class STBlob : public STBase, public CountedObject { Buffer value_; @@ -88,7 +90,7 @@ class STBlob : public STBase }; inline STBlob::STBlob(STBlob const& rhs) - : STBase(rhs), value_(rhs.data(), rhs.size()) + : STBase(rhs), CountedObject(rhs), value_(rhs.data(), rhs.size()) { } diff --git a/src/ripple/protocol/STInteger.h b/src/ripple/protocol/STInteger.h index fcd007e4562..aaf0f8c904e 100644 --- a/src/ripple/protocol/STInteger.h +++ b/src/ripple/protocol/STInteger.h @@ -20,12 +20,13 @@ #ifndef RIPPLE_PROTOCOL_STINTEGER_H_INCLUDED #define RIPPLE_PROTOCOL_STINTEGER_H_INCLUDED +#include #include namespace ripple { template -class STInteger : public STBase +class STInteger : public STBase, public CountedObject> { public: using value_type = Integer; diff --git a/src/ripple/protocol/STIssue.h b/src/ripple/protocol/STIssue.h index 80a37b305fc..38ca136e017 100644 --- a/src/ripple/protocol/STIssue.h +++ b/src/ripple/protocol/STIssue.h @@ -28,7 +28,7 @@ namespace ripple { -class STIssue final : public STBase +class STIssue final : public STBase, CountedObject { private: Issue issue_{xrpIssue()}; diff --git a/src/ripple/protocol/STVector256.h b/src/ripple/protocol/STVector256.h index 87d65036a2a..bf4a1cbec44 100644 --- a/src/ripple/protocol/STVector256.h +++ b/src/ripple/protocol/STVector256.h @@ -20,13 +20,14 @@ #ifndef RIPPLE_PROTOCOL_STVECTOR256_H_INCLUDED #define RIPPLE_PROTOCOL_STVECTOR256_H_INCLUDED +#include #include #include #include namespace ripple { -class STVector256 : public STBase +class STVector256 : public STBase, public CountedObject { std::vector mValue; diff --git a/src/ripple/protocol/STXChainBridge.h b/src/ripple/protocol/STXChainBridge.h index 44cd6a480f7..537a1d160b2 100644 --- a/src/ripple/protocol/STXChainBridge.h +++ b/src/ripple/protocol/STXChainBridge.h @@ -20,6 +20,7 @@ #ifndef RIPPLE_PROTOCOL_STXCHAINBRIDGE_H_INCLUDED #define RIPPLE_PROTOCOL_STXCHAINBRIDGE_H_INCLUDED +#include #include #include #include @@ -29,7 +30,7 @@ namespace ripple { class Serializer; class STObject; -class STXChainBridge final : public STBase +class STXChainBridge final : public STBase, public CountedObject { STAccount lockingChainDoor_{sfLockingChainDoor}; STIssue lockingChainIssue_{sfLockingChainIssue}; diff --git a/src/test/app/LedgerReplay_test.cpp b/src/test/app/LedgerReplay_test.cpp index b535739353b..41c9a71e9f6 100644 --- a/src/test/app/LedgerReplay_test.cpp +++ b/src/test/app/LedgerReplay_test.cpp @@ -173,6 +173,12 @@ class MagicInboundLedgers : public InboundLedgers { } + virtual size_t + cacheSize() override + { + return 0; + } + LedgerMaster& ledgerSource; LedgerMaster& ledgerSink; InboundLedgersBehavior bhvr; diff --git a/src/test/jtx/Env.h b/src/test/jtx/Env.h index 6a55f2f9141..74e9e057de8 100644 --- a/src/test/jtx/Env.h +++ b/src/test/jtx/Env.h @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include