From e17f209d2b99a66455af4a8e3c61c8aa5726693f Mon Sep 17 00:00:00 2001 From: Stan Kladko <13399135+kladkogex@users.noreply.github.com> Date: Sat, 25 Nov 2023 11:40:24 +0000 Subject: [PATCH 01/10] 1664 Add memory constrained cache --- .../lru_ordered_memory_constrained_cache.hpp | 134 ++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 thirdparty/lru_ordered_memory_constrained_cache.hpp diff --git a/thirdparty/lru_ordered_memory_constrained_cache.hpp b/thirdparty/lru_ordered_memory_constrained_cache.hpp new file mode 100644 index 00000000..b53a9e0d --- /dev/null +++ b/thirdparty/lru_ordered_memory_constrained_cache.hpp @@ -0,0 +1,134 @@ +/* + * Author: Alexander Ponomarev + * Author: Stan Kladko + * + * This is cache that cleans the last recently used items + * until both the size in total number of elements and the size + * in bytes are satisfied + */ + +#ifndef LRUORDEREDCACHE_MEMORY_CONSTRAINED_HPP_INCLUDED +#define LRUORDEREDCACHE_MEMORY_CONSTRAINED_HPP_INCLUDED + +#include +#include +#include +#include +#include +#include + +#define CACHE_READ_LOCK( _M_ ) std::shared_lock< std::shared_mutex > _read_lock_( _M_ ); +#define CACHE_WRITE_LOCK( _M_ ) std::unique_lock< std::shared_mutex > _write_lock_( _M_ ); + +namespace cache { + + constexpr size_t MAX_VALUE_SIZE = 1024 * 1024 * 1024; // 1 GiGABYTE + + template + class lru_ordered_memory_constrained_cache { + + std::shared_mutex m; + + public: + typedef typename std::pair key_value_pair_t; + typedef typename std::list::iterator list_iterator_t; + + lru_ordered_memory_constrained_cache(size_t max_size, size_t max_bytes) : + _max_size(max_size), _max_bytes(max_bytes) { + } + + void put(const key_t& key, const value_t& value, size_t value_size) { + if (value_size > MAX_VALUE_SIZE) { + throw std::length_error( "Item size too large:" + std::to_string(value_size)); + } + + CACHE_WRITE_LOCK(m) + + auto it = _cache_items_map.find(key); + _cache_items_list.push_front(key_value_pair_t(key, value)); + if (it != _cache_items_map.end()) { + auto item_size = _cache_items_sizes_map.at(key); + if (item_size > _total_bytes) { + throw std::underflow_error("Item size more than total bytes" + + to_string(item_size) + ":" + to_string(_total_bytes)); + } + _total_bytes -= item_size; + _cache_items_list.erase(it->second); + _cache_items_sizes_map.erase(key); + _cache_items_map.erase(it); + } + _cache_items_map[key] = _cache_items_list.begin(); + _cache_items_sizes_map[key] = value_size; + _total_bytes += value_size; + + while (_cache_items_map.size() > _max_size || _total_bytes > _max_bytes) { + auto last = _cache_items_list.end(); + last--; + auto item_size = _cache_items_map.at(last->first); + if ( item_size > _total_bytes ) { + throw std::underflow_error( "Item size more than total bytes" + + to_string( item_size ) + ":" + + to_string( _total_bytes ) ); + } + _total_bytes -= item_size; + _cache_items_sizes_map.erase(last->first); + _cache_items_map.erase(last->first); + _cache_items_list.pop_back(); + } + } + + const value_t& get(const key_t& key) { + + CACHE_WRITE_LOCK(m); + + auto it = _cache_items_map.find(key); + if (it == _cache_items_map.end()) { + throw std::range_error("There is no such key in cache"); + } else { + _cache_items_list.splice(_cache_items_list.begin(), _cache_items_list, it->second); + return it->second->second; + } + } + + std::any getIfExists(const key_t& key) { + + CACHE_WRITE_LOCK(m); + + auto it = _cache_items_map.find(key); + if (it == _cache_items_map.end()) { + return std::any(); + } else { + _cache_items_list.splice(_cache_items_list.begin(), _cache_items_list, it->second); + return it->second->second; + } + } + + + bool exists(const key_t& key) { + + CACHE_READ_LOCK(m); + + return _cache_items_map.find(key) != _cache_items_map.end(); + } + + size_t size() { + CACHE_READ_LOCK(m); + return _cache_items_map.size(); + } + + size_t total_bytes() { + return _total_bytes; + } + + private: + std::list _cache_items_list; + std::map _cache_items_map; + std::map _cache_items_sizes_map; + size_t _max_size; + size_t _max_bytes; + std::atomic _total_bytes = 0; + }; + +} // namespace cache + +#endif From 99f11da68d575c1bbf0d837431407bce074e338d Mon Sep 17 00:00:00 2001 From: Stan Kladko <13399135+kladkogex@users.noreply.github.com> Date: Sat, 25 Nov 2023 11:55:05 +0000 Subject: [PATCH 02/10] 1664 memory constrained cache --- thirdparty/lru_ordered_memory_constrained_cache.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/thirdparty/lru_ordered_memory_constrained_cache.hpp b/thirdparty/lru_ordered_memory_constrained_cache.hpp index b53a9e0d..71908613 100644 --- a/thirdparty/lru_ordered_memory_constrained_cache.hpp +++ b/thirdparty/lru_ordered_memory_constrained_cache.hpp @@ -16,6 +16,7 @@ #include #include #include +#include #define CACHE_READ_LOCK( _M_ ) std::shared_lock< std::shared_mutex > _read_lock_( _M_ ); #define CACHE_WRITE_LOCK( _M_ ) std::unique_lock< std::shared_mutex > _write_lock_( _M_ ); From 790620a5dfc002e25786aa583cc1540447f1eddf Mon Sep 17 00:00:00 2001 From: Stan Kladko <13399135+kladkogex@users.noreply.github.com> Date: Sat, 25 Nov 2023 11:56:44 +0000 Subject: [PATCH 03/10] 1664 memory cache --- thirdparty/lru_ordered_memory_constrained_cache.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/thirdparty/lru_ordered_memory_constrained_cache.hpp b/thirdparty/lru_ordered_memory_constrained_cache.hpp index 71908613..e10fffbc 100644 --- a/thirdparty/lru_ordered_memory_constrained_cache.hpp +++ b/thirdparty/lru_ordered_memory_constrained_cache.hpp @@ -17,6 +17,7 @@ #include #include #include +#include #define CACHE_READ_LOCK( _M_ ) std::shared_lock< std::shared_mutex > _read_lock_( _M_ ); #define CACHE_WRITE_LOCK( _M_ ) std::unique_lock< std::shared_mutex > _write_lock_( _M_ ); From 86f4e2370d4a9e129360b6a711a9211783985a5b Mon Sep 17 00:00:00 2001 From: Stan Kladko <13399135+kladkogex@users.noreply.github.com> Date: Sat, 25 Nov 2023 13:44:04 +0000 Subject: [PATCH 04/10] 1664 fixed typo --- thirdparty/lru_ordered_memory_constrained_cache.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/thirdparty/lru_ordered_memory_constrained_cache.hpp b/thirdparty/lru_ordered_memory_constrained_cache.hpp index e10fffbc..b206ad6b 100644 --- a/thirdparty/lru_ordered_memory_constrained_cache.hpp +++ b/thirdparty/lru_ordered_memory_constrained_cache.hpp @@ -49,7 +49,7 @@ namespace cache { auto it = _cache_items_map.find(key); _cache_items_list.push_front(key_value_pair_t(key, value)); if (it != _cache_items_map.end()) { - auto item_size = _cache_items_sizes_map.at(key); + size_t item_size = _cache_items_sizes_map.at(key); if (item_size > _total_bytes) { throw std::underflow_error("Item size more than total bytes" + to_string(item_size) + ":" + to_string(_total_bytes)); @@ -66,7 +66,7 @@ namespace cache { while (_cache_items_map.size() > _max_size || _total_bytes > _max_bytes) { auto last = _cache_items_list.end(); last--; - auto item_size = _cache_items_map.at(last->first); + size_t item_size = _cache_items_sizes_map.at(last->first); if ( item_size > _total_bytes ) { throw std::underflow_error( "Item size more than total bytes" + to_string( item_size ) + ":" + From 7f5fa4eb71c0f3dcde19b7f64d327a9811cf8adb Mon Sep 17 00:00:00 2001 From: Stan Kladko <13399135+kladkogex@users.noreply.github.com> Date: Sat, 25 Nov 2023 13:51:56 +0000 Subject: [PATCH 05/10] 1554 fixed to string --- thirdparty/lru_ordered_memory_constrained_cache.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/thirdparty/lru_ordered_memory_constrained_cache.hpp b/thirdparty/lru_ordered_memory_constrained_cache.hpp index b206ad6b..2e969074 100644 --- a/thirdparty/lru_ordered_memory_constrained_cache.hpp +++ b/thirdparty/lru_ordered_memory_constrained_cache.hpp @@ -52,7 +52,7 @@ namespace cache { size_t item_size = _cache_items_sizes_map.at(key); if (item_size > _total_bytes) { throw std::underflow_error("Item size more than total bytes" + - to_string(item_size) + ":" + to_string(_total_bytes)); + std::to_string(item_size) + ":" + to_string(_total_bytes)); } _total_bytes -= item_size; _cache_items_list.erase(it->second); @@ -69,7 +69,7 @@ namespace cache { size_t item_size = _cache_items_sizes_map.at(last->first); if ( item_size > _total_bytes ) { throw std::underflow_error( "Item size more than total bytes" + - to_string( item_size ) + ":" + + std::to_string( item_size ) + ":" + to_string( _total_bytes ) ); } _total_bytes -= item_size; From 918e8373e3daadf4400d90e573addd8c22f4a8f1 Mon Sep 17 00:00:00 2001 From: Stan Kladko <13399135+kladkogex@users.noreply.github.com> Date: Fri, 1 Dec 2023 20:10:37 +0000 Subject: [PATCH 06/10] #813 eth_sync adding api function --- monitoring/StuckDetectionAgent.cpp | 63 ++++++++++++++---------------- monitoring/StuckDetectionAgent.h | 6 ++- 2 files changed, 33 insertions(+), 36 deletions(-) diff --git a/monitoring/StuckDetectionAgent.cpp b/monitoring/StuckDetectionAgent.cpp index 85a97698..616bb188 100644 --- a/monitoring/StuckDetectionAgent.cpp +++ b/monitoring/StuckDetectionAgent.cpp @@ -69,56 +69,51 @@ void StuckDetectionAgent::StuckDetectionLoop( StuckDetectionAgent* _agent ) { LOG( info, "StuckDetection agent: started monitoring." ); + // determine if this is the first restart, or there we restarts + // before + auto numberOfPreviousRestarts = _agent->getNumberOfPreviousRestarts(); - uint64_t restartIteration = 1; - - while ( true ) { - if ( _agent->getSchain()->getNode()->isExitRequested() ) - return; - auto restartFileName = _agent->createStuckFileName( restartIteration ); - - if ( !boost::filesystem::exists( restartFileName ) ) { - break; - } - restartIteration++; + if ( numberOfPreviousRestarts > 0 ) { + LOG( info, "Stuck detection engine: previous restarts detected:" << numberOfPreviousRestarts ); } - if ( restartIteration > 1 ) { - LOG( info, "Stuck detection engine: previous restarts detected:" << to_string( - restartIteration - 1 ) ); - } - - - if ( _agent->getSchain()->getNode()->isExitRequested() ) - return; + uint64_t restartIteration = numberOfPreviousRestarts + 1; + uint64_t whenToRestart = 0; - uint64_t restartTime = 0; - uint64_t sleepTime = _agent->getSchain()->getNode()->getStuckMonitoringIntervalMs() * 1000; - - while ( restartTime == 0 ) { - if ( _agent->getSchain()->getNode()->isExitRequested() ) - return; + // loop until stuck is detected + do { try { - usleep( sleepTime ); _agent->getSchain()->getNode()->exitCheck(); - restartTime = _agent->checkForRestart( restartIteration ); + usleep(_agent->getSchain()->getNode()->getStuckMonitoringIntervalMs() * 1000); + // this will return non-zero if skaled needs to be restarted + whenToRestart = _agent->doStuckCheck(restartIteration); } catch ( ExitRequestedException& ) { return; } catch ( exception& e ) { SkaleException::logNested( e ); } - } - + } while (whenToRestart == 0 ); - CHECK_STATE( restartTime > 0 ); + // Stuck detection loop detected stuck. Restart. try { LOG( info, "Stuck detection engine: restarting skaled because of stuck detected." ); - _agent->restart( restartTime, restartIteration ); + _agent->restart(whenToRestart, restartIteration ); } catch ( ExitRequestedException& ) { return; } } +uint64_t StuckDetectionAgent::getNumberOfPreviousRestarts() { + // each time a restart happens, a file with a corresponding name + // is created. To find out how many restarts already happened we + // count these files + uint64_t restartCounter = 0; + while (boost::filesystem::exists(restartFileName(restartCounter + 1))) { + restartCounter++; + } + return restartCounter; +} + void StuckDetectionAgent::join() { CHECK_STATE( stuckDetectionThreadPool ); stuckDetectionThreadPool->joinAll(); @@ -174,7 +169,7 @@ bool StuckDetectionAgent::stuckCheck( uint64_t _restartIntervalMs, uint64_t _tim return result; } -uint64_t StuckDetectionAgent::checkForRestart( uint64_t _restartIteration ) { +uint64_t StuckDetectionAgent::doStuckCheck(uint64_t _restartIteration ) { CHECK_STATE( _restartIteration >= 1 ); auto baseRestartIntervalMs = getSchain()->getNode()->getStuckRestartIntervalMs(); @@ -229,7 +224,7 @@ void StuckDetectionAgent::restart( uint64_t _restartTimeMs, uint64_t _iteration exit( 13 ); } -string StuckDetectionAgent::createStuckFileName( uint64_t _iteration ) { +string StuckDetectionAgent::restartFileName(uint64_t _iteration ) { CHECK_STATE( _iteration >= 1 ); auto engine = getNode()->getConsensusEngine(); CHECK_STATE( engine ); @@ -242,7 +237,7 @@ string StuckDetectionAgent::createStuckFileName( uint64_t _iteration ) { void StuckDetectionAgent::createStuckRestartFile( uint64_t _iteration ) { CHECK_STATE( _iteration >= 1 ); - auto fileName = createStuckFileName( _iteration ); + auto fileName = restartFileName(_iteration); ofstream f; f.open( fileName, ios::trunc ); diff --git a/monitoring/StuckDetectionAgent.h b/monitoring/StuckDetectionAgent.h index e6e78172..f12c6121 100644 --- a/monitoring/StuckDetectionAgent.h +++ b/monitoring/StuckDetectionAgent.h @@ -39,7 +39,7 @@ class StuckDetectionAgent : public Agent { void join(); - uint64_t checkForRestart( uint64_t _restartIteration ); + uint64_t doStuckCheck( uint64_t _restartIteration ); void restart( uint64_t _baseRestartTimeMs, uint64_t _iteration ); @@ -47,9 +47,11 @@ class StuckDetectionAgent : public Agent { void cleanupState(); - string createStuckFileName( uint64_t _iteration ); + string restartFileName(uint64_t _iteration ); bool checkNodesAreOnline(); + uint64_t getNumberOfPreviousRestarts(); + bool stuckCheck( uint64_t _restartIntervalMs, uint64_t _timeStamp ); }; From 15865d6d464be437b20914e426efb4c6d6cce10f Mon Sep 17 00:00:00 2001 From: Stan Kladko <13399135+kladkogex@users.noreply.github.com> Date: Fri, 1 Dec 2023 20:22:26 +0000 Subject: [PATCH 07/10] #813 eth_sync adding api function --- monitoring/StuckDetectionAgent.cpp | 21 +++++++++++---------- monitoring/StuckDetectionAgent.h | 2 +- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/monitoring/StuckDetectionAgent.cpp b/monitoring/StuckDetectionAgent.cpp index 616bb188..a4067ebd 100644 --- a/monitoring/StuckDetectionAgent.cpp +++ b/monitoring/StuckDetectionAgent.cpp @@ -86,7 +86,7 @@ void StuckDetectionAgent::StuckDetectionLoop( StuckDetectionAgent* _agent ) { _agent->getSchain()->getNode()->exitCheck(); usleep(_agent->getSchain()->getNode()->getStuckMonitoringIntervalMs() * 1000); // this will return non-zero if skaled needs to be restarted - whenToRestart = _agent->doStuckCheck(restartIteration); + whenToRestart = _agent->doStuckCheckAndReturnTimeWhenToRestart(restartIteration); } catch ( ExitRequestedException& ) { return; } catch ( exception& e ) { @@ -169,12 +169,12 @@ bool StuckDetectionAgent::stuckCheck( uint64_t _restartIntervalMs, uint64_t _tim return result; } -uint64_t StuckDetectionAgent::doStuckCheck(uint64_t _restartIteration ) { +// this function returns 0 if now stuck is detected +// othewise it returns Linux time in ms when to restart +uint64_t StuckDetectionAgent::doStuckCheckAndReturnTimeWhenToRestart(uint64_t _restartIteration ) { CHECK_STATE( _restartIteration >= 1 ); - auto baseRestartIntervalMs = getSchain()->getNode()->getStuckRestartIntervalMs(); - - uint64_t restartIntervalMs = baseRestartIntervalMs; + auto restartIntervalMs = getSchain()->getNode()->getStuckRestartIntervalMs(); auto blockID = getSchain()->getLastCommittedBlockID(); @@ -186,12 +186,13 @@ uint64_t StuckDetectionAgent::doStuckCheck(uint64_t _restartIteration ) { if ( sChain->getCryptoManager()->isSGXServerDown() ) return 0; - auto timeStampMs = getSchain()->getBlock( blockID )->getTimeStampS() * 1000; + auto lastBlockTimeStampMs = getSchain()->getBlock(blockID )->getTimeStampS() * 1000; // check that the chain has not been doing much for a long time auto startTimeMs = Time::getCurrentTimeMs(); while ( Time::getCurrentTimeMs() - startTimeMs < 60000 ) { - if ( !stuckCheck( restartIntervalMs, timeStampMs ) ) + getNode()->exitCheck(); + if ( !stuckCheck(restartIntervalMs, lastBlockTimeStampMs ) ) return 0; usleep( 5 * 1000 * 1000 ); } @@ -201,21 +202,21 @@ uint64_t StuckDetectionAgent::doStuckCheck(uint64_t _restartIteration ) { LOG( info, "Cleaned up state" ); - return timeStampMs + restartIntervalMs + 120000; + return lastBlockTimeStampMs + restartIntervalMs + 120000; } void StuckDetectionAgent::restart( uint64_t _restartTimeMs, uint64_t _iteration ) { CHECK_STATE( _restartTimeMs > 0 ); + // wait until restart time is reached while ( Time::getCurrentTimeMs() < _restartTimeMs ) { try { usleep( 100 ); } catch ( ... ) { } - getNode()->exitCheck(); } - createStuckRestartFile( _iteration + 1 ); + createStuckRestartFile( _iteration ); LOG( err, "Consensus engine stuck detected, because no blocks were mined for a long time and " diff --git a/monitoring/StuckDetectionAgent.h b/monitoring/StuckDetectionAgent.h index f12c6121..3406ae3a 100644 --- a/monitoring/StuckDetectionAgent.h +++ b/monitoring/StuckDetectionAgent.h @@ -39,7 +39,7 @@ class StuckDetectionAgent : public Agent { void join(); - uint64_t doStuckCheck( uint64_t _restartIteration ); + uint64_t doStuckCheckAndReturnTimeWhenToRestart(uint64_t _restartIteration ); void restart( uint64_t _baseRestartTimeMs, uint64_t _iteration ); From 5c7a686e6b5906210a73f54a6d47e77b3d1d7154 Mon Sep 17 00:00:00 2001 From: Stan Kladko <13399135+kladkogex@users.noreply.github.com> Date: Mon, 18 Dec 2023 20:11:33 +0000 Subject: [PATCH 08/10] 1135 get pending transactions --- chains/Schain.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/chains/Schain.cpp b/chains/Schain.cpp index e9e1d42a..ec4cd43c 100644 --- a/chains/Schain.cpp +++ b/chains/Schain.cpp @@ -404,6 +404,14 @@ void Schain::lockWithDeadLockCheck( const char* _functionName ) { result = ( ( uint64_t ) getLastCommittedBlockID() ) - committedIDOld; if ( !getNode()->isSyncOnlyNode() ) { proposeNextBlock( true ); + } else { + // on sync nodes we get candidate block and throw it away immediately + // this is to clean skaled queues + u256 stateRoot = 0; + if (!extFace) { // we are in tests + return; + } + extFace->pendingTransactions( getNode()->getMaxTransactionsPerBlock(), stateRoot ); } } From 77c101094ce15dc549cb4a1e606919c8e8170012 Mon Sep 17 00:00:00 2001 From: Stan Kladko <13399135+kladkogex@users.noreply.github.com> Date: Mon, 18 Dec 2023 20:14:37 +0000 Subject: [PATCH 09/10] 1135 get pending transactions --- chains/Schain.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/chains/Schain.cpp b/chains/Schain.cpp index ec4cd43c..4571ea05 100644 --- a/chains/Schain.cpp +++ b/chains/Schain.cpp @@ -407,11 +407,10 @@ void Schain::lockWithDeadLockCheck( const char* _functionName ) { } else { // on sync nodes we get candidate block and throw it away immediately // this is to clean skaled queues - u256 stateRoot = 0; - if (!extFace) { // we are in tests - return; + if (extFace) { // if extFace is null we are in consensus tests and there is no skaled + u256 stateRoot = 0; + extFace->pendingTransactions( getNode()->getMaxTransactionsPerBlock(), stateRoot ); } - extFace->pendingTransactions( getNode()->getMaxTransactionsPerBlock(), stateRoot ); } } From 3e16ac006c5ee92bb1019582860cdbd1d4b667c5 Mon Sep 17 00:00:00 2001 From: Oleh Nikolaiev Date: Mon, 8 Jan 2024 11:56:58 +0000 Subject: [PATCH 10/10] fix boost build --- libBLS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libBLS b/libBLS index 6fb4d5c5..7eca4e26 160000 --- a/libBLS +++ b/libBLS @@ -1 +1 @@ -Subproject commit 6fb4d5c5a1af80bcca82b95fc30e01b65c7cf235 +Subproject commit 7eca4e267d441638cbf3b201a01437803d58bb72