From 61a402a130cb8f08b0aa2245a01532c4931a74ef Mon Sep 17 00:00:00 2001 From: badrogger Date: Mon, 27 May 2024 16:29:53 +0000 Subject: [PATCH 001/103] Add access functions for node_groups folder --- core/schains/config/static_accounts.py | 14 +++++++++++++- tools/configs/__init__.py | 1 + 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/core/schains/config/static_accounts.py b/core/schains/config/static_accounts.py index 989da99f8..3ef3ece56 100644 --- a/core/schains/config/static_accounts.py +++ b/core/schains/config/static_accounts.py @@ -20,7 +20,7 @@ import os from tools.helper import read_json -from tools.configs import STATIC_ACCOUNTS_FOLDER, ENV_TYPE +from tools.configs import STATIC_ACCOUNTS_FOLDER, STATIC_GROUPS_FOLDER, ENV_TYPE def static_accounts(schain_name: str) -> dict: @@ -36,3 +36,15 @@ def static_accounts_filepath(schain_name: str) -> str: if not os.path.isdir(static_accounts_env_path): return '' return os.path.join(static_accounts_env_path, f'schain-{schain_name}.json') + + +def static_groups(schain_name: str) -> dict: + static_groups_env_path = static_groups_filepath(schain_name) + return read_json(static_groups_env_path(schain_name)) + + +def static_groups_filepath(schain_name: str) -> str: + static_groups_env_path = os.path.join(STATIC_GROUPS_FOLDER, ENV_TYPE) + if not os.path.isdir(static_groups_env_path): + return '' + return os.path.join(static_groups_env_path, f'schain-{schain_name}.json') diff --git a/tools/configs/__init__.py b/tools/configs/__init__.py index 0fa95de31..880468d5b 100644 --- a/tools/configs/__init__.py +++ b/tools/configs/__init__.py @@ -30,6 +30,7 @@ CONFIG_FOLDER = os.path.join(SKALE_VOLUME_PATH, CONFIG_FOLDER_NAME) STATIC_ACCOUNTS_FOLDER = os.path.join(CONFIG_FOLDER, 'schain_accounts') +STATIC_GROUPS_FOLDER = os.path.join(CONFIG_FOLDER, 'node_groups') FLASK_SECRET_KEY_FILENAME = 'flask_db_key.txt' FLASK_SECRET_KEY_FILE = os.path.join(NODE_DATA_PATH, FLASK_SECRET_KEY_FILENAME) From 78a16fd4f78e7f030db11104974fa3113f4431d1 Mon Sep 17 00:00:00 2001 From: badrogger Date: Mon, 27 May 2024 16:49:44 +0000 Subject: [PATCH 002/103] Update node_groups with legacy data --- core/schains/config/generator.py | 4 ++- core/schains/config/legacy_groups.py | 50 ++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 core/schains/config/legacy_groups.py diff --git a/core/schains/config/generator.py b/core/schains/config/generator.py index fe4eef631..5c969b18b 100644 --- a/core/schains/config/generator.py +++ b/core/schains/config/generator.py @@ -34,7 +34,7 @@ from core.schains.config.predeployed import generate_predeployed_accounts from core.schains.config.precompiled import generate_precompiled_accounts from core.schains.config.generation import Gen -from core.schains.config.static_accounts import is_static_accounts, static_accounts +from core.schains.config.legacy_data import is_static_accounts, static_accounts, static_groups from core.schains.config.helper import get_chain_id, get_schain_id from core.schains.dkg.utils import get_common_bls_public_key from core.schains.limits import get_schain_type @@ -234,6 +234,8 @@ def generate_schain_config_with_skale( schain = skale.schains.get_by_name(schain_name) node = skale.nodes.get(node_config.id) node_groups = get_previous_schain_groups(skale, schain_name) + legacy_groups = static_groups(schain_name) + node_groups.update(legacy_groups) is_owner_contract = is_address_contract(skale.web3, schain['mainnetOwner']) diff --git a/core/schains/config/legacy_groups.py 
b/core/schains/config/legacy_groups.py
new file mode 100644
index 000000000..3ef3ece56
--- /dev/null
+++ b/core/schains/config/legacy_groups.py
@@ -0,0 +1,50 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of SKALE Admin
+#
+# Copyright (C) 2023-Present SKALE Labs
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+import os
+
+from tools.helper import read_json
+from tools.configs import STATIC_ACCOUNTS_FOLDER, STATIC_GROUPS_FOLDER, ENV_TYPE
+
+
+def static_accounts(schain_name: str) -> dict:
+    return read_json(static_accounts_filepath(schain_name))
+
+
+def is_static_accounts(schain_name: str) -> bool:
+    return os.path.isfile(static_accounts_filepath(schain_name))
+
+
+def static_accounts_filepath(schain_name: str) -> str:
+    static_accounts_env_path = os.path.join(STATIC_ACCOUNTS_FOLDER, ENV_TYPE)
+    if not os.path.isdir(static_accounts_env_path):
+        return ''
+    return os.path.join(static_accounts_env_path, f'schain-{schain_name}.json')
+
+
+def static_groups(schain_name: str) -> dict:
+    static_groups_env_path = static_groups_filepath(schain_name)
+    return read_json(static_groups_env_path(schain_name))
+
+
+def static_groups_filepath(schain_name: str) -> str:
+    static_groups_env_path = os.path.join(STATIC_GROUPS_FOLDER, ENV_TYPE)
+    if not os.path.isdir(static_groups_env_path):
+        return ''
+    return os.path.join(static_groups_env_path, f'schain-{schain_name}.json')

From d37aefc760c5a8c3a3efa64220c38086ed1f4efa Mon Sep 17 00:00:00 2001
From: badrogger
Date: Mon, 27 May 2024 16:54:01 +0000
Subject: [PATCH 003/103] Remove static_accounts file in favor of legacy_data

---
 core/schains/config/static_accounts.py | 50 --------------------------
 1 file changed, 50 deletions(-)
 delete mode 100644 core/schains/config/static_accounts.py

diff --git a/core/schains/config/static_accounts.py b/core/schains/config/static_accounts.py
deleted file mode 100644
index 3ef3ece56..000000000
--- a/core/schains/config/static_accounts.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# This file is part of SKALE Admin
-#
-# Copyright (C) 2023-Present SKALE Labs
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program.  If not, see <https://www.gnu.org/licenses/>.
- -import os - -from tools.helper import read_json -from tools.configs import STATIC_ACCOUNTS_FOLDER, STATIC_GROUPS_FOLDER, ENV_TYPE - - -def static_accounts(schain_name: str) -> dict: - return read_json(static_accounts_filepath(schain_name)) - - -def is_static_accounts(schain_name: str) -> bool: - return os.path.isfile(static_accounts_filepath(schain_name)) - - -def static_accounts_filepath(schain_name: str) -> str: - static_accounts_env_path = os.path.join(STATIC_ACCOUNTS_FOLDER, ENV_TYPE) - if not os.path.isdir(static_accounts_env_path): - return '' - return os.path.join(static_accounts_env_path, f'schain-{schain_name}.json') - - -def static_groups(schain_name: str) -> dict: - static_groups_env_path = static_groups_filepath(schain_name) - return read_json(static_groups_env_path(schain_name)) - - -def static_groups_filepath(schain_name: str) -> str: - static_groups_env_path = os.path.join(STATIC_GROUPS_FOLDER, ENV_TYPE) - if not os.path.isdir(static_groups_env_path): - return '' - return os.path.join(static_groups_env_path, f'schain-{schain_name}.json') From a79737585bf3076dc71657aceb7b967abdf90c7e Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 28 May 2024 09:26:29 +0000 Subject: [PATCH 004/103] Fix legacy_data import --- core/schains/config/{legacy_groups.py => legacy_data.py} | 4 +++- tests/schains/config/static_accounts_test.py | 7 ++++++- 2 files changed, 9 insertions(+), 2 deletions(-) rename core/schains/config/{legacy_groups.py => legacy_data.py} (94%) diff --git a/core/schains/config/legacy_groups.py b/core/schains/config/legacy_data.py similarity index 94% rename from core/schains/config/legacy_groups.py rename to core/schains/config/legacy_data.py index 3ef3ece56..bb079f16f 100644 --- a/core/schains/config/legacy_groups.py +++ b/core/schains/config/legacy_data.py @@ -40,7 +40,9 @@ def static_accounts_filepath(schain_name: str) -> str: def static_groups(schain_name: str) -> dict: static_groups_env_path = static_groups_filepath(schain_name) - return read_json(static_groups_env_path(schain_name)) + if not os.path.isfile(static_groups_env_path): + return {} + return read_json(static_groups_env_path) def static_groups_filepath(schain_name: str) -> str: diff --git a/tests/schains/config/static_accounts_test.py b/tests/schains/config/static_accounts_test.py index bb4659281..94cf51df6 100644 --- a/tests/schains/config/static_accounts_test.py +++ b/tests/schains/config/static_accounts_test.py @@ -1,4 +1,4 @@ -from core.schains.config.static_accounts import is_static_accounts, static_accounts +from core.schains.config.legacy_data import is_static_accounts, static_accounts, static_groups SCHAIN_NAME = 'test' @@ -12,3 +12,8 @@ def test_static_accounts(): accounts = static_accounts(SCHAIN_NAME) assert isinstance(accounts, dict) assert accounts.get('accounts', None) + + +def test_static_groups(): + assert static_groups(SCHAIN_NAME) + assert static_groups('not-exists') == {} From e50473556bbc9972103d08cbaba1b58dcb873721 Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 28 May 2024 15:26:43 +0000 Subject: [PATCH 005/103] Fix static_groups test --- tests/schains/config/static_accounts_test.py | 71 +++++++++++++++++++- 1 file changed, 69 insertions(+), 2 deletions(-) diff --git a/tests/schains/config/static_accounts_test.py b/tests/schains/config/static_accounts_test.py index 94cf51df6..69c2303ce 100644 --- a/tests/schains/config/static_accounts_test.py +++ b/tests/schains/config/static_accounts_test.py @@ -1,7 +1,74 @@ +import os +import shutil + +import pytest + from 
core.schains.config.legacy_data import is_static_accounts, static_accounts, static_groups +from tools.configs import ENV_TYPE, STATIC_GROUPS_FOLDER +from tools.helper import write_json + SCHAIN_NAME = 'test' +STATIC_NODE_GROUPS = { + '1': { + "rotation": { + "leaving_node_id": 3, + "new_node_id": 4, + }, + "nodes": { + "0": [ + 0, + 159, + "0xgd" + ], + "4": [ + 4, + 31, + "0x5d" + ], + }, + "finish_ts": None, + "bls_public_key": None + }, + '0': { + "rotation": { + "leaving_node_id": 2, + "new_node_id": 3, + }, + "nodes": { + "0": [ + 0, + 159, + "0xgd" + ], + "3": [ + 7, + 61, + "0xbh" + ], + }, + "finish_ts": 1681390775, + "bls_public_key": { + "blsPublicKey0": "3", + "blsPublicKey1": "4", + "blsPublicKey2": "7", + "blsPublicKey3": "9" + } + } +} + + +@pytest.fixture +def groups_for_chain(): + parent_folder = os.path.join(STATIC_GROUPS_FOLDER, ENV_TYPE) + os.makedirs(parent_folder) + static_groups_env_path = os.path.join(parent_folder, os.path.join(f'schain-{SCHAIN_NAME}.json')) + try: + yield write_json(static_groups_env_path, STATIC_NODE_GROUPS) + finally: + shutil.rmtree(STATIC_GROUPS_FOLDER, ignore_errors=True) + def test_is_static_accounts(): assert is_static_accounts(SCHAIN_NAME) @@ -14,6 +81,6 @@ def test_static_accounts(): assert accounts.get('accounts', None) -def test_static_groups(): - assert static_groups(SCHAIN_NAME) +def test_static_groups(groups_for_chain): + assert static_groups(SCHAIN_NAME) == STATIC_NODE_GROUPS assert static_groups('not-exists') == {} From 48c62da46659e38b2cafdbf5b0d6182fbe17f037 Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 28 May 2024 15:27:19 +0000 Subject: [PATCH 006/103] Rename static_accounts_test to legacy_data_test --- .../config/{static_accounts_test.py => legacy_data_test.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/schains/config/{static_accounts_test.py => legacy_data_test.py} (100%) diff --git a/tests/schains/config/static_accounts_test.py b/tests/schains/config/legacy_data_test.py similarity index 100% rename from tests/schains/config/static_accounts_test.py rename to tests/schains/config/legacy_data_test.py From 0c7b7ebd7728b25cff20ebcdecd0569ddbc3b778 Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 28 May 2024 16:13:18 +0000 Subject: [PATCH 007/103] Add config generator test with static_group --- core/schains/config/generator.py | 5 +- tests/conftest.py | 24 +++++++- tests/schains/config/generator_test.py | 41 ++++++++++++++ tests/schains/config/legacy_data_test.py | 71 +----------------------- tests/utils.py | 49 ++++++++++++++++ 5 files changed, 119 insertions(+), 71 deletions(-) diff --git a/core/schains/config/generator.py b/core/schains/config/generator.py index 5c969b18b..de2e5c756 100644 --- a/core/schains/config/generator.py +++ b/core/schains/config/generator.py @@ -160,6 +160,9 @@ def generate_schain_config( 'chainID': get_chain_id(schain['name']) } + legacy_groups = static_groups(schain['name']) + node_groups.update(legacy_groups) + originator_address = get_schain_originator(schain) skale_config = generate_skale_section( @@ -234,8 +237,6 @@ def generate_schain_config_with_skale( schain = skale.schains.get_by_name(schain_name) node = skale.nodes.get(node_config.id) node_groups = get_previous_schain_groups(skale, schain_name) - legacy_groups = static_groups(schain_name) - node_groups.update(legacy_groups) is_owner_contract = is_address_contract(skale.web3, schain['mainnetOwner']) diff --git a/tests/conftest.py b/tests/conftest.py index 807884c44..7850a4dd5 100644 --- a/tests/conftest.py +++ 
b/tests/conftest.py @@ -47,7 +47,13 @@ from core.schains.skaled_status import init_skaled_status, SkaledStatus from core.schains.config.skale_manager_opts import SkaleManagerOpts -from tools.configs import CONFIG_FOLDER, ENV_TYPE, META_FILEPATH, SSL_CERTIFICATES_FILEPATH +from tools.configs import ( + CONFIG_FOLDER, + ENV_TYPE, + META_FILEPATH, + SSL_CERTIFICATES_FILEPATH, + STATIC_GROUPS_FOLDER +) from tools.configs.containers import CONTAINERS_FILEPATH from tools.configs.ima import SCHAIN_IMA_ABI_FILEPATH from tools.configs.schains import SCHAINS_DIR_PATH @@ -63,6 +69,7 @@ ENDPOINT, ETH_AMOUNT_PER_NODE, ETH_PRIVATE_KEY, + STATIC_NODE_GROUPS, generate_cert, generate_schain_config, get_test_rule_controller, @@ -631,3 +638,18 @@ def ima_migration_schedule(schain_db): yield migration_schedule_path finally: os.remove(migration_schedule_path) + + +@pytest.fixture +def static_groups_for_schain(_schain_name): + parent_folder = os.path.join(STATIC_GROUPS_FOLDER, ENV_TYPE) + os.makedirs(parent_folder) + static_groups_env_path = os.path.join( + parent_folder, + os.path.join(f'schain-{_schain_name}.json') + ) + try: + write_json(static_groups_env_path, STATIC_NODE_GROUPS) + yield STATIC_NODE_GROUPS + finally: + shutil.rmtree(STATIC_GROUPS_FOLDER, ignore_errors=True) diff --git a/tests/schains/config/generator_test.py b/tests/schains/config/generator_test.py index a0c7042e4..a2907a7bc 100644 --- a/tests/schains/config/generator_test.py +++ b/tests/schains/config/generator_test.py @@ -570,3 +570,44 @@ def test_generate_sync_node_config_static_accounts( ) config = schain_config.to_dict() assert not config['accounts'].get('0x1111111') + + +def test_generate_config_static_groups( + _schain_name, + schain_secret_key_file_default_chain, + static_groups_for_schain, + skale_manager_opts +): + node_id, generation, rotation_id = 1, 1, 0 + ecdsa_key_name = 'test' + node_groups = {} + + schain_data = { + 'name': _schain_name, + 'partOfNode': 0, + 'generation': 1, + 'mainnetOwner': TEST_MAINNET_OWNER_ADDRESS, + 'originator': TEST_ORIGINATOR_ADDRESS, + 'multitransactionMode': True + } + + schain_config = generate_schain_config( + schain=schain_data, + node=TEST_NODE, + node_id=node_id, + ecdsa_key_name=ecdsa_key_name, + rotation_id=rotation_id, + schain_nodes_with_schains=get_schain_node_with_schains(_schain_name), + node_groups=node_groups, + generation=generation, + is_owner_contract=False, + skale_manager_opts=skale_manager_opts, + common_bls_public_keys=COMMON_BLS_PUBLIC_KEY, + schain_base_port=10000, + sync_node=True + ) + config = schain_config.to_dict() + + for rotation_id in static_groups_for_schain: + config_group = config['skaleConfig']['sChain']['nodeGroups'][rotation_id] + assert config_group == static_groups_for_schain[rotation_id] diff --git a/tests/schains/config/legacy_data_test.py b/tests/schains/config/legacy_data_test.py index 69c2303ce..cec9f79c7 100644 --- a/tests/schains/config/legacy_data_test.py +++ b/tests/schains/config/legacy_data_test.py @@ -1,74 +1,9 @@ -import os -import shutil - -import pytest - from core.schains.config.legacy_data import is_static_accounts, static_accounts, static_groups -from tools.configs import ENV_TYPE, STATIC_GROUPS_FOLDER -from tools.helper import write_json +from tests.utils import STATIC_NODE_GROUPS SCHAIN_NAME = 'test' -STATIC_NODE_GROUPS = { - '1': { - "rotation": { - "leaving_node_id": 3, - "new_node_id": 4, - }, - "nodes": { - "0": [ - 0, - 159, - "0xgd" - ], - "4": [ - 4, - 31, - "0x5d" - ], - }, - "finish_ts": None, - "bls_public_key": None - }, - '0': 
{ - "rotation": { - "leaving_node_id": 2, - "new_node_id": 3, - }, - "nodes": { - "0": [ - 0, - 159, - "0xgd" - ], - "3": [ - 7, - 61, - "0xbh" - ], - }, - "finish_ts": 1681390775, - "bls_public_key": { - "blsPublicKey0": "3", - "blsPublicKey1": "4", - "blsPublicKey2": "7", - "blsPublicKey3": "9" - } - } -} - - -@pytest.fixture -def groups_for_chain(): - parent_folder = os.path.join(STATIC_GROUPS_FOLDER, ENV_TYPE) - os.makedirs(parent_folder) - static_groups_env_path = os.path.join(parent_folder, os.path.join(f'schain-{SCHAIN_NAME}.json')) - try: - yield write_json(static_groups_env_path, STATIC_NODE_GROUPS) - finally: - shutil.rmtree(STATIC_GROUPS_FOLDER, ignore_errors=True) - def test_is_static_accounts(): assert is_static_accounts(SCHAIN_NAME) @@ -81,6 +16,6 @@ def test_static_accounts(): assert accounts.get('accounts', None) -def test_static_groups(groups_for_chain): - assert static_groups(SCHAIN_NAME) == STATIC_NODE_GROUPS +def test_static_groups(_schain_name, static_groups_for_schain): + assert static_groups(_schain_name) == STATIC_NODE_GROUPS assert static_groups('not-exists') == {} diff --git a/tests/utils.py b/tests/utils.py index 29f37e745..fc7df6774 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -462,3 +462,52 @@ def generate_schain_config(schain_name): } } } + + +STATIC_NODE_GROUPS = { + '1': { + "rotation": { + "leaving_node_id": 3, + "new_node_id": 4, + }, + "nodes": { + "0": [ + 0, + 159, + "0xgd" + ], + "4": [ + 4, + 31, + "0x5d" + ], + }, + "finish_ts": None, + "bls_public_key": None + }, + '0': { + "rotation": { + "leaving_node_id": 2, + "new_node_id": 3, + }, + "nodes": { + "0": [ + 0, + 159, + "0xgd" + ], + "3": [ + 7, + 61, + "0xbh" + ], + }, + "finish_ts": 1681390775, + "bls_public_key": { + "blsPublicKey0": "3", + "blsPublicKey1": "4", + "blsPublicKey2": "7", + "blsPublicKey3": "9" + } + } +} From 525811bc7c82d92c3b00fee589baadac14fa7fe3 Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 28 May 2024 18:51:14 +0000 Subject: [PATCH 008/103] Improve static node group tests --- core/schains/config/generator.py | 2 + tests/schains/config/generator_test.py | 81 ++++++++++++++++++++++++-- 2 files changed, 79 insertions(+), 4 deletions(-) diff --git a/core/schains/config/generator.py b/core/schains/config/generator.py index de2e5c756..e2b4c979a 100644 --- a/core/schains/config/generator.py +++ b/core/schains/config/generator.py @@ -161,7 +161,9 @@ def generate_schain_config( } legacy_groups = static_groups(schain['name']) + print(node_groups) node_groups.update(legacy_groups) + print(node_groups) originator_address = get_schain_originator(schain) diff --git a/tests/schains/config/generator_test.py b/tests/schains/config/generator_test.py index a2907a7bc..1de12444f 100644 --- a/tests/schains/config/generator_test.py +++ b/tests/schains/config/generator_test.py @@ -43,6 +43,79 @@ ], } +NODE_GROUPS = { + '2': { + "rotation": { + "leaving_node_id": 0, + "new_node_id": 5, + }, + "nodes": { + "4": [ + 4, + 31, + "0x5d" + ], + "5": [ + 8, + 179, + "0xon" + ], + }, + "finish_ts": 1681498775, + "bls_public_key": { + "blsPublicKey0": "9", + "blsPublicKey1": "1", + "blsPublicKey2": "3", + "blsPublicKey3": "2" + } + }, + '1': { + "rotation": { + "leaving_node_id": 3, + "new_node_id": 4, + }, + "nodes": { + "0": [ + 0, + 159, + "0xgd" + ], + "4": [ + 4, + 31, + "0x5d" + ], + }, + "finish_ts": 1681390775, + "bls_public_key": { + "blsPublicKey0": "3", + "blsPublicKey1": "4", + "blsPublicKey2": "7", + "blsPublicKey3": "9" + } + }, + '0': { + "rotation": { + "leaving_node_id": 2, + 
"new_node_id": 3, + }, + "nodes": { + "0": [ + 0, + 159, + "0xgd" + ], + "3": [ + 7, + 61, + "0xbh" + ], + }, + "finish_ts": None, + "bls_public_key": None + } +} + TEST_ORIGINATOR_ADDRESS = '0x0B5e3eBB74eE281A24DDa3B1A4e70692c15EAC34' TEST_MAINNET_OWNER_ADDRESS = '0x30E1C96277735B03E59B3098204fd04FD0e78a46' @@ -580,7 +653,6 @@ def test_generate_config_static_groups( ): node_id, generation, rotation_id = 1, 1, 0 ecdsa_key_name = 'test' - node_groups = {} schain_data = { 'name': _schain_name, @@ -598,7 +670,7 @@ def test_generate_config_static_groups( ecdsa_key_name=ecdsa_key_name, rotation_id=rotation_id, schain_nodes_with_schains=get_schain_node_with_schains(_schain_name), - node_groups=node_groups, + node_groups=NODE_GROUPS, generation=generation, is_owner_contract=False, skale_manager_opts=skale_manager_opts, @@ -608,6 +680,7 @@ def test_generate_config_static_groups( ) config = schain_config.to_dict() + config_group = config['skaleConfig']['sChain']['nodeGroups'] + assert len(config_group.keys()) == 3 for rotation_id in static_groups_for_schain: - config_group = config['skaleConfig']['sChain']['nodeGroups'][rotation_id] - assert config_group == static_groups_for_schain[rotation_id] + assert config_group[rotation_id] == static_groups_for_schain[rotation_id] From a3dd11adec4ea949018bd9131d5a585afce348ea Mon Sep 17 00:00:00 2001 From: badrogger Date: Wed, 29 May 2024 12:36:30 +0000 Subject: [PATCH 009/103] Handle discrepancy rotation_id types --- core/schains/config/generator.py | 5 +++-- core/schains/config/legacy_data.py | 7 ++++++- tests/schains/config/generator_test.py | 8 ++++---- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/core/schains/config/generator.py b/core/schains/config/generator.py index e2b4c979a..d35c3b3ea 100644 --- a/core/schains/config/generator.py +++ b/core/schains/config/generator.py @@ -161,9 +161,10 @@ def generate_schain_config( } legacy_groups = static_groups(schain['name']) - print(node_groups) + logger.info('Legacy node groups: %s', legacy_groups) + logger.info('Vanilla node groups: %s', node_groups) node_groups.update(legacy_groups) - print(node_groups) + logger.info('Modified node groups: %s', node_groups) originator_address = get_schain_originator(schain) diff --git a/core/schains/config/legacy_data.py b/core/schains/config/legacy_data.py index bb079f16f..6c05e6761 100644 --- a/core/schains/config/legacy_data.py +++ b/core/schains/config/legacy_data.py @@ -42,7 +42,12 @@ def static_groups(schain_name: str) -> dict: static_groups_env_path = static_groups_filepath(schain_name) if not os.path.isfile(static_groups_env_path): return {} - return read_json(static_groups_env_path) + groups = read_json(static_groups_env_path) + prepared_groups = {} + for plain_rotation_id, data in groups.items(): + rotation_id = int(plain_rotation_id) + prepared_groups[rotation_id] = data + return prepared_groups def static_groups_filepath(schain_name: str) -> str: diff --git a/tests/schains/config/generator_test.py b/tests/schains/config/generator_test.py index 1de12444f..19dfe8dab 100644 --- a/tests/schains/config/generator_test.py +++ b/tests/schains/config/generator_test.py @@ -44,7 +44,7 @@ } NODE_GROUPS = { - '2': { + 2: { "rotation": { "leaving_node_id": 0, "new_node_id": 5, @@ -69,7 +69,7 @@ "blsPublicKey3": "2" } }, - '1': { + 1: { "rotation": { "leaving_node_id": 3, "new_node_id": 4, @@ -94,7 +94,7 @@ "blsPublicKey3": "9" } }, - '0': { + 0: { "rotation": { "leaving_node_id": 2, "new_node_id": 3, @@ -683,4 +683,4 @@ def test_generate_config_static_groups( 
config_group = config['skaleConfig']['sChain']['nodeGroups'] assert len(config_group.keys()) == 3 for rotation_id in static_groups_for_schain: - assert config_group[rotation_id] == static_groups_for_schain[rotation_id] + assert config_group[int(rotation_id)] == static_groups_for_schain[rotation_id] From e04d4a0510c11159fca8ff19abfaa827ec00dbe6 Mon Sep 17 00:00:00 2001 From: badrogger Date: Wed, 29 May 2024 14:28:26 +0000 Subject: [PATCH 010/103] Fix static_groups test --- core/schains/config/legacy_data.py | 2 -- tests/schains/config/legacy_data_test.py | 4 +++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/core/schains/config/legacy_data.py b/core/schains/config/legacy_data.py index 6c05e6761..e86f0661d 100644 --- a/core/schains/config/legacy_data.py +++ b/core/schains/config/legacy_data.py @@ -52,6 +52,4 @@ def static_groups(schain_name: str) -> dict: def static_groups_filepath(schain_name: str) -> str: static_groups_env_path = os.path.join(STATIC_GROUPS_FOLDER, ENV_TYPE) - if not os.path.isdir(static_groups_env_path): - return '' return os.path.join(static_groups_env_path, f'schain-{schain_name}.json') diff --git a/tests/schains/config/legacy_data_test.py b/tests/schains/config/legacy_data_test.py index cec9f79c7..6b8e7b795 100644 --- a/tests/schains/config/legacy_data_test.py +++ b/tests/schains/config/legacy_data_test.py @@ -17,5 +17,7 @@ def test_static_accounts(): def test_static_groups(_schain_name, static_groups_for_schain): - assert static_groups(_schain_name) == STATIC_NODE_GROUPS + groups = static_groups(_schain_name) + for key, value in STATIC_NODE_GROUPS.items(): + assert groups[int(key)] == value assert static_groups('not-exists') == {} From e310d31e2d1abeb50fcc627752a39bf7abafa65f Mon Sep 17 00:00:00 2001 From: badrogger Date: Wed, 29 May 2024 15:56:58 +0000 Subject: [PATCH 011/103] Move node_group logs to debug level --- core/schains/config/generator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/schains/config/generator.py b/core/schains/config/generator.py index d35c3b3ea..b386efdfb 100644 --- a/core/schains/config/generator.py +++ b/core/schains/config/generator.py @@ -161,10 +161,10 @@ def generate_schain_config( } legacy_groups = static_groups(schain['name']) - logger.info('Legacy node groups: %s', legacy_groups) - logger.info('Vanilla node groups: %s', node_groups) + logger.debug('Legacy node groups: %s', legacy_groups) + logger.debug('Vanilla node groups: %s', node_groups) node_groups.update(legacy_groups) - logger.info('Modified node groups: %s', node_groups) + logger.debug('Modified node groups: %s', node_groups) originator_address = get_schain_originator(schain) From 5c59d13e96fc4fa352b18d0676728fd1b0c9ca75 Mon Sep 17 00:00:00 2001 From: badrogger Date: Mon, 3 Jun 2024 18:36:00 +0000 Subject: [PATCH 012/103] Fix legacy groups preparation --- core/schains/config/legacy_data.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/core/schains/config/legacy_data.py b/core/schains/config/legacy_data.py index e86f0661d..000d2455d 100644 --- a/core/schains/config/legacy_data.py +++ b/core/schains/config/legacy_data.py @@ -19,6 +19,8 @@ import os +from skale.schain_config.rotation_history import RotationNodeData + from tools.helper import read_json from tools.configs import STATIC_ACCOUNTS_FOLDER, STATIC_GROUPS_FOLDER, ENV_TYPE @@ -47,6 +49,11 @@ def static_groups(schain_name: str) -> dict: for plain_rotation_id, data in groups.items(): rotation_id = int(plain_rotation_id) prepared_groups[rotation_id] = data + 
prepared_nodes = prepared_groups[rotation_id]['nodes'] + node_ids_string = list(data['nodes'].keys()) + for node_id_string in node_ids_string: + node_info = prepared_nodes.pop(node_id_string) + prepared_nodes[int(node_id_string)] = RotationNodeData(*node_info) return prepared_groups From 905ddb7ba39ba164881d4ce7e02d0414a94fdc15 Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 4 Jun 2024 09:35:28 +0000 Subject: [PATCH 013/103] Fix tests --- tests/schains/config/generator_test.py | 6 ++++-- tests/schains/config/legacy_data_test.py | 4 +++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/schains/config/generator_test.py b/tests/schains/config/generator_test.py index 19dfe8dab..badf7aebc 100644 --- a/tests/schains/config/generator_test.py +++ b/tests/schains/config/generator_test.py @@ -682,5 +682,7 @@ def test_generate_config_static_groups( config_group = config['skaleConfig']['sChain']['nodeGroups'] assert len(config_group.keys()) == 3 - for rotation_id in static_groups_for_schain: - assert config_group[int(rotation_id)] == static_groups_for_schain[rotation_id] + for rotation_id_string in static_groups_for_schain: + rotation_id = int(rotation_id_string) + assert json.dumps(config_group[rotation_id]) == \ + json.dumps(static_groups_for_schain[rotation_id_string]) diff --git a/tests/schains/config/legacy_data_test.py b/tests/schains/config/legacy_data_test.py index 6b8e7b795..8708dc3fb 100644 --- a/tests/schains/config/legacy_data_test.py +++ b/tests/schains/config/legacy_data_test.py @@ -1,3 +1,5 @@ +import json + from core.schains.config.legacy_data import is_static_accounts, static_accounts, static_groups from tests.utils import STATIC_NODE_GROUPS @@ -19,5 +21,5 @@ def test_static_accounts(): def test_static_groups(_schain_name, static_groups_for_schain): groups = static_groups(_schain_name) for key, value in STATIC_NODE_GROUPS.items(): - assert groups[int(key)] == value + assert json.dumps(groups[int(key)]) == json.dumps(value) assert static_groups('not-exists') == {} From 625c745bb0c2935a736fcb7b7aceff1ca1b95f92 Mon Sep 17 00:00:00 2001 From: badrogger Date: Mon, 17 Jun 2024 15:06:23 +0000 Subject: [PATCH 014/103] Add interaction through node_cli.status file for repair mode --- core/schains/checks.py | 145 +++++------- core/schains/config/directory.py | 7 +- core/schains/config/static_params.py | 6 +- core/schains/monitor/action.py | 9 +- core/schains/monitor/main.py | 4 +- core/schains/monitor/skaled_monitor.py | 37 ++- core/schains/skaled_status.py | 84 +++++-- tests/conftest.py | 199 +++++++--------- tests/schains/monitor/skaled_monitor_test.py | 234 ++++++------------- tests/schains/skaled_status_test.py | 30 ++- tests/utils.py | 1 + tools/configs/schains.py | 1 + web/migrations.py | 10 + web/models/schain.py | 7 + 14 files changed, 368 insertions(+), 406 deletions(-) diff --git a/core/schains/checks.py b/core/schains/checks.py index 73f12313b..108a88333 100644 --- a/core/schains/checks.py +++ b/core/schains/checks.py @@ -32,11 +32,11 @@ get_base_port_from_config, get_node_ips_from_config, get_own_ip_from_config, - get_local_schain_http_endpoint_from_config + get_local_schain_http_endpoint_from_config, ) from core.schains.config.main import ( get_skaled_config_rotations_ids, - get_upstream_config_rotation_ids + get_upstream_config_rotation_ids, ) from core.schains.dkg.utils import get_secret_key_share_filepath from core.schains.firewall.types import IRuleController @@ -45,14 +45,14 @@ from core.schains.rpc import ( check_endpoint_alive, check_endpoint_blocks, - 
get_endpoint_alive_check_timeout + get_endpoint_alive_check_timeout, ) from core.schains.external_config import ExternalConfig, ExternalState from core.schains.runner import ( get_container_name, get_ima_container_time_frame, get_image_name, - is_new_image_pulled + is_new_image_pulled, ) from core.schains.skaled_exit_codes import SkaledExitCodes from core.schains.volume import is_volume_exists @@ -79,7 +79,7 @@ 'rpc', 'blocks', 'process', - 'ima_container' + 'ima_container', ] TG_ALLOWED_CHECKS = [ @@ -90,7 +90,7 @@ 'rpc', 'blocks', 'process', - 'ima_container' + 'ima_container', ] @@ -111,11 +111,13 @@ class IChecks(ABC): def get_name(self) -> str: pass - def get_all(self, - log: bool = True, - save: bool = False, - expose: bool = False, - needed: Optional[List[str]] = None) -> Dict: + def get_all( + self, + log: bool = True, + save: bool = False, + expose: bool = False, + needed: Optional[List[str]] = None, + ) -> Dict: if needed: names = needed else: @@ -140,25 +142,26 @@ def is_healthy(self) -> bool: @classmethod def get_check_names(cls): - return list(filter( - lambda c: not c.startswith('_') and isinstance( - getattr(cls, c), property), - dir(cls) - )) + return list( + filter( + lambda c: not c.startswith('_') and isinstance(getattr(cls, c), property), dir(cls) + ) + ) class ConfigChecks(IChecks): - def __init__(self, - schain_name: str, - node_id: int, - schain_record: SChainRecord, - rotation_id: int, - stream_version: str, - current_nodes: list[ExtendedManagerNodeInfo], - estate: ExternalState, - sync_node: bool = False, - econfig: Optional[ExternalConfig] = None - ) -> None: + def __init__( + self, + schain_name: str, + node_id: int, + schain_record: SChainRecord, + rotation_id: int, + stream_version: str, + current_nodes: list[ExtendedManagerNodeInfo], + estate: ExternalState, + sync_node: bool = False, + econfig: Optional[ExternalConfig] = None, + ) -> None: self.name = schain_name self.node_id = node_id self.schain_record = schain_record @@ -168,9 +171,7 @@ def __init__(self, self.estate = estate self.sync_node = sync_node self.econfig = econfig or ExternalConfig(schain_name) - self.cfm: ConfigFileManager = ConfigFileManager( - schain_name=schain_name - ) + self.cfm: ConfigFileManager = ConfigFileManager(schain_name=schain_name) self.statsd_client = get_statsd_client() def get_name(self) -> str: @@ -185,10 +186,7 @@ def config_dir(self) -> CheckRes: @property def dkg(self) -> CheckRes: """Checks that DKG procedure is completed""" - secret_key_share_filepath = get_secret_key_share_filepath( - self.name, - self.rotation_id - ) + secret_key_share_filepath = get_secret_key_share_filepath(self.name, self.rotation_id) return CheckRes(os.path.isfile(secret_key_share_filepath)) @property @@ -227,17 +225,14 @@ def upstream_config(self) -> CheckRes: exists, node_ips_updated, stream_updated, - triggered + triggered, ) return CheckRes(exists and node_ips_updated and stream_updated and not triggered) @property def external_state(self) -> CheckRes: actual_state = self.econfig.get() - logger.debug( - 'Checking external config. Current %s. Saved %s', - self.estate, actual_state - ) + logger.debug('Checking external config. Current %s. 
Saved %s', self.estate, actual_state) return CheckRes(self.econfig.synced(self.estate)) @@ -250,7 +245,7 @@ def __init__( *, econfig: Optional[ExternalConfig] = None, dutils: Optional[DockerUtils] = None, - sync_node: bool = False + sync_node: bool = False, ): self.name = schain_name self.schain_record = schain_record @@ -259,9 +254,7 @@ def __init__( self.econfig = econfig or ExternalConfig(name=schain_name) self.sync_node = sync_node self.rc = rule_controller - self.cfm: ConfigFileManager = ConfigFileManager( - schain_name=schain_name - ) + self.cfm: ConfigFileManager = ConfigFileManager(schain_name=schain_name) self.statsd_client = get_statsd_client() def get_name(self) -> str: @@ -278,9 +271,7 @@ def rotation_id_updated(self) -> CheckRes: upstream_rotations = get_upstream_config_rotation_ids(self.cfm) config_rotations = get_skaled_config_rotations_ids(self.cfm) logger.debug( - 'Comparing rotation_ids. Upstream: %s. Config: %s', - upstream_rotations, - config_rotations + 'Comparing rotation_ids. Upstream: %s. Config: %s', upstream_rotations, config_rotations ) return CheckRes(upstream_rotations == config_rotations) @@ -292,19 +283,14 @@ def config_updated(self) -> CheckRes: @property def config(self) -> CheckRes: - """ Checks that sChain config file exists """ + """Checks that sChain config file exists""" return CheckRes(self.cfm.skaled_config_exists()) @property def volume(self) -> CheckRes: """Checks that sChain volume exists""" - return CheckRes( - is_volume_exists( - self.name, - sync_node=self.sync_node, - dutils=self.dutils) - ) + return CheckRes(is_volume_exists(self.name, sync_node=self.sync_node, dutils=self.dutils)) @property def firewall_rules(self) -> CheckRes: @@ -316,10 +302,7 @@ def firewall_rules(self) -> CheckRes: own_ip = get_own_ip_from_config(conf) ranges = self.econfig.ranges self.rc.configure( - base_port=base_port, - own_ip=own_ip, - node_ips=node_ips, - sync_ip_ranges=ranges + base_port=base_port, own_ip=own_ip, node_ips=node_ips, sync_ip_ranges=ranges ) logger.debug(f'Rule controller {self.rc.expected_rules()}') return CheckRes(self.rc.is_rules_synced()) @@ -364,19 +347,19 @@ def ima_container(self) -> CheckRes: updated_time_frame = time_frame == container_time_frame logger.debug( 'IMA image %s, container image %s, time frame %d, container_time_frame %d', - expected_image, image, time_frame, container_time_frame + expected_image, + image, + time_frame, + container_time_frame, ) data = { 'container_running': container_running, 'updated_image': updated_image, 'new_image_pulled': new_image_pulled, - 'updated_time_frame': updated_time_frame + 'updated_time_frame': updated_time_frame, } - logger.debug( - '%s, IMA check - %s', - self.name, data - ) + logger.debug('%s, IMA check - %s', self.name, data) result: bool = all(data.values()) return CheckRes(result, data=data) @@ -387,9 +370,7 @@ def rpc(self) -> CheckRes: if self.config: config = self.cfm.skaled_config http_endpoint = get_local_schain_http_endpoint_from_config(config) - timeout = get_endpoint_alive_check_timeout( - self.schain_record.failed_rpc_count - ) + timeout = get_endpoint_alive_check_timeout(self.schain_record.failed_rpc_count) res = check_endpoint_alive(http_endpoint, timeout=timeout) return CheckRes(res) @@ -430,7 +411,7 @@ def __init__( *, econfig: Optional[ExternalConfig] = None, dutils: DockerUtils = None, - sync_node: bool = False + sync_node: bool = False, ): self._subjects = [ ConfigChecks( @@ -442,7 +423,7 @@ def __init__( current_nodes=current_nodes, estate=estate, econfig=econfig, - 
sync_node=sync_node + sync_node=sync_node, ), SkaledChecks( schain_name=schain_name, @@ -450,8 +431,8 @@ def __init__( rule_controller=rule_controller, econfig=econfig, dutils=dutils, - sync_node=sync_node - ) + sync_node=sync_node, + ), ] def __getattr__(self, attr: str) -> Any: @@ -469,11 +450,7 @@ def get_all(self, log: bool = True, save: bool = False, needed: Optional[List[st plain_checks = {} for subj in self._subjects: logger.debug('Running checks for %s', subj) - subj_checks = subj.get_all( - log=False, - save=False, - needed=needed - ) + subj_checks = subj.get_all(log=False, save=False, needed=needed) plain_checks.update(subj_checks) if not self.estate or not self.estate.ima_linked: if 'ima_container' in plain_checks: @@ -492,13 +469,9 @@ def get_api_checks_status(status: Dict, allowed: List = API_ALLOWED_CHECKS) -> D def save_checks_dict(schain_name, checks_dict): schain_check_path = get_schain_check_filepath(schain_name) - logger.info( - f'Saving checks for the chain {schain_name}: {schain_check_path}') + logger.info(f'Saving checks for the chain {schain_name}: {schain_check_path}') try: - write_json(schain_check_path, { - 'time': time.time(), - 'checks': checks_dict - }) + write_json(schain_check_path, {'time': time.time(), 'checks': checks_dict}) except Exception: logger.exception(f'Failed to save checks: {schain_check_path}') @@ -510,14 +483,12 @@ def log_checks_dict(schain_name, checks_dict): if not checks_dict[check]: failed_checks.append(check) if len(failed_checks) != 0: - failed_checks_str = ", ".join(failed_checks) + failed_checks_str = ', '.join(failed_checks) logger.info( arguments_list_string( - { - 'sChain name': schain_name, - 'Failed checks': failed_checks_str - }, - 'Failed sChain checks', 'error' + {'sChain name': schain_name, 'Failed checks': failed_checks_str}, + 'Failed sChain checks', + 'error', ) ) diff --git a/core/schains/config/directory.py b/core/schains/config/directory.py index 8d2c7a66d..ce11c8196 100644 --- a/core/schains/config/directory.py +++ b/core/schains/config/directory.py @@ -25,10 +25,11 @@ from tools.configs.schains import ( BASE_SCHAIN_CONFIG_FILEPATH, + NODE_CLI_STATUS_FILENAME, SCHAINS_DIR_PATH, SCHAINS_DIR_PATH_HOST, SCHAIN_SCHECKS_FILENAME, - SKALED_STATUS_FILENAME + SKALED_STATUS_FILENAME, ) @@ -58,6 +59,10 @@ def skaled_status_filepath(name: str) -> str: return os.path.join(schain_config_dir(name), SKALED_STATUS_FILENAME) +def node_cli_status_filepath(name: str) -> str: + return os.path.join(schain_config_dir(name), NODE_CLI_STATUS_FILENAME) + + def get_schain_check_filepath(schain_name): schain_dir_path = schain_config_dir(schain_name) return os.path.join(schain_dir_path, SCHAIN_SCHECKS_FILENAME) diff --git a/core/schains/config/static_params.py b/core/schains/config/static_params.py index 83d140c12..62e83761d 100644 --- a/core/schains/config/static_params.py +++ b/core/schains/config/static_params.py @@ -21,13 +21,15 @@ from core.schains.config.helper import get_static_params from tools.configs import ENV_TYPE +from typing import Optional + def get_static_schain_cmd(env_type: str = ENV_TYPE) -> list: static_params = get_static_params(env_type) return static_params['schain_cmd'] -def get_static_schain_info(schain_name: str, env_type: str = ENV_TYPE) -> dict | None: +def get_static_schain_info(schain_name: str, env_type: str = ENV_TYPE) -> Optional[dict]: static_params = get_static_params(env_type) static_params_schain = static_params['schain'] processed_params = {} @@ -36,7 +38,7 @@ def get_static_schain_info(schain_name: str, 
env_type: str = ENV_TYPE) -> dict | return processed_params -def get_schain_static_param(static_param_schain: dict | int, schain_name: str) -> int: +def get_schain_static_param(static_param_schain: dict, schain_name: str) -> int: if isinstance(static_param_schain, int): return static_param_schain elif isinstance(static_param_schain, dict) and schain_name in static_param_schain: diff --git a/core/schains/monitor/action.py b/core/schains/monitor/action.py index dcc7696b4..d4baa4e20 100644 --- a/core/schains/monitor/action.py +++ b/core/schains/monitor/action.py @@ -555,4 +555,11 @@ def notify_repair_mode(self) -> None: @BaseActionManager.monitor_block def disable_repair_mode(self) -> None: logger.info('Switching off repair mode') - self.schain_record.set_repair_mode(False) + if self.schain_record.repair_mode: + self.schain_record.set_repair_mode(False) + + @BaseActionManager.monitor_block + def update_repair_ts(self, new_ts: int) -> None: + logger.info('Setting repair_ts to %d', new_ts) + new_dt = datetime.utcfromtimestamp(new_ts) + self.schain_record.set_repair_date(new_dt) diff --git a/core/schains/monitor/main.py b/core/schains/monitor/main.py index 27405872c..a1e65aa23 100644 --- a/core/schains/monitor/main.py +++ b/core/schains/monitor/main.py @@ -49,7 +49,7 @@ from core.schains.external_config import ExternalConfig, ExternalState from core.schains.task import keep_tasks_running, Task from core.schains.config.static_params import get_automatic_repair_option -from core.schains.skaled_status import get_skaled_status +from core.schains.skaled_status import get_node_cli_status, get_skaled_status from core.node import get_current_nodes from tools.docker_utils import DockerUtils @@ -150,6 +150,7 @@ def run_skaled_pipeline( ) skaled_status = get_skaled_status(name) + ncli_status = get_node_cli_status(name) skaled_am = SkaledActionManager( schain=schain, @@ -174,6 +175,7 @@ def run_skaled_pipeline( status=status, schain_record=schain_record, skaled_status=skaled_status, + ncli_status=ncli_status, automatic_repair=automatic_repair ) diff --git a/core/schains/monitor/skaled_monitor.py b/core/schains/monitor/skaled_monitor.py index 27c3a4338..4095bac93 100644 --- a/core/schains/monitor/skaled_monitor.py +++ b/core/schains/monitor/skaled_monitor.py @@ -26,7 +26,7 @@ from core.schains.checks import SkaledChecks from core.schains.monitor.action import SkaledActionManager from core.schains.config.main import get_number_of_secret_shares -from core.schains.skaled_status import SkaledStatus +from core.schains.skaled_status import NodeCliStatus, SkaledStatus from core.schains.ssl import ssl_reload_needed from tools.configs import SYNC_NODE from tools.resources import get_statsd_client @@ -37,11 +37,7 @@ class BaseSkaledMonitor(IMonitor): - def __init__( - self, - action_manager: SkaledActionManager, - checks: SkaledChecks - ) -> None: + def __init__(self, action_manager: SkaledActionManager, checks: SkaledChecks) -> None: self.am = action_manager self.checks = checks self.statsd_client = get_statsd_client() @@ -64,7 +60,6 @@ def run(self): class RegularSkaledMonitor(BaseSkaledMonitor): - def execute(self) -> None: if not self.checks.firewall_rules: self.am.firewall_rules() @@ -90,7 +85,7 @@ def execute(self) -> None: logger.warning( 'Repair mode execution, record: %s, exit_code_ok: %s', self.checks.schain_record.repair_mode, - self.checks.exit_code_ok.status + self.checks.exit_code_ok.status, ) self.am.notify_repair_mode() self.am.cleanup_schain_docker_entity() @@ -102,7 +97,7 @@ def execute(self) -> 
None: self.am.skaled_container(download_snapshot=True) else: self.am.reset_restart_count() - self.am.disable_repair_mode() + self.am.update_repair_ts(new_ts=int(time.time())) class BackupSkaledMonitor(BaseSkaledMonitor): @@ -223,10 +218,7 @@ def execute(self): if not self.checks.firewall_rules: self.am.firewall_rules() if not self.checks.skaled_container: - self.am.skaled_container( - download_snapshot=True, - start_ts=self.am.finish_ts - ) + self.am.skaled_container(download_snapshot=True, start_ts=self.am.finish_ts) else: self.am.reset_restart_counter() if not self.checks.ima_container: @@ -241,12 +233,13 @@ def is_repair_mode( schain_record: SChainRecord, status: Dict, skaled_status: Optional[SkaledStatus], - automatic_repair: bool + ncli_status: Optional[NodeCliStatus], + automatic_repair: bool, ) -> bool: - if schain_record.repair_mode: + repair_ts = int(schain_record.repair_date.timestamp()) + if ncli_status is not None and ncli_status.repair_ts > repair_ts: return True - else: - return automatic_repair and is_skaled_repair_status(status, skaled_status) + return automatic_repair and is_skaled_repair_status(status, skaled_status) def is_reload_group_mode(status: Dict, finish_ts: Optional[int]) -> bool: @@ -262,10 +255,7 @@ def is_reload_ip_mode(status: Dict, reload_ts: Optional[int]) -> bool: return status['config'] and not status['config_updated'] -def is_config_update_time( - status: Dict, - skaled_status: Optional[SkaledStatus] -) -> bool: +def is_config_update_time(status: Dict, skaled_status: Optional[SkaledStatus]) -> bool: if not skaled_status: return False return not status['skaled_container'] and skaled_status.exit_time_reached @@ -300,7 +290,8 @@ def get_skaled_monitor( status: Dict, schain_record: SChainRecord, skaled_status: SkaledStatus, - automatic_repair: bool = True + ncli_status: NodeCliStatus, + automatic_repair: bool = True, ) -> Type[BaseSkaledMonitor]: logger.info('Choosing skaled monitor') if skaled_status: @@ -325,7 +316,7 @@ def get_skaled_monitor( mon_type = NoConfigSkaledMonitor elif is_backup_mode(schain_record): mon_type = BackupSkaledMonitor - elif is_repair_mode(schain_record, status, skaled_status, automatic_repair): + elif is_repair_mode(schain_record, status, skaled_status, ncli_status, automatic_repair): mon_type = RepairSkaledMonitor elif is_recreate_mode(schain_record): mon_type = RecreateSkaledMonitor diff --git a/core/schains/skaled_status.py b/core/schains/skaled_status.py index 02186a4a9..2b40b2271 100644 --- a/core/schains/skaled_status.py +++ b/core/schains/skaled_status.py @@ -22,22 +22,52 @@ import logging from json.decoder import JSONDecodeError from typing import Optional +from abc import ABCMeta, abstractmethod -from core.schains.config.directory import skaled_status_filepath +from core.schains.config.directory import node_cli_status_filepath, skaled_status_filepath from tools.config_utils import config_getter, log_broken_status_file from tools.helper import read_json logger = logging.getLogger(__name__) -class SkaledStatus: - def __init__(self, filepath: str): +class IStatus(metaclass=ABCMeta): + @abstractmethod + def __init__(self, filepath: str) -> None: + pass + + @property + @abstractmethod + def filepath(self) -> str: + pass + + @property + def all(self) -> dict: + if not os.path.isfile(self.filepath): + logger.warning("File %s is not found", self.filepath) + return + try: + return read_json(self.filepath) + except JSONDecodeError: + log_broken_status_file(self.filepath) + return {} + + def log(self) -> None: + 
logger.info(f'{self.__class__.__name__}: \n' + json.dumps(self.all, indent=4)) + + +class SkaledStatus(IStatus): + def __init__(self, filepath: str) -> None: """ Read-only wrapper for skaled.status file, reads from the file each time. Returns dict for top-level keys, True or False for second-level keys. Returns None for all keys if file is not found. """ - self.filepath = filepath + self._filepath = filepath + + @property + def filepath(self) -> str: + return self._filepath @property @config_getter @@ -84,28 +114,48 @@ def start_from_snapshot(self) -> bool: return return exit_state['StartFromSnapshot'] + +class NodeCliStatus(IStatus): + def __init__(self, filepath: str) -> None: + """ + Read-only wrapper for node_cli.status file, reads from the file each time. + """ + self._filepath = filepath + @property - def all(self) -> dict: - if not os.path.isfile(self.filepath): - logger.warning("File %s is not found", self.filepath) - return - try: - return read_json(self.filepath) - except JSONDecodeError: - log_broken_status_file(self.filepath) - return {} + @config_getter + def repair_ts(self) -> int: + return 'repair_ts', self.filepath - def log(self) -> None: - logger.info('skaled status file: \n' + json.dumps(self.all, indent=4)) + @property + @config_getter + def snapshot_from(self) -> int: + return 'snapshot_from', self.filepath + + @property + def filepath(self) -> str: + return self._filepath -def init_skaled_status(schain_name) -> SkaledStatus: +def init_skaled_status(schain_name: str) -> SkaledStatus: status_filepath = skaled_status_filepath(schain_name) return SkaledStatus(status_filepath) -def get_skaled_status(schain_name) -> Optional[SkaledStatus]: +def get_skaled_status(schain_name: str) -> Optional[SkaledStatus]: status_path = skaled_status_filepath(schain_name) if os.path.isfile(status_path): return SkaledStatus(status_path) return None + + +def init_node_cli_status(schain_name: str) -> SkaledStatus: + status_filepath = node_cli_status_filepath(schain_name) + return NodeCliStatus(status_filepath) + + +def get_node_cli_status(schain_name: str) -> Optional[SkaledStatus]: + status_path = node_cli_status_filepath(schain_name) + if os.path.isfile(status_path): + return NodeCliStatus(status_path) + return None diff --git a/tests/conftest.py b/tests/conftest.py index 807884c44..cd17972ee 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -15,9 +15,7 @@ from skale import SkaleManager from skale.wallets import Web3Wallet from skale.utils.account_tools import generate_account, send_eth -from skale.utils.contracts_provision.fake_multisig_contract import ( - deploy_fake_multisig_contract -) +from skale.utils.contracts_provision.fake_multisig_contract import deploy_fake_multisig_contract from skale.utils.contracts_provision.main import ( add_test_permissions, add_test2_schain_type, @@ -27,7 +25,7 @@ create_nodes, create_schain, link_nodes_to_validator, - setup_validator + setup_validator, ) from skale.utils.web3_utils import init_web3 @@ -38,13 +36,18 @@ from core.schains.config.helper import ( get_base_port_from_config, get_node_ips_from_config, - get_own_ip_from_config + get_own_ip_from_config, ) from core.schains.config.directory import schain_config_dir, skaled_status_filepath from core.schains.cleaner import remove_schain_container, remove_schain_volume from core.schains.ima import ImaData from core.schains.external_config import ExternalConfig, ExternalState -from core.schains.skaled_status import init_skaled_status, SkaledStatus +from core.schains.skaled_status import ( + 
init_node_cli_status, + init_skaled_status, + node_cli_status_filepath, + SkaledStatus, +) from core.schains.config.skale_manager_opts import SkaleManagerOpts from tools.configs import CONFIG_FOLDER, ENV_TYPE, META_FILEPATH, SSL_CERTIFICATES_FILEPATH @@ -60,6 +63,7 @@ from tests.utils import ( ALLOWED_RANGES, CONFIG_STREAM, + CURRENT_TS, ENDPOINT, ETH_AMOUNT_PER_NODE, ETH_PRIVATE_KEY, @@ -69,7 +73,7 @@ IMA_MIGRATION_TS, init_skale_from_wallet, init_skale_ima, - upsert_schain_record_with_config + upsert_schain_record_with_config, ) NUMBER_OF_NODES = 2 @@ -81,14 +85,8 @@ def images(): cinfo = {} with open(CONTAINERS_FILEPATH, 'r') as cf: json.load(cinfo, cf) - schain_image = '{}/{}'.format( - cinfo['schain']['name'], - cinfo['schain']['version'] - ) - ima_image = '{}/{}'.format( - cinfo['ima']['name'], - cinfo['ima']['version'] - ) + schain_image = '{}/{}'.format(cinfo['schain']['name'], cinfo['schain']['version']) + ima_image = '{}/{}'.format(cinfo['ima']['name'], cinfo['ima']['version']) dclient.images.pull(schain_image) dclient.images.pull(ima_image) @@ -104,14 +102,14 @@ def predeployed_ima(): @pytest.fixture(scope='session') def web3(): - """ Returns a SKALE Manager instance with provider from config """ + """Returns a SKALE Manager instance with provider from config""" w3 = init_web3(ENDPOINT) return w3 @pytest.fixture(scope='session') def skale(web3): - """ Returns a SKALE Manager instance with provider from config """ + """Returns a SKALE Manager instance with provider from config""" wallet = Web3Wallet(ETH_PRIVATE_KEY, web3) skale_obj = init_skale_from_wallet(wallet) add_test_permissions(skale_obj) @@ -139,7 +137,7 @@ def node_wallets(skale): web3=skale.web3, wallet=skale.wallet, receiver_address=wallet.address, - amount=ETH_AMOUNT_PER_NODE + amount=ETH_AMOUNT_PER_NODE, ) wallets.append(wallet) return wallets @@ -147,10 +145,7 @@ def node_wallets(skale): @pytest.fixture def node_skales(skale, node_wallets): - return [ - SkaleManager(ENDPOINT, ABI_FILEPATH, wallet) - for wallet in node_wallets - ] + return [SkaleManager(ENDPOINT, ABI_FILEPATH, wallet) for wallet in node_wallets] @pytest.fixture @@ -171,10 +166,7 @@ def skale_ima(): @pytest.fixture def ssl_folder(): - pathlib.Path(SSL_CERTIFICATES_FILEPATH).mkdir( - parents=True, - exist_ok=True - ) + pathlib.Path(SSL_CERTIFICATES_FILEPATH).mkdir(parents=True, exist_ok=True) try: yield SSL_CERTIFICATES_FILEPATH finally: @@ -203,63 +195,63 @@ def get_skaled_status_dict( exit_time_reached=False, clear_data_dir=False, start_from_snapshot=False, - start_again=False + start_again=False, ): return { - "subsystemRunning": { - "SnapshotDownloader": snapshot_downloader, - "Blockchain": False, - "Rpc": False + 'subsystemRunning': { + 'SnapshotDownloader': snapshot_downloader, + 'Blockchain': False, + 'Rpc': False, + }, + 'exitState': { + 'ClearDataDir': clear_data_dir, + 'StartAgain': start_again, + 'StartFromSnapshot': start_from_snapshot, + 'ExitTimeReached': exit_time_reached, }, - "exitState": { - "ClearDataDir": clear_data_dir, - "StartAgain": start_again, - "StartFromSnapshot": start_from_snapshot, - "ExitTimeReached": exit_time_reached - } } SECRET_KEY = { - "common_public_key": [ + 'common_public_key': [ 11111111111111111111111111111111111111111111111111111111111111111111111111111, 1111111111111111111111111111111111111111111111111111111111111111111111111111, 1111111111111111111111111111111111111111111111111111111111111111111111111111, - 11111111111111111111111111111111111111111111111111111111111111111111111111111 + 
11111111111111111111111111111111111111111111111111111111111111111111111111111, ], - "public_key": [ - "1111111111111111111111111111111111111111111111111111111111111111111111111111", - "1111111111111111111111111111111111111111111111111111111111111111111111111111", - "1111111111111111111111111111111111111111111111111111111111111111111111111111", - "11111111111111111111111111111111111111111111111111111111111111111111111111111" + 'public_key': [ + '1111111111111111111111111111111111111111111111111111111111111111111111111111', + '1111111111111111111111111111111111111111111111111111111111111111111111111111', + '1111111111111111111111111111111111111111111111111111111111111111111111111111', + '11111111111111111111111111111111111111111111111111111111111111111111111111111', ], - "bls_public_keys": [ - "1111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111", # noqa - "1111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111", # noqa - "1111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111", # noqa - "1111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111", # noqa - "1111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111", # noqa - "1111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111", # noqa - "1111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111", # noqa - "1111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111", # noqa - 
"1111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111", # noqa - "1111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111", # noqa - "1111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111", # noqa - "1111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111", # noqa - "1111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111", # noqa - "1111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111", # noqa - "1111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111", # noqa - "1111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111" # noqa + 'bls_public_keys': [ + '1111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111', # noqa + '1111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111', # noqa + 
'1111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111', # noqa + '1111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111', # noqa + '1111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111', # noqa + '1111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111', # noqa + '1111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111', # noqa + '1111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111', # noqa + '1111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111', # noqa + '1111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111', # noqa + '1111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111', # noqa + '1111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111', # noqa + '1111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111', # noqa + 
'1111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111', # noqa + '1111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111', # noqa + '1111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111:11111111111111111111111111111111111111111111111111111111111111111111111111111', # noqa ], - "t": 11, - "n": 16, - "key_share_name": "BLS_KEY:SCHAIN_ID:33333333333333333333333333333333333333333333333333333333333333333333333333333:NODE_ID:0:DKG_ID:0" # noqa + 't': 11, + 'n': 16, + 'key_share_name': 'BLS_KEY:SCHAIN_ID:33333333333333333333333333333333333333333333333333333333333333333333333333333:NODE_ID:0:DKG_ID:0', # noqa } @pytest.fixture def _schain_name(): - """ Generates default schain name """ + """Generates default schain name""" return get_random_string() @@ -295,8 +287,7 @@ def secret_keys(_schain_name): @pytest.fixture def schain_config(_schain_name, secret_key, predeployed_ima): schain_dir_path = os.path.join(SCHAINS_DIR_PATH, _schain_name) - config_path = os.path.join(schain_dir_path, - f'schain_{_schain_name}.json') + config_path = os.path.join(schain_dir_path, f'schain_{_schain_name}.json') try: pathlib.Path(schain_dir_path).mkdir(parents=True, exist_ok=True) schain_config = generate_schain_config(_schain_name) @@ -349,8 +340,7 @@ def skaled_status_exit_time_reached(_schain_name): @pytest.fixture def skaled_status_repair(_schain_name): - generate_schain_skaled_status_file( - _schain_name, clear_data_dir=True, start_from_snapshot=True) + generate_schain_skaled_status_file(_schain_name, clear_data_dir=True, start_from_snapshot=True) try: yield init_skaled_status(_schain_name) finally: @@ -371,7 +361,7 @@ def skaled_status_broken_file(_schain_name): schain_dir_path = os.path.join(SCHAINS_DIR_PATH, _schain_name) pathlib.Path(schain_dir_path).mkdir(parents=True, exist_ok=True) status_filepath = skaled_status_filepath(_schain_name) - with open(status_filepath, "w") as text_file: + with open(status_filepath, 'w') as text_file: text_file.write('abcd') try: yield SkaledStatus(status_filepath) @@ -390,18 +380,14 @@ def db(): @pytest.fixture def schain_db(db, _schain_name, meta_file): - """ Database with default schain inserted """ + """Database with default schain inserted""" upsert_schain_record_with_config(_schain_name) return _schain_name @pytest.fixture def meta_file(): - meta_info = { - "version": "0.0.0", - "config_stream": CONFIG_STREAM, - "docker_lvmpy_stream": "1.1.1" - } + meta_info = {'version': '0.0.0', 'config_stream': CONFIG_STREAM, 'docker_lvmpy_stream': '1.1.1'} with open(META_FILEPATH, 'w') as meta_file: json.dump(meta_info, meta_file) try: @@ -416,7 +402,7 @@ def schain_on_contracts(skale, nodes, _schain_name): yield create_schain( skale, schain_type=1, # test2 should have 1 index - schain_name=_schain_name + schain_name=_schain_name, ) finally: cleanup_nodes_schains(skale) @@ -424,25 +410,14 @@ def 
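For reference, the repair-oriented fixtures above boil down to one JSON payload shape. A sketch of the skaled.status contents that skaled_status_repair emulates, with key names copied from get_skaled_status_dict (the combination of ClearDataDir plus StartFromSnapshot is what later reads as a repair request):

# Sketch of the skaled.status payload behind the skaled_status_repair fixture.
# Keys mirror get_skaled_status_dict above; values are illustrative.
repair_skaled_status = {
    'subsystemRunning': {
        'SnapshotDownloader': False,
        'Blockchain': False,
        'Rpc': False,
    },
    'exitState': {
        'ClearDataDir': True,         # wipe the data dir on exit ...
        'StartAgain': False,
        'StartFromSnapshot': True,    # ... and resync from a snapshot
        'ExitTimeReached': False,
    },
}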
schain_on_contracts(skale, nodes, _schain_name): @pytest.fixture def dutils(): - return DockerUtils( - volume_driver='local', - host='unix://var/run/docker.sock' - ) + return DockerUtils(volume_driver='local', host='unix://var/run/docker.sock') @pytest.fixture def skaled_mock_image(scope='module'): - dutils = DockerUtils( - volume_driver='local', - host='unix://var/run/docker.sock' - ) + dutils = DockerUtils(volume_driver='local', host='unix://var/run/docker.sock') name = 'skaled-mock' - dutils.client.images.build( - tag=name, - rm=True, - nocache=True, - path='tests/skaled-mock' - ) + dutils.client.images.build(tag=name, rm=True, nocache=True, path='tests/skaled-mock') yield name dutils.client.images.remove(name, force=True) @@ -514,18 +489,14 @@ def schain_checks(schain_config, schain_db, current_nodes, rule_controller, esta stream_version=CONFIG_STREAM, current_nodes=current_nodes, estate=estate, - dutils=dutils + dutils=dutils, ) @pytest.fixture def schain_struct(schain_config): schain_name = schain_config['skaleConfig']['sChain']['schainName'] - return { - 'name': schain_name, - 'partOfNode': 0, - 'generation': 0 - } + return {'name': schain_name, 'partOfNode': 0, 'generation': 0} @pytest.fixture @@ -539,10 +510,7 @@ def rule_controller(_schain_name, schain_db, schain_config): own_ip = get_own_ip_from_config(schain_config) node_ips = get_node_ips_from_config(schain_config) return get_test_rule_controller( - name=_schain_name, - base_port=base_port, - own_ip=own_ip, - node_ips=node_ips + name=_schain_name, base_port=base_port, own_ip=own_ip, node_ips=node_ips ) @@ -559,10 +527,7 @@ def uninited_rule_controller(_schain_name): @pytest.fixture def skale_manager_opts(): - return SkaleManagerOpts( - schains_internal_address='0x1656', - nodes_address='0x7742' - ) + return SkaleManagerOpts(schains_internal_address='0x1656', nodes_address='0x7742') @pytest.fixture @@ -579,11 +544,7 @@ def new_upstream(schain_db): @pytest.fixture def estate(skale): - return ExternalState( - ima_linked=True, - chain_id=skale.web3.eth.chain_id, - ranges=ALLOWED_RANGES - ) + return ExternalState(ima_linked=True, chain_id=skale.web3.eth.chain_id, ranges=ALLOWED_RANGES) @pytest.fixture @@ -609,7 +570,7 @@ def upstreams(schain_db, schain_config): f'schain_{name}_9_1687183335.json', f'schain_{name}_11_1687183336.json', f'schain_{name}_11_1687183337.json', - f'schain_{name}_11_1687183339.json' + f'schain_{name}_11_1687183339.json', ] try: for fname in files: @@ -631,3 +592,19 @@ def ima_migration_schedule(schain_db): yield migration_schedule_path finally: os.remove(migration_schedule_path) + + +NCLI_STATUS_DICT = {'repair_ts': CURRENT_TS, 'snapshot_from': '127.0.0.1'} + + +@pytest.fixture +def ncli_status(_schain_name): + schain_dir_path = os.path.join(SCHAINS_DIR_PATH, _schain_name) + pathlib.Path(schain_dir_path).mkdir(parents=True, exist_ok=True) + ncli_status_path = node_cli_status_filepath(_schain_name) + write_json(ncli_status_path, NCLI_STATUS_DICT) + + try: + yield init_node_cli_status(_schain_name) + finally: + shutil.rmtree(schain_dir_path) diff --git a/tests/schains/monitor/skaled_monitor_test.py b/tests/schains/monitor/skaled_monitor_test.py index 5f004cabf..a7897d61e 100644 --- a/tests/schains/monitor/skaled_monitor_test.py +++ b/tests/schains/monitor/skaled_monitor_test.py @@ -19,7 +19,7 @@ RecreateSkaledMonitor, RegularSkaledMonitor, RepairSkaledMonitor, - UpdateConfigSkaledMonitor + UpdateConfigSkaledMonitor, ) from core.schains.external_config import ExternalConfig from core.schains.exit_scheduler 
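The ncli_status fixture above fakes the file that node-cli drops when an operator requests a repair. A minimal standalone sketch of that round trip; the directory and filename constants are assumptions standing in for the real tools.configs values, and the production code goes through write_json and node_cli_status_filepath instead:

import json
import os

# Assumed locations, for illustration only; the real values come from
# tools.configs.schains (SCHAINS_DIR_PATH, NODE_CLI_STATUS_FILENAME).
SCHAINS_DIR_PATH = '/skale_vol/schains'
NODE_CLI_STATUS_FILENAME = 'node_cli.status'


def write_node_cli_status(schain_name: str, repair_ts: int, snapshot_from: str) -> str:
    """Emulate node-cli recording a repair request for one sChain."""
    schain_dir = os.path.join(SCHAINS_DIR_PATH, schain_name)
    os.makedirs(schain_dir, exist_ok=True)
    path = os.path.join(schain_dir, NODE_CLI_STATUS_FILENAME)
    with open(path, 'w') as status_file:
        json.dump({'repair_ts': repair_ts, 'snapshot_from': snapshot_from}, status_file)
    return path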
import ExitScheduleFileManager @@ -27,19 +27,20 @@ from tools.configs.containers import SCHAIN_CONTAINER, IMA_CONTAINER from web.models.schain import SChainRecord +from tests.utils import CURRENT_TS + CURRENT_TIMESTAMP = 1594903080 CURRENT_DATETIME = datetime.datetime.utcfromtimestamp(CURRENT_TIMESTAMP) def run_ima_container_mock(schain: dict, mainnet_chain_id: int, dutils=None): - image_name, container_name, _, _ = get_container_info( - IMA_CONTAINER, schain['name']) + image_name, container_name, _, _ = get_container_info(IMA_CONTAINER, schain['name']) dutils.safe_rm(container_name) dutils.run_container( image_name=image_name, name=container_name, - entrypoint='bash -c "while true; do foo; sleep 2; done"' + entrypoint='bash -c "while true; do foo; sleep 2; done"', ) @@ -51,15 +52,14 @@ def monitor_schain_container_mock( start_ts=None, dutils=None, sync_node=False, - historic_state=False + historic_state=False, ): - image_name, container_name, _, _ = get_container_info( - SCHAIN_CONTAINER, schain['name']) + image_name, container_name, _, _ = get_container_info(SCHAIN_CONTAINER, schain['name']) dutils.safe_rm(container_name) dutils.run_container( image_name=image_name, name=container_name, - entrypoint='bash -c "while true; do foo; sleep 2; done"' + entrypoint='bash -c "while true; do foo; sleep 2; done"', ) @@ -69,12 +69,7 @@ def rotation_data(schain_db, skale): @pytest.fixture -def skaled_checks( - schain_db, - skale, - rule_controller, - dutils -): +def skaled_checks(schain_db, skale, rule_controller, dutils): name = schain_db schain_record = SChainRecord.get_by_name(name) return SkaledChecks( @@ -82,7 +77,7 @@ def skaled_checks( schain_record=schain_record, rule_controller=rule_controller, dutils=dutils, - sync_node=False + sync_node=False, ) @@ -99,7 +94,7 @@ def skaled_am( ssl_folder, ima_migration_schedule, dutils, - skaled_checks + skaled_checks, ): name = schain_db schain = skale.schains.get_by_name(name) @@ -108,7 +103,7 @@ def skaled_am( rule_controller=rule_controller, node_config=node_config, checks=skaled_checks, - dutils=dutils + dutils=dutils, ) @@ -119,19 +114,14 @@ def config(self) -> CheckRes: @pytest.fixture -def skaled_checks_no_config( - schain_db, - skale, - rule_controller, - dutils -): +def skaled_checks_no_config(schain_db, skale, rule_controller, dutils): name = schain_db schain_record = SChainRecord.get_by_name(name) return SkaledChecksNoConfig( schain_name=name, schain_record=schain_record, rule_controller=rule_controller, - dutils=dutils + dutils=dutils, ) @@ -146,101 +136,76 @@ def rotation_id_updated(self) -> CheckRes: @pytest.fixture -def skaled_checks_outdated_config( - schain_db, - skale, - rule_controller, - dutils -): +def skaled_checks_outdated_config(schain_db, skale, rule_controller, dutils): name = schain_db schain_record = SChainRecord.get_by_name(name) return SkaledChecksConfigOutdated( schain_name=name, schain_record=schain_record, rule_controller=rule_controller, - dutils=dutils + dutils=dutils, ) -def test_get_skaled_monitor_no_config(skaled_am, skaled_checks_no_config, skaled_status, schain_db): +def test_get_skaled_monitor_no_config( + skaled_am, skaled_checks_no_config, skaled_status, schain_db, ncli_status +): name = schain_db schain_record = SChainRecord.get_by_name(name) mon = get_skaled_monitor( - skaled_am, - skaled_checks_no_config.get_all(), - schain_record, - skaled_status + skaled_am, skaled_checks_no_config.get_all(), schain_record, skaled_status, ncli_status ) assert mon == NoConfigSkaledMonitor -def 
test_get_skaled_monitor_regular_and_backup(skaled_am, skaled_checks, skaled_status, schain_db): +def test_get_skaled_monitor_regular_and_backup( + skaled_am, skaled_checks, skaled_status, schain_db, ncli_status +): name = schain_db schain_record = SChainRecord.get_by_name(name) mon = get_skaled_monitor( - skaled_am, - skaled_checks.get_all(), - schain_record, - skaled_status + skaled_am, skaled_checks.get_all(), schain_record, skaled_status, ncli_status ) assert mon == RegularSkaledMonitor schain_record.set_backup_run(True) mon = get_skaled_monitor( - skaled_am, - skaled_checks.get_all(), - schain_record, - skaled_status + skaled_am, skaled_checks.get_all(), schain_record, skaled_status, ncli_status ) assert mon == RegularSkaledMonitor schain_record.set_first_run(False) mon = get_skaled_monitor( - skaled_am, - skaled_checks.get_all(), - schain_record, - skaled_status + skaled_am, skaled_checks.get_all(), schain_record, skaled_status, ncli_status ) assert mon == RegularSkaledMonitor schain_record.set_new_schain(False) mon = get_skaled_monitor( - skaled_am, - skaled_checks.get_all(), - schain_record, - skaled_status + skaled_am, skaled_checks.get_all(), schain_record, skaled_status, ncli_status ) assert mon == BackupSkaledMonitor -def test_get_skaled_monitor_repair(skaled_am, skaled_checks, skaled_status, schain_db): +def test_get_skaled_monitor_repair(skaled_am, skaled_checks, skaled_status, schain_db, ncli_status): name = schain_db schain_record = SChainRecord.get_by_name(name) - schain_record.set_repair_mode(True) + schain_record.set_repair_date(datetime.datetime.utcfromtimestamp(CURRENT_TS - 10)) mon = get_skaled_monitor( - skaled_am, - skaled_checks.get_all(), - schain_record, - skaled_status + skaled_am, skaled_checks.get_all(), schain_record, skaled_status, ncli_status ) assert mon == RepairSkaledMonitor def test_get_skaled_monitor_repair_skaled_status( - skaled_am, - skaled_checks, - schain_db, - skaled_status_repair + skaled_am, skaled_checks, schain_db, skaled_status_repair, ncli_status ): name = schain_db schain_record = SChainRecord.get_by_name(name) mon = get_skaled_monitor( - skaled_am, - skaled_checks.get_all(), - schain_record, - skaled_status_repair + skaled_am, skaled_checks.get_all(), schain_record, skaled_status_repair, ncli_status ) assert mon == RepairSkaledMonitor @@ -249,7 +214,8 @@ def test_get_skaled_monitor_repair_skaled_status( skaled_checks.get_all(), schain_record, skaled_status_repair, - automatic_repair=False + ncli_status, + automatic_repair=False, ) assert mon == RegularSkaledMonitor @@ -277,19 +243,14 @@ def container(self) -> CheckRes: @pytest.fixture -def skaled_checks_new_config( - schain_db, - skale, - rule_controller, - dutils -): +def skaled_checks_new_config(schain_db, skale, rule_controller, dutils): name = schain_db schain_record = SChainRecord.get_by_name(name) return SkaledChecksWithConfig( schain_name=name, schain_record=schain_record, rule_controller=rule_controller, - dutils=dutils + dutils=dutils, ) @@ -308,6 +269,7 @@ def test_get_skaled_monitor_reload_group( secret_keys, ssl_folder, skaled_checks, + ncli_status, dutils ): name = schain_db @@ -319,8 +281,7 @@ def test_get_skaled_monitor_reload_group( schain = skale.schains.get_by_name(name) with mock.patch( - f'{__name__}.SkaledActionManager.upstream_finish_ts', - new_callable=mock.PropertyMock + f'{__name__}.SkaledActionManager.upstream_finish_ts', new_callable=mock.PropertyMock ) as finish_ts_mock: finish_ts_mock.return_value = CURRENT_TIMESTAMP - 10 skaled_am = SkaledActionManager( @@ 
-328,14 +289,9 @@ def test_get_skaled_monitor_reload_group( rule_controller=rule_controller, node_config=node_config, checks=skaled_checks, - dutils=dutils - ) - mon = get_skaled_monitor( - skaled_am, - state, - schain_record, - skaled_status + dutils=dutils, ) + mon = get_skaled_monitor(skaled_am, state, schain_record, skaled_status, ncli_status) assert mon == RegularSkaledMonitor finish_ts_mock.return_value = CURRENT_TIMESTAMP + 10 skaled_am = SkaledActionManager( @@ -343,14 +299,9 @@ def test_get_skaled_monitor_reload_group( rule_controller=rule_controller, node_config=node_config, checks=skaled_checks, - dutils=dutils - ) - mon = get_skaled_monitor( - skaled_am, - state, - schain_record, - skaled_status + dutils=dutils, ) + mon = get_skaled_monitor(skaled_am, state, schain_record, skaled_status, ncli_status) assert mon == ReloadGroupSkaledMonitor @@ -369,7 +320,8 @@ def test_get_skaled_monitor_reload_ip( secret_keys, ssl_folder, skaled_checks, - dutils + ncli_status, + dutils, ): name = schain_db schain_record = SChainRecord.get_by_name(name) @@ -386,26 +338,16 @@ def test_get_skaled_monitor_reload_ip( rule_controller=rule_controller, node_config=node_config, checks=skaled_checks, - dutils=dutils - ) - mon = get_skaled_monitor( - skaled_am, - state, - schain_record, - skaled_status + dutils=dutils, ) + mon = get_skaled_monitor(skaled_am, state, schain_record, skaled_status, ncli_status) assert mon == RegularSkaledMonitor estate = econfig.read() estate['reload_ts'] = CURRENT_TIMESTAMP + 10 econfig.write(estate) - mon = get_skaled_monitor( - skaled_am, - state, - schain_record, - skaled_status - ) + mon = get_skaled_monitor(skaled_am, state, schain_record, skaled_status, ncli_status) assert mon == ReloadIpSkaledMonitor @@ -423,7 +365,8 @@ def test_get_skaled_monitor_new_node( skaled_status, skaled_checks, ima_migration_schedule, - dutils + ncli_status, + dutils, ): name = schain_db schain_record = SChainRecord.get_by_name(name) @@ -431,23 +374,19 @@ def test_get_skaled_monitor_new_node( finish_ts = CURRENT_TIMESTAMP + 10 with mock.patch( - f'{__name__}.SkaledActionManager.finish_ts', - new_callable=mock.PropertyMock + f'{__name__}.SkaledActionManager.finish_ts', new_callable=mock.PropertyMock ) as finish_ts_mock: skaled_am = SkaledActionManager( schain=schain, rule_controller=rule_controller, node_config=node_config, checks=skaled_checks, - dutils=dutils + dutils=dutils, ) finish_ts_mock.return_value = finish_ts mon = get_skaled_monitor( - skaled_am, - skaled_checks.get_all(), - schain_record, - skaled_status + skaled_am, skaled_checks.get_all(), schain_record, skaled_status, ncli_status ) assert mon == NewNodeSkaledMonitor @@ -458,6 +397,7 @@ def test_get_skaled_monitor_update_config( skaled_checks_new_config, schain_db, skaled_status_exit_time_reached, + ncli_status, ): name = schain_db schain_record = SChainRecord.get_by_name(name) @@ -465,50 +405,34 @@ def test_get_skaled_monitor_update_config( status['skaled_container'] = False mon = get_skaled_monitor( - skaled_am, - status, - schain_record, - skaled_status_exit_time_reached + skaled_am, status, schain_record, skaled_status_exit_time_reached, ncli_status ) assert mon == UpdateConfigSkaledMonitor status = skaled_checks_new_config.get_all() status['skaled_container'] = False mon = get_skaled_monitor( - skaled_am, - status, - schain_record, - skaled_status_exit_time_reached + skaled_am, status, schain_record, skaled_status_exit_time_reached, ncli_status ) assert mon == UpdateConfigSkaledMonitor def test_get_skaled_monitor_recreate( - 
skaled_am, - skaled_checks, - schain_db, - skaled_status + skaled_am, skaled_checks, schain_db, skaled_status, ncli_status ): name = schain_db schain_record = SChainRecord.get_by_name(name) schain_record.set_ssl_change_date(datetime.datetime.now()) - with mock.patch('core.schains.ssl.get_ssl_files_change_date', - return_value=datetime.datetime.now()): + with mock.patch( + 'core.schains.ssl.get_ssl_files_change_date', return_value=datetime.datetime.now() + ): mon = get_skaled_monitor( - skaled_am, - skaled_checks.get_all(), - schain_record, - skaled_status + skaled_am, skaled_checks.get_all(), schain_record, skaled_status, ncli_status ) assert mon == RecreateSkaledMonitor -def test_regular_skaled_monitor( - skaled_am, - skaled_checks, - clean_docker, - dutils -): +def test_regular_skaled_monitor(skaled_am, skaled_checks, clean_docker, dutils): mon = RegularSkaledMonitor(skaled_am, skaled_checks) mon.run() assert skaled_am.rc.is_rules_synced @@ -522,8 +446,7 @@ def test_backup_skaled_monitor(skaled_am, skaled_checks, clean_docker, dutils): mon.run() assert skaled_am.rc.is_rules_synced assert dutils.get_vol(skaled_am.name) - schain_container = dutils.safe_get_container( - f'skale_schain_{skaled_am.name}') + schain_container = dutils.safe_get_container(f'skale_schain_{skaled_am.name}') assert schain_container assert '--download-snapshot' in dutils.get_cmd(schain_container.id) assert dutils.safe_get_container(f'skale_ima_{skaled_am.name}') @@ -538,8 +461,7 @@ def test_repair_skaled_monitor(skaled_am, skaled_checks, clean_docker, dutils): assert dutils.get_vol(skaled_am.name) assert dutils.get_vol_created_ts(skaled_am.name) > ts_before - schain_container = dutils.safe_get_container( - f'skale_schain_{skaled_am.name}') + schain_container = dutils.safe_get_container(f'skale_schain_{skaled_am.name}') assert schain_container assert '--download-snapshot' in dutils.get_cmd(schain_container.id) assert dutils.get_container_created_ts(schain_container.id) > ts_before @@ -550,8 +472,9 @@ def test_group_reload_skaled_monitor(skaled_am, skaled_checks, clean_docker, dut mon = ReloadGroupSkaledMonitor(skaled_am, skaled_checks) ts = time.time() esfm = ExitScheduleFileManager(mon.am.name) - with mock.patch('core.schains.monitor.action.get_finish_ts_from_latest_upstream', - return_value=ts): + with mock.patch( + 'core.schains.monitor.action.get_finish_ts_from_latest_upstream', return_value=ts + ): mon.run() assert esfm.exit_ts == ts assert skaled_am.rc.is_rules_synced @@ -563,8 +486,9 @@ def test_group_reload_skaled_monitor(skaled_am, skaled_checks, clean_docker, dut @pytest.mark.skip def test_group_reload_skaled_monitor_failed_skaled(skaled_am, skaled_checks, clean_docker, dutils): mon = ReloadGroupSkaledMonitor(skaled_am, skaled_checks) - with mock.patch('core.schains.monitor.containers.run_schain_container') \ - as run_skaled_container_mock: + with mock.patch( + 'core.schains.monitor.containers.run_schain_container' + ) as run_skaled_container_mock: mon.run() assert skaled_am.rc.is_rules_synced assert run_skaled_container_mock.assert_not_called() @@ -575,19 +499,13 @@ def test_recreate_skaled_monitor(skaled_am, skaled_checks, clean_docker, dutils) ts_before = time.time() time.sleep(1) mon.run() - schain_container = dutils.safe_get_container( - f'skale_schain_{skaled_am.name}') + schain_container = dutils.safe_get_container(f'skale_schain_{skaled_am.name}') assert schain_container assert dutils.get_container_created_ts(schain_container.id) > ts_before def test_update_config_skaled_monitor( - skaled_am, - 
skaled_checks, - dutils, - clean_docker, - upstreams, - skaled_status_exit_time_reached + skaled_am, skaled_checks, dutils, clean_docker, upstreams, skaled_status_exit_time_reached ): name = skaled_checks.name ts_before = time.time() @@ -596,13 +514,10 @@ def test_update_config_skaled_monitor( mon.run() assert dutils.get_vol(name) assert dutils.get_vol_created_ts(name) > ts_before - schain_container = dutils.safe_get_container( - f'skale_schain_{name}' - ) + schain_container = dutils.safe_get_container(f'skale_schain_{name}') assert schain_container assert dutils.get_container_created_ts(schain_container.id) > ts_before - os.stat(os.path.join(schain_config_dir(name), - f'schain_{name}.json')).st_mtime > ts_before + os.stat(os.path.join(schain_config_dir(name), f'schain_{name}.json')).st_mtime > ts_before def test_no_config_monitor(skaled_am, skaled_checks, clean_docker, dutils): @@ -618,7 +533,6 @@ def test_new_node_monitor(skaled_am, skaled_checks, clean_docker, dutils): mon.run() assert skaled_am.rc.is_rules_synced assert dutils.get_vol(skaled_am.name) - schain_container = dutils.safe_get_container( - f'skale_schain_{skaled_am.name}') + schain_container = dutils.safe_get_container(f'skale_schain_{skaled_am.name}') assert schain_container assert '--download-snapshot' in dutils.get_cmd(schain_container.id) diff --git a/tests/schains/skaled_status_test.py b/tests/schains/skaled_status_test.py index 4981698ef..5b96d8987 100644 --- a/tests/schains/skaled_status_test.py +++ b/tests/schains/skaled_status_test.py @@ -1,6 +1,14 @@ -from core.schains.skaled_status import SkaledStatus +from core.schains.skaled_status import ( + node_cli_status_filepath, + NodeCliStatus, + SkaledStatus, +) from core.schains.config.directory import skaled_status_filepath +CURRENT_TS = 1594903080 + +NCLI_STATUS_DICT = {'repair_ts': CURRENT_TS, 'snapshot_from': '127.0.0.1'} + def test_skaled_status(skaled_status, _schain_name): status_filepath = skaled_status_filepath(_schain_name) @@ -9,14 +17,14 @@ def test_skaled_status(skaled_status, _schain_name): assert skaled_status.subsystem_running == { 'SnapshotDownloader': False, 'Blockchain': False, - 'Rpc': False + 'Rpc': False, } assert skaled_status.exit_state == { 'ClearDataDir': False, 'StartAgain': False, 'StartFromSnapshot': False, - 'ExitTimeReached': False + 'ExitTimeReached': False, } @@ -47,3 +55,19 @@ def test_log(skaled_status, _schain_name, caplog): status_filepath = skaled_status_filepath(_schain_name) skaled_status = SkaledStatus(filepath=status_filepath) skaled_status.log() + + +def test_node_cli_status_empty(_schain_name): + status_filepath = node_cli_status_filepath(_schain_name) + cli_status = NodeCliStatus(filepath=status_filepath) + + assert cli_status.repair_ts is None + assert cli_status.snapshot_from is None + + +def test_node_cli_status_repair(_schain_name, ncli_status): + status_filepath = node_cli_status_filepath(_schain_name) + cli_status = NodeCliStatus(filepath=status_filepath) + + assert cli_status.repair_ts == CURRENT_TS + assert cli_status.snapshot_from == '127.0.0.1' diff --git a/tests/utils.py b/tests/utils.py index 29f37e745..a3631ffc1 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -37,6 +37,7 @@ from web.models.schain import upsert_schain_record +CURRENT_TS = 1594903080 DIR_PATH = os.path.dirname(os.path.realpath(__file__)) ENDPOINT = os.getenv('ENDPOINT') diff --git a/tools/configs/schains.py b/tools/configs/schains.py index 566709ca8..f08338d45 100644 --- a/tools/configs/schains.py +++ b/tools/configs/schains.py @@ -44,6 +44,7 @@ 
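The two tests above pin down the NodeCliStatus contract: every field is None while node_cli.status is absent, and parsed values appear once node-cli writes the file. A self-contained sketch of a reader honoring that contract (an illustration, not the actual core.schains.status implementation):

import json
import os
from typing import Optional


class NodeCliStatusSketch:
    """Lazy reader for a node_cli.status file; a missing file yields None fields."""

    def __init__(self, filepath: str) -> None:
        self.filepath = filepath

    def _read(self) -> dict:
        if not os.path.isfile(self.filepath):
            return {}
        with open(self.filepath) as status_file:
            return json.load(status_file)

    @property
    def repair_ts(self) -> Optional[int]:
        return self._read().get('repair_ts')

    @property
    def snapshot_from(self) -> Optional[str]:
        return self._read().get('snapshot_from')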
MAX_SCHAIN_FAILED_RPC_COUNT = int(os.getenv('MAX_SCHAIN_FAILED_RPC_COUNT', 5)) SKALED_STATUS_FILENAME = 'skaled.status' +NODE_CLI_STATUS_FILENAME = 'node_cli.status' STATIC_SCHAIN_DIR_NAME = 'schains' SCHAIN_STATE_PATH = os.path.join(SKALE_LIB_PATH, 'schains') diff --git a/web/migrations.py b/web/migrations.py index 3341a49bf..44ce37fef 100644 --- a/web/migrations.py +++ b/web/migrations.py @@ -65,6 +65,9 @@ def run_migrations(db, migrator): add_backup_run_field(db, migrator) add_sync_config_run_field(db, migrator) + # 2.7 -> 2.8 update fields + add_repair_date_field(db, migrator) + def add_new_schain_field(db, migrator): add_column( @@ -157,6 +160,13 @@ def add_dkg_step_field(db, migrator): ) +def add_repair_date_field(db, migrator): + add_column( + db, migrator, 'SChainRecord', 'repair_date', + DateTimeField(default=datetime.now()) + ) + + def find_column(db, table_name, column_name): columns = db.get_columns(table_name) return next((x for x in columns if x.name == column_name), None) diff --git a/web/models/schain.py b/web/models/schain.py index c685d864a..e6d6d2d5d 100644 --- a/web/models/schain.py +++ b/web/models/schain.py @@ -54,6 +54,8 @@ class SChainRecord(BaseModel): ssl_change_date = DateTimeField(default=datetime.now()) + repair_date = DateTimeField(default=datetime.now()) + @classmethod def add(cls, name): try: @@ -205,6 +207,11 @@ def is_dkg_unsuccessful(self) -> bool: DKGStatus.FAILED ] + def set_repair_date(self, value: datetime) -> None: + logger.info(f'Changing repair_date for {self.name} to {value}') + self.repair_date = value + self.save() + def create_tables(): logger.info('Creating schainrecord table...') From 5f5cf891d6e6a1c3961672e8d02319cfe477e5d3 Mon Sep 17 00:00:00 2001 From: badrogger Date: Mon, 17 Jun 2024 18:23:46 +0000 Subject: [PATCH 015/103] Move skaled_status.py to status.py --- core/schains/cleaner.py | 12 ++--- core/schains/monitor/action.py | 2 +- core/schains/monitor/main.py | 2 +- core/schains/monitor/skaled_monitor.py | 46 ++++++++++---------- core/schains/{skaled_status.py => status.py} | 0 tests/conftest.py | 2 +- tests/schains/skaled_status_test.py | 2 +- web/routes/schains.py | 2 +- 8 files changed, 34 insertions(+), 34 deletions(-) rename core/schains/{skaled_status.py => status.py} (100%) diff --git a/core/schains/cleaner.py b/core/schains/cleaner.py index 387f1861d..6ad80ceb3 100644 --- a/core/schains/cleaner.py +++ b/core/schains/cleaner.py @@ -248,16 +248,16 @@ def cleanup_schain( dutils=dutils, sync_node=SYNC_NODE ) - status = checks.get_all() - if status['skaled_container'] or is_exited( + check_status = checks.get_all() + if check_status['skaled_container'] or is_exited( schain_name, container_type=ContainerType.schain, dutils=dutils ): remove_schain_container(schain_name, dutils=dutils) - if status['volume']: + if check_status['volume']: remove_schain_volume(schain_name, dutils=dutils) - if status['firewall_rules']: + if check_status['firewall_rules']: conf = ConfigFileManager(schain_name).skaled_config base_port = get_base_port_from_config(conf) own_ip = get_own_ip_from_config(conf) @@ -273,13 +273,13 @@ def cleanup_schain( ) rc.cleanup() if estate is not None and estate.ima_linked: - if status.get('ima_container', False) or is_exited( + if check_status.get('ima_container', False) or is_exited( schain_name, container_type=ContainerType.ima, dutils=dutils ): remove_ima_container(schain_name, dutils=dutils) - if status['config_dir']: + if check_status['config_dir']: remove_config_dir(schain_name) mark_schain_deleted(schain_name) diff 
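With the boolean repair_mode gone, a repair request is now a timestamp on the record. A short usage sketch of the new model API, assuming upsert_schain_record behaves as in web.models.schain:

from datetime import datetime

from web.models.schain import upsert_schain_record  # model shown in the diff above


def request_repair(schain_name: str) -> int:
    """Stamp the record so monitors can compare it with node-cli's repair_ts."""
    record = upsert_schain_record(schain_name)
    record.set_repair_date(datetime.now())
    return int(record.repair_date.timestamp())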
--git a/core/schains/monitor/action.py b/core/schains/monitor/action.py index d4baa4e20..5de9ea78c 100644 --- a/core/schains/monitor/action.py +++ b/core/schains/monitor/action.py @@ -73,7 +73,7 @@ ) from core.schains.ima import ImaData from core.schains.external_config import ExternalConfig, ExternalState -from core.schains.skaled_status import init_skaled_status +from core.schains.status import init_skaled_status from tools.configs import SYNC_NODE from tools.configs.containers import IMA_CONTAINER, SCHAIN_CONTAINER diff --git a/core/schains/monitor/main.py b/core/schains/monitor/main.py index a1e65aa23..76035d823 100644 --- a/core/schains/monitor/main.py +++ b/core/schains/monitor/main.py @@ -49,7 +49,7 @@ from core.schains.external_config import ExternalConfig, ExternalState from core.schains.task import keep_tasks_running, Task from core.schains.config.static_params import get_automatic_repair_option -from core.schains.skaled_status import get_node_cli_status, get_skaled_status +from core.schains.status import get_node_cli_status, get_skaled_status from core.node import get_current_nodes from tools.docker_utils import DockerUtils diff --git a/core/schains/monitor/skaled_monitor.py b/core/schains/monitor/skaled_monitor.py index 4095bac93..89f9d55c7 100644 --- a/core/schains/monitor/skaled_monitor.py +++ b/core/schains/monitor/skaled_monitor.py @@ -26,7 +26,7 @@ from core.schains.checks import SkaledChecks from core.schains.monitor.action import SkaledActionManager from core.schains.config.main import get_number_of_secret_shares -from core.schains.skaled_status import NodeCliStatus, SkaledStatus +from core.schains.status import NodeCliStatus, SkaledStatus from core.schains.ssl import ssl_reload_needed from tools.configs import SYNC_NODE from tools.resources import get_statsd_client @@ -231,7 +231,7 @@ def is_backup_mode(schain_record: SChainRecord) -> bool: def is_repair_mode( schain_record: SChainRecord, - status: Dict, + check_status: Dict, skaled_status: Optional[SkaledStatus], ncli_status: Optional[NodeCliStatus], automatic_repair: bool, @@ -239,26 +239,26 @@ def is_repair_mode( repair_ts = int(schain_record.repair_date.timestamp()) if ncli_status is not None and ncli_status.repair_ts > repair_ts: return True - return automatic_repair and is_skaled_repair_status(status, skaled_status) + return automatic_repair and is_skaled_repair_internal(check_status, skaled_status) -def is_reload_group_mode(status: Dict, finish_ts: Optional[int]) -> bool: +def is_reload_group_mode(check_status: Dict, finish_ts: Optional[int]) -> bool: ts = int(time.time()) if finish_ts is None: return False - return finish_ts > ts and status['config'] and not status['config_updated'] + return finish_ts > ts and check_status['config'] and not check_status['config_updated'] -def is_reload_ip_mode(status: Dict, reload_ts: Optional[int]) -> bool: +def is_reload_ip_mode(check_status: Dict, reload_ts: Optional[int]) -> bool: if reload_ts is None: return False - return status['config'] and not status['config_updated'] + return check_status['config'] and not check_status['config_updated'] -def is_config_update_time(status: Dict, skaled_status: Optional[SkaledStatus]) -> bool: +def is_config_update_time(check_status: Dict, skaled_status: Optional[SkaledStatus]) -> bool: if not skaled_status: return False - return not status['skaled_container'] and skaled_status.exit_time_reached + return not check_status['skaled_container'] and skaled_status.exit_time_reached def is_recreate_mode(schain_record: SChainRecord) -> bool: @@ 
-273,21 +273,21 @@ def is_new_node_mode(schain_record: SChainRecord, finish_ts: Optional[int]) -> b return finish_ts > ts and secret_shares_number == 1 -def is_skaled_repair_status(status: Dict, skaled_status: Optional[SkaledStatus]) -> bool: +def is_skaled_repair_internal(check_status: Dict, skaled_status: Optional[SkaledStatus]) -> bool: if skaled_status is None: return False skaled_status.log() needs_repair = skaled_status.clear_data_dir and skaled_status.start_from_snapshot - return not status['skaled_container'] and needs_repair + return not check_status['skaled_container'] and needs_repair -def no_config(status: Dict) -> bool: - return not status['config'] +def no_config(check_status: Dict) -> bool: + return not check_status['config'] def get_skaled_monitor( action_manager: SkaledActionManager, - status: Dict, + check_status: Dict, schain_record: SChainRecord, skaled_status: SkaledStatus, ncli_status: NodeCliStatus, @@ -300,32 +300,32 @@ def get_skaled_monitor( mon_type: Type[BaseSkaledMonitor] = RegularSkaledMonitor if SYNC_NODE: - if no_config(status): + if no_config(check_status): mon_type = NoConfigSkaledMonitor if is_recreate_mode(schain_record): mon_type = RecreateSkaledMonitor - elif is_config_update_time(status, skaled_status): + elif is_config_update_time(check_status, skaled_status): mon_type = UpdateConfigSkaledMonitor - elif is_reload_group_mode(status, action_manager.upstream_finish_ts): + elif is_reload_group_mode(check_status, action_manager.upstream_finish_ts): mon_type = ReloadGroupSkaledMonitor - elif is_reload_ip_mode(status, action_manager.econfig.reload_ts): + elif is_reload_ip_mode(check_status, action_manager.econfig.reload_ts): mon_type = ReloadIpSkaledMonitor return mon_type - if no_config(status): + if no_config(check_status): mon_type = NoConfigSkaledMonitor elif is_backup_mode(schain_record): mon_type = BackupSkaledMonitor - elif is_repair_mode(schain_record, status, skaled_status, ncli_status, automatic_repair): + elif is_repair_mode(schain_record, check_status, skaled_status, ncli_status, automatic_repair): mon_type = RepairSkaledMonitor elif is_recreate_mode(schain_record): mon_type = RecreateSkaledMonitor elif is_new_node_mode(schain_record, action_manager.finish_ts): mon_type = NewNodeSkaledMonitor - elif is_config_update_time(status, skaled_status): + elif is_config_update_time(check_status, skaled_status): mon_type = UpdateConfigSkaledMonitor - elif is_reload_group_mode(status, action_manager.upstream_finish_ts): + elif is_reload_group_mode(check_status, action_manager.upstream_finish_ts): mon_type = ReloadGroupSkaledMonitor - elif is_reload_ip_mode(status, action_manager.econfig.reload_ts): + elif is_reload_ip_mode(check_status, action_manager.econfig.reload_ts): mon_type = ReloadIpSkaledMonitor return mon_type diff --git a/core/schains/skaled_status.py b/core/schains/status.py similarity index 100% rename from core/schains/skaled_status.py rename to core/schains/status.py diff --git a/tests/conftest.py b/tests/conftest.py index cd17972ee..b8943b4d9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -42,7 +42,7 @@ from core.schains.cleaner import remove_schain_container, remove_schain_volume from core.schains.ima import ImaData from core.schains.external_config import ExternalConfig, ExternalState -from core.schains.skaled_status import ( +from core.schains.status import ( init_node_cli_status, init_skaled_status, node_cli_status_filepath, diff --git a/tests/schains/skaled_status_test.py b/tests/schains/skaled_status_test.py index 
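To make the reworked selection concrete, here is a worked, self-contained example of the repair decision: repair triggers either when node-cli recorded a request newer than the record's repair_date, or when automatic repair is on and skaled itself asks for a clean resync. SimpleNamespace stands in for the real record and status objects:

import time
from datetime import datetime
from types import SimpleNamespace


def is_repair_mode_sketch(record, check_status, skaled_status, ncli_status, automatic_repair):
    # Mirrors is_repair_mode / is_skaled_repair_internal above.
    repair_ts = int(record.repair_date.timestamp())
    if ncli_status is not None and ncli_status.repair_ts is not None \
            and ncli_status.repair_ts > repair_ts:
        return True
    needs_repair = (
        skaled_status is not None
        and skaled_status.clear_data_dir
        and skaled_status.start_from_snapshot
    )
    return automatic_repair and not check_status['skaled_container'] and needs_repair


record = SimpleNamespace(repair_date=datetime.fromtimestamp(time.time() - 10))
ncli = SimpleNamespace(repair_ts=int(time.time()))  # request is newer than the record
assert is_repair_mode_sketch(record, {'skaled_container': True}, None, ncli, False)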
5b96d8987..807da04e2 100644 --- a/tests/schains/skaled_status_test.py +++ b/tests/schains/skaled_status_test.py @@ -1,4 +1,4 @@ -from core.schains.skaled_status import ( +from core.schains.status import ( node_cli_status_filepath, NodeCliStatus, SkaledStatus, diff --git a/web/routes/schains.py b/web/routes/schains.py index 223649fb2..acb25fc87 100644 --- a/web/routes/schains.py +++ b/web/routes/schains.py @@ -31,7 +31,7 @@ get_default_rule_controller, get_sync_agent_ranges ) -from core.schains.skaled_status import init_skaled_status +from core.schains.status import init_skaled_status from core.schains.ima import get_ima_version_after_migration from core.schains.info import get_schain_info_by_name, get_skaled_version from core.schains.cleaner import get_schains_on_node From 88436bfd8ac9a2719b25db26193888d702cd4b27 Mon Sep 17 00:00:00 2001 From: badrogger Date: Mon, 17 Jun 2024 18:25:55 +0000 Subject: [PATCH 016/103] Remove repair api route --- tests/db_test.py | 40 ---------------------------------------- web/models/schain.py | 20 -------------------- web/routes/schains.py | 17 +---------------- 3 files changed, 1 insertion(+), 76 deletions(-) diff --git a/tests/db_test.py b/tests/db_test.py index 40ede0ca5..9c0f7417d 100644 --- a/tests/db_test.py +++ b/tests/db_test.py @@ -7,8 +7,6 @@ get_schains_statuses, mark_schain_deleted, set_schains_first_run, - switch_off_repair_mode, - toggle_schain_repair_mode, SChainRecord, upsert_schain_record ) @@ -67,44 +65,6 @@ def test_schains_first_run(db, upsert_db): SChainRecord.first_run == True).count() == RECORDS_NUMBER # noqa: E712 -def test_toggle_repair_mode(db, upsert_db): - result = toggle_schain_repair_mode('schain-0') - assert result - assert SChainRecord.select().where( - SChainRecord.repair_mode == True).count() == 1 # noqa: E712 - cursor = SChainRecord.select().where( - SChainRecord.repair_mode == True).execute() # noqa: E712 - records = list(cursor) - assert len(records) == 1 - assert records[0].name == 'schain-0' - assert records[0].snapshot_from == '' - - result = toggle_schain_repair_mode('schain-0', '1.1.1.1') - cursor = SChainRecord.select().where( - SChainRecord.repair_mode == True).execute() # noqa: E712 - records = list(cursor) - assert len(records) == 1 - assert records[0].name == 'schain-0' - assert records[0].snapshot_from == '1.1.1.1' - - switch_off_repair_mode('schain-0') - assert SChainRecord.select().where( - SChainRecord.repair_mode == True).count() == 0 # noqa: E712 - cursor = SChainRecord.select().where( - SChainRecord.name == 'schain-0').execute() # noqa: E712 - records = list(cursor) - assert records[0].name == 'schain-0' - assert not records[0].repair_mode - assert records[0].snapshot_from == '' - - -def test_toggle_repair_mode_schain_not_exists(db, upsert_db): - result = toggle_schain_repair_mode('undefined-schain') - assert not result - assert SChainRecord.select().where( - SChainRecord.repair_mode == True).count() == 0 # noqa: E712 - - def test_get_schains_names(db, upsert_db): mark_schain_deleted('schain-0') result = get_schains_names() diff --git a/web/models/schain.py b/web/models/schain.py index e6d6d2d5d..2637f540b 100644 --- a/web/models/schain.py +++ b/web/models/schain.py @@ -303,23 +303,3 @@ def get_schains_names(include_deleted=False): def get_schains_statuses(include_deleted=False): return [SChainRecord.to_dict(r) for r in SChainRecord.get_all_records(include_deleted)] - - -def toggle_schain_repair_mode(name, snapshot_from: str = ''): - logger.info(f'Toggling repair mode for schain {name}') - query = 
SChainRecord.update( - repair_mode=True, - snapshot_from=snapshot_from - ).where(SChainRecord.name == name) - count = query.execute() - return count > 0 - - -def switch_off_repair_mode(name): - logger.info(f'Disabling repair mode for schain {name}') - query = SChainRecord.update( - repair_mode=False, - snapshot_from='' - ).where(SChainRecord.name == name) - count = query.execute() - return count > 0 diff --git a/web/routes/schains.py b/web/routes/schains.py index acb25fc87..060a83ffd 100644 --- a/web/routes/schains.py +++ b/web/routes/schains.py @@ -35,7 +35,7 @@ from core.schains.ima import get_ima_version_after_migration from core.schains.info import get_schain_info_by_name, get_skaled_version from core.schains.cleaner import get_schains_on_node -from web.models.schain import get_schains_statuses, toggle_schain_repair_mode +from web.models.schain import get_schains_statuses from web.helper import ( construct_ok_response, construct_err_response, @@ -132,21 +132,6 @@ def firewall_rules(): return construct_ok_response({'endpoints': endpoints}) -@schains_bp.route(get_api_url(BLUEPRINT_NAME, 'repair'), methods=['POST']) -def repair(): - logger.debug(request) - schain_name = request.json.get('schain_name') - snapshot_from = request.json.get('snapshot_from', '') - result = toggle_schain_repair_mode( - schain_name, snapshot_from=snapshot_from) - if result: - return construct_ok_response() - else: - return construct_err_response( - msg=f'No schain with name {schain_name}' - ) - - @schains_bp.route(get_api_url(BLUEPRINT_NAME, 'get'), methods=['GET']) @g_skale def get_schain(): From 6f73aa7a1e6e12179b6f7c46b850cf364b18edfb Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 18 Jun 2024 12:34:38 +0000 Subject: [PATCH 017/103] Remove repair test. Fix monitor --- core/schains/monitor/main.py | 8 ++++---- tests/routes/schains_test.py | 35 +---------------------------------- 2 files changed, 5 insertions(+), 38 deletions(-) diff --git a/core/schains/monitor/main.py b/core/schains/monitor/main.py index 76035d823..1ba0209a5 100644 --- a/core/schains/monitor/main.py +++ b/core/schains/monitor/main.py @@ -160,19 +160,19 @@ def run_skaled_pipeline( econfig=ExternalConfig(name), dutils=dutils ) - status = skaled_checks.get_all(log=False, expose=True) + check_status = skaled_checks.get_all(log=False, expose=True) automatic_repair = get_automatic_repair_option() api_status = get_api_checks_status( - status=status, allowed=TG_ALLOWED_CHECKS) + status=check_status, allowed=TG_ALLOWED_CHECKS) notify_checks(name, node_config.all(), api_status) - logger.info('Skaled status: %s', status) + logger.info('Skaled check status: %s', check_status) logger.info('Upstream config %s', skaled_am.upstream_config_path) mon = get_skaled_monitor( action_manager=skaled_am, - status=status, + check_status=check_status, schain_record=schain_record, skaled_status=skaled_status, ncli_status=ncli_status, diff --git a/tests/routes/schains_test.py b/tests/routes/schains_test.py index 329987985..9395005dd 100644 --- a/tests/routes/schains_test.py +++ b/tests/routes/schains_test.py @@ -10,7 +10,7 @@ from core.node_config import NodeConfig from core.schains.config.file_manager import ConfigFileManager -from tests.utils import get_bp_data, get_test_rule_controller, post_bp_data +from tests.utils import get_bp_data, get_test_rule_controller from web.models.schain import SChainRecord, upsert_schain_record from web.routes.schains import schains_bp from web.helper import get_api_url @@ -97,39 +97,6 @@ def test_firewall_rules_route(skale_bp, 
schain_config): } -def test_enable_repair_mode(skale_bp, schain_db): - schain_name = schain_db - data = post_bp_data(skale_bp, get_api_url(BLUEPRINT_NAME, 'repair'), - params={'schain_name': schain_name}) - assert data == { - 'payload': {}, - 'status': 'ok' - } - r = upsert_schain_record(schain_name) - assert r.repair_mode - assert r.snapshot_from == '' - - data = post_bp_data( - skale_bp, - get_api_url(BLUEPRINT_NAME, 'repair'), - params={'schain_name': schain_name, 'snapshot_from': '1.1.1.1'} - ) - assert data == { - 'payload': {}, - 'status': 'ok' - } - r = upsert_schain_record(schain_name) - assert r.repair_mode - assert r.snapshot_from == '1.1.1.1' - - data = post_bp_data(skale_bp, get_api_url(BLUEPRINT_NAME, 'repair'), - params={'schain_name': 'undefined-schain'}) - assert data == { - 'payload': 'No schain with name undefined-schain', - 'status': 'error' - } - - def test_get_schain( skale_bp, skale, From 0728f9a695cc3f3e250d1c242d258e0d4f87005c Mon Sep 17 00:00:00 2001 From: badrogger Date: Wed, 19 Jun 2024 18:48:12 +0000 Subject: [PATCH 018/103] Fix snapshot_from --- core/schains/cmd.py | 4 +++- core/schains/monitor/action.py | 14 +++++++------- core/schains/monitor/containers.py | 3 ++- core/schains/monitor/main.py | 1 + core/schains/runner.py | 3 ++- tests/conftest.py | 2 +- tests/schains/monitor/action/skaled_action_test.py | 4 ++++ tests/schains/monitor/skaled_monitor_test.py | 6 ++++++ 8 files changed, 26 insertions(+), 11 deletions(-) diff --git a/core/schains/cmd.py b/core/schains/cmd.py index c96e2432e..3df08f628 100644 --- a/core/schains/cmd.py +++ b/core/schains/cmd.py @@ -17,6 +17,8 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +from typing import Optional + from core.schains.config.file_manager import ConfigFileManager from core.schains.config.helper import get_schain_ports_from_config from core.schains.config.main import get_skaled_container_config_path @@ -34,7 +36,7 @@ def get_schain_container_cmd( download_snapshot: bool = False, enable_ssl: bool = True, sync_node: bool = False, - snapshot_from: str = '' + snapshot_from: Optional[str] = None ) -> str: """Returns parameters that will be passed to skaled binary in the sChain container""" opts = get_schain_container_base_opts(schain_name, enable_ssl=enable_ssl, sync_node=sync_node) diff --git a/core/schains/monitor/action.py b/core/schains/monitor/action.py index 5de9ea78c..56512e7f7 100644 --- a/core/schains/monitor/action.py +++ b/core/schains/monitor/action.py @@ -35,21 +35,18 @@ run_dkg, save_dkg_results ) -from core.schains.ima import get_migration_ts as get_ima_migration_ts -from core.schains.ssl import update_ssl_change_date - from core.schains.cleaner import ( remove_ima_container, remove_schain_container, remove_schain_volume ) +from core.schains.ima import get_migration_ts as get_ima_migration_ts, ImaData +from core.schains.ssl import update_ssl_change_date +from core.schains.status import NodeCliStatus from core.schains.firewall.types import IRuleController - from core.schains.volume import init_data_volume from core.schains.exit_scheduler import ExitScheduleFileManager - from core.schains.limits import get_schain_type - from core.schains.monitor.containers import monitor_schain_container, monitor_ima_container from core.schains.monitor.rpc import handle_failed_schain_rpc from core.schains.runner import ( @@ -71,7 +68,6 @@ get_node_ips_from_config, get_own_ip_from_config ) -from core.schains.ima import ImaData from 
core.schains.external_config import ExternalConfig, ExternalState from core.schains.status import init_skaled_status @@ -298,6 +294,7 @@ def __init__( rule_controller: IRuleController, checks: SkaledChecks, node_config: NodeConfig, + ncli_status: NodeCliStatus, econfig: Optional[ExternalConfig] = None, dutils: DockerUtils = None, node_options: NodeOptions = None @@ -320,6 +317,7 @@ def __init__( self.statsd_client = get_statsd_client() self.node_options = node_options or NodeOptions() + self.ncli_status = ncli_status super().__init__(name=schain['name']) @@ -374,11 +372,13 @@ def skaled_container( download_snapshot, start_ts ) + snapshot_from = self.ncli_status.snapshot_from if self.ncli_status else None monitor_schain_container( self.schain, schain_record=self.schain_record, skaled_status=self.skaled_status, download_snapshot=download_snapshot, + snapshot_from=snapshot_from, start_ts=start_ts, abort_on_exit=abort_on_exit, dutils=self.dutils, diff --git a/core/schains/monitor/containers.py b/core/schains/monitor/containers.py index ef5c9b284..cdfdbd3ce 100644 --- a/core/schains/monitor/containers.py +++ b/core/schains/monitor/containers.py @@ -54,6 +54,7 @@ def monitor_schain_container( skaled_status, download_snapshot=False, start_ts=None, + snapshot_from: Optional[str] = None, abort_on_exit: bool = True, dutils: Optional[DockerUtils] = None, sync_node: bool = False, @@ -81,7 +82,7 @@ def monitor_schain_container( download_snapshot=download_snapshot, start_ts=start_ts, dutils=dutils, - snapshot_from=schain_record.snapshot_from, + snapshot_from=snapshot_from, sync_node=sync_node, historic_state=historic_state, ) diff --git a/core/schains/monitor/main.py b/core/schains/monitor/main.py index 1ba0209a5..7b3c684d8 100644 --- a/core/schains/monitor/main.py +++ b/core/schains/monitor/main.py @@ -157,6 +157,7 @@ def run_skaled_pipeline( rule_controller=rc, checks=skaled_checks, node_config=node_config, + ncli_status=ncli_status, econfig=ExternalConfig(name), dutils=dutils ) diff --git a/core/schains/runner.py b/core/schains/runner.py index e65aa6394..7676b844b 100644 --- a/core/schains/runner.py +++ b/core/schains/runner.py @@ -19,6 +19,7 @@ import copy import logging +from typing import Optional from docker.types import LogConfig, Ulimit @@ -183,7 +184,7 @@ def run_schain_container( volume_mode=None, ulimit_check=True, enable_ssl=True, - snapshot_from: str = '', + snapshot_from: Optional[str] = None, sync_node=False, historic_state=False ): diff --git a/tests/conftest.py b/tests/conftest.py index b8943b4d9..47ea33cd7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -607,4 +607,4 @@ def ncli_status(_schain_name): try: yield init_node_cli_status(_schain_name) finally: - shutil.rmtree(schain_dir_path) + shutil.rmtree(schain_dir_path, ignore_errors=True) diff --git a/tests/schains/monitor/action/skaled_action_test.py b/tests/schains/monitor/action/skaled_action_test.py index 3281ec6bd..0fb8d731b 100644 --- a/tests/schains/monitor/action/skaled_action_test.py +++ b/tests/schains/monitor/action/skaled_action_test.py @@ -46,6 +46,7 @@ def monitor_schain_container_mock( schain_record, skaled_status, download_snapshot=False, + snapshot_from='', start_ts=None, abort_on_exit=True, dutils=None, @@ -92,6 +93,7 @@ def skaled_am( secret_key, ssl_folder, ima_migration_schedule, + ncli_status, dutils, skaled_checks ): @@ -102,6 +104,7 @@ def skaled_am( rule_controller=rule_controller, checks=skaled_checks, node_config=node_config, + ncli_status=ncli_status, dutils=dutils ) @@ -145,6 +148,7 @@ def 
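The plumbing above carries snapshot_from from the node-cli status file all the way into the skaled command line. A simplified sketch of the final step; both option spellings here are placeholders for illustration, the real strings are assembled in core/schains/cmd.py:

from typing import Optional


def build_snapshot_opts(download_snapshot: bool, snapshot_from: Optional[str] = None) -> str:
    """Build the snapshot-related tail of the skaled command (flag names assumed)."""
    opts = ''
    if download_snapshot:
        opts += ' --download-snapshot readfromconfig'
        if snapshot_from:
            # Pin the donor node instead of letting skaled pick one.
            opts += f' --snapshot-from {snapshot_from}'
    return opts


assert build_snapshot_opts(True, '127.0.0.1') == (
    ' --download-snapshot readfromconfig --snapshot-from 127.0.0.1'
)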
test_skaled_container_with_snapshot_action(skaled_am): schain_record=skaled_am.schain_record, skaled_status=skaled_am.skaled_status, download_snapshot=True, + snapshot_from='127.0.0.1', start_ts=None, abort_on_exit=True, dutils=skaled_am.dutils, diff --git a/tests/schains/monitor/skaled_monitor_test.py b/tests/schains/monitor/skaled_monitor_test.py index a7897d61e..96a740f78 100644 --- a/tests/schains/monitor/skaled_monitor_test.py +++ b/tests/schains/monitor/skaled_monitor_test.py @@ -93,6 +93,7 @@ def skaled_am( secret_key, ssl_folder, ima_migration_schedule, + ncli_status, dutils, skaled_checks, ): @@ -102,6 +103,7 @@ def skaled_am( schain=schain, rule_controller=rule_controller, node_config=node_config, + ncli_status=ncli_status, checks=skaled_checks, dutils=dutils, ) @@ -289,6 +291,7 @@ def test_get_skaled_monitor_reload_group( rule_controller=rule_controller, node_config=node_config, checks=skaled_checks, + ncli_status=ncli_status, dutils=dutils, ) mon = get_skaled_monitor(skaled_am, state, schain_record, skaled_status, ncli_status) @@ -299,6 +302,7 @@ def test_get_skaled_monitor_reload_group( rule_controller=rule_controller, node_config=node_config, checks=skaled_checks, + ncli_status=ncli_status, dutils=dutils, ) mon = get_skaled_monitor(skaled_am, state, schain_record, skaled_status, ncli_status) @@ -338,6 +342,7 @@ def test_get_skaled_monitor_reload_ip( rule_controller=rule_controller, node_config=node_config, checks=skaled_checks, + ncli_status=ncli_status, dutils=dutils, ) mon = get_skaled_monitor(skaled_am, state, schain_record, skaled_status, ncli_status) @@ -380,6 +385,7 @@ def test_get_skaled_monitor_new_node( schain=schain, rule_controller=rule_controller, node_config=node_config, + ncli_status=ncli_status, checks=skaled_checks, dutils=dutils, ) From 4b976d8cf97b55d353c7d716b637499c05b5396e Mon Sep 17 00:00:00 2001 From: badrogger Date: Thu, 20 Jun 2024 10:52:53 +0000 Subject: [PATCH 019/103] Fix action tests --- tests/schains/monitor/action/skaled_action_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/schains/monitor/action/skaled_action_test.py b/tests/schains/monitor/action/skaled_action_test.py index 0fb8d731b..a817438e3 100644 --- a/tests/schains/monitor/action/skaled_action_test.py +++ b/tests/schains/monitor/action/skaled_action_test.py @@ -178,6 +178,7 @@ def test_skaled_container_snapshot_delay_start_action(skaled_am): start_ts=ts, abort_on_exit=True, dutils=skaled_am.dutils, + snapshot_from='127.0.0.1', sync_node=False, historic_state=False ) From a98659f899b5631634f87a68a5c4b2b0b4896bf8 Mon Sep 17 00:00:00 2001 From: badrogger Date: Thu, 18 Jul 2024 12:05:06 +0000 Subject: [PATCH 020/103] Fix merge issues --- core/schains/monitor/skaled_monitor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/schains/monitor/skaled_monitor.py b/core/schains/monitor/skaled_monitor.py index 024246379..a946ca2be 100644 --- a/core/schains/monitor/skaled_monitor.py +++ b/core/schains/monitor/skaled_monitor.py @@ -302,7 +302,7 @@ def get_skaled_monitor( if SYNC_NODE: if no_config(check_status): mon_type = NoConfigSkaledMonitor - if is_recreate_mode(status, schain_record): + if is_recreate_mode(check_status, schain_record): mon_type = RecreateSkaledMonitor elif is_config_update_time(check_status, skaled_status): mon_type = UpdateConfigSkaledMonitor @@ -318,7 +318,7 @@ def get_skaled_monitor( mon_type = BackupSkaledMonitor elif is_repair_mode(schain_record, check_status, skaled_status, ncli_status, automatic_repair): mon_type = 
From a98659f899b5631634f87a68a5c4b2b0b4896bf8 Mon Sep 17 00:00:00 2001
From: badrogger
Date: Thu, 18 Jul 2024 12:05:06 +0000
Subject: [PATCH 020/103] Fix merge issues

---
 core/schains/monitor/skaled_monitor.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/core/schains/monitor/skaled_monitor.py b/core/schains/monitor/skaled_monitor.py
index 024246379..a946ca2be 100644
--- a/core/schains/monitor/skaled_monitor.py
+++ b/core/schains/monitor/skaled_monitor.py
@@ -302,7 +302,7 @@ def get_skaled_monitor(
     if SYNC_NODE:
         if no_config(check_status):
             mon_type = NoConfigSkaledMonitor
-        if is_recreate_mode(status, schain_record):
+        if is_recreate_mode(check_status, schain_record):
             mon_type = RecreateSkaledMonitor
         elif is_config_update_time(check_status, skaled_status):
             mon_type = UpdateConfigSkaledMonitor
@@ -318,7 +318,7 @@
         mon_type = BackupSkaledMonitor
     elif is_repair_mode(schain_record, check_status, skaled_status, ncli_status, automatic_repair):
         mon_type = RepairSkaledMonitor
-    elif is_recreate_mode(status, schain_record):
+    elif is_recreate_mode(check_status, schain_record):
         mon_type = RecreateSkaledMonitor
     elif is_new_node_mode(schain_record, action_manager.finish_ts):
         mon_type = NewNodeSkaledMonitor

From 630828f6ad2c88b5d2d20a8408a5ec51c521e09b Mon Sep 17 00:00:00 2001
From: badrogger
Date: Fri, 19 Jul 2024 11:57:27 +0000
Subject: [PATCH 021/103] Fix recreate monitor test

---
 tests/schains/monitor/skaled_monitor_test.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/schains/monitor/skaled_monitor_test.py b/tests/schains/monitor/skaled_monitor_test.py
index a9e0d5eac..5821a4351 100644
--- a/tests/schains/monitor/skaled_monitor_test.py
+++ b/tests/schains/monitor/skaled_monitor_test.py
@@ -438,7 +438,8 @@ def test_get_skaled_monitor_recreate(
         skaled_am,
         status,
         schain_record,
-        skaled_status
+        skaled_status,
+        ncli_status
     )
     assert mon == RegularSkaledMonitor
     status['skaled_container'] = True
@@ -446,7 +447,8 @@ def test_get_skaled_monitor_recreate(
         skaled_am,
         status,
         schain_record,
-        skaled_status
+        skaled_status,
+        ncli_status
     )
     assert mon == RecreateSkaledMonitor

From 55e1b909faa1adb5875bb82c683e6b7387971e3d Mon Sep 17 00:00:00 2001
From: badrogger
Date: Fri, 19 Jul 2024 16:58:19 +0000
Subject: [PATCH 022/103] Bump version to 2.8.0

---
 VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VERSION b/VERSION
index 24ba9a38d..834f26295 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.7.0
+2.8.0
From 4689c00fa9f34ffd30ad969601846620d59562e0 Mon Sep 17 00:00:00 2001
From: badrogger
Date: Thu, 25 Jul 2024 18:37:43 +0000
Subject: [PATCH 023/103] Add Pipeline namedtuple

---
 core/schains/monitor/main.py       | 88 ++++++++++++++++++++++++++----
 tests/schains/monitor/main_test.py | 29 +++++++++-
 2 files changed, 105 insertions(+), 12 deletions(-)

diff --git a/core/schains/monitor/main.py b/core/schains/monitor/main.py
index 58010d347..1892769db 100644
--- a/core/schains/monitor/main.py
+++ b/core/schains/monitor/main.py
@@ -18,13 +18,15 @@
 # along with this program. If not, see <https://www.gnu.org/licenses/>.

 import functools
-import time
-import random
 import logging
+import queue
+import random
+import sys
+import threading
+import time
-from typing import Dict
+from typing import Callable, Dict, List, NamedTuple, Optional
 from concurrent.futures import Future, ThreadPoolExecutor
 from importlib import reload
-from typing import List, Optional

 from skale import Skale, SkaleIma
 from web3._utils import request as web3_request
@@ -56,10 +58,17 @@
 SKALED_PIPELINE_SLEEP = 2
 CONFIG_PIPELINE_SLEEP = 3
+STUCK_TIMEOUT = 60 * 60 * 3
+SHUTDOWN_INTERVAL = 60 * 10

 logger = logging.getLogger(__name__)


+class Pipeline(NamedTuple):
+    name: str
+    job: Callable
+
+
 def run_config_pipeline(
     skale: Skale, skale_ima: SkaleIma, schain: Dict, node_config: NodeConfig, stream_version: str
 ) -> None:
@@ -105,9 +114,7 @@ def run_config_pipeline(
     if SYNC_NODE:
         logger.info(
-            'Sync node last_dkg_successful %s, rotation_data %s',
-            last_dkg_successful,
-            rotation_data
+            'Sync node last_dkg_successful %s, rotation_data %s', last_dkg_successful, rotation_data
         )
         mon = SyncConfigMonitor(config_am, config_checks)
     else:
@@ -151,18 +158,18 @@ def run_skaled_pipeline(
         econfig=ExternalConfig(name),
         dutils=dutils,
     )
-    status = skaled_checks.get_all(log=False, expose=True)
+    check_status = skaled_checks.get_all(log=False, expose=True)
     automatic_repair = get_automatic_repair_option()
-    api_status = get_api_checks_status(status=status, allowed=TG_ALLOWED_CHECKS)
+    api_status = get_api_checks_status(status=check_status, allowed=TG_ALLOWED_CHECKS)
     notify_checks(name, node_config.all(), api_status)
-    logger.info('Skaled status: %s', status)
+    logger.info('Skaled check status: %s', check_status)
     logger.info('Upstream config %s', skaled_am.upstream_config_path)

     mon = get_skaled_monitor(
         action_manager=skaled_am,
-        status=status,
+        check_status=check_status,
         schain_record=schain_record,
         skaled_status=skaled_status,
         automatic_repair=automatic_repair,
@@ -286,3 +293,62 @@ def run_monitor_for_schain(
         if once:
             return False
         post_monitor_sleep()
+
+
+def run_pipelines(
+    pipelines: list[Pipeline],
+    once: bool = False,
+    stuck_timeout: int = STUCK_TIMEOUT,
+    shutdown_interval: int = SHUTDOWN_INTERVAL,
+) -> None:
+    init_ts = time.time()
+
+    heartbeat_queues = [queue.Queue() for _ in range(len(pipelines))]
+    terminating_events = [threading.Event() for _ in range(len(pipelines))]
+    heartbeat_ts = [init_ts for _ in range(len(pipelines))]
+
+    threads = [
+        threading.Thread(
+            name=pipeline.name,
+            target=keep_pipeline, args=[heartbeat_queue, terminating_event, pipeline.job],
+            daemon=True
+        )
+        for heartbeat_queue, terminating_event, pipeline in zip(
+            heartbeat_queues, terminating_events, pipelines
+        )
+    ]
+
+    for th in threads:
+        th.start()
+
+    stuck = False
+    while not stuck:
+        for pindex, heartbeat_queue in enumerate(heartbeat_queues):
+            if not heartbeat_queue.empty():
+                heartbeat_ts[pindex] = heartbeat_queue.get()
+            if time.time() - heartbeat_ts[pindex] > stuck_timeout:
+                logger.info('Pipeline with number %d/%d stuck', pindex, len(pipelines))
+                stuck = True
+                break
+        if once and all(ts > init_ts for ts in heartbeat_ts):
+            logger.info('Successfully completed required one run. Shutting down the process')
+            break
+
+    logger.info('Terminating all pipelines')
+    for event in terminating_events:
+        event.set()
+    if stuck:
+        logger.info('Waiting for graceful completion interval')
+        time.sleep(shutdown_interval)
+        logger.info('Stuck was detected')
+        sys.exit(1)
+
+
+def keep_pipeline(
+    heartbeat_queue: queue.Queue, terminating_event: threading.Event, pipeline: Callable
+) -> None:
+    while not terminating_event.is_set():
+        logger.info('Running pipeline')
+        pipeline()
+        heartbeat_queue.put(time.time())
+        post_monitor_sleep()
diff --git a/tests/schains/monitor/main_test.py b/tests/schains/monitor/main_test.py
index 3c094ab4b..91da5b75c 100644
--- a/tests/schains/monitor/main_test.py
+++ b/tests/schains/monitor/main_test.py
@@ -1,11 +1,14 @@
+import functools
 import mock
+import logging
+import time
 from concurrent.futures import ThreadPoolExecutor

 import pytest

 from core.schains.firewall.types import IpRange
 from core.schains.firewall.utils import get_sync_agent_ranges
-from core.schains.monitor.main import run_monitor_for_schain
+from core.schains.monitor.main import Pipeline, run_monitor_for_schain, run_pipelines
 from core.schains.task import Task

 from tools.helper import is_node_part_of_chain
@@ -91,3 +94,27 @@ def test_run_monitor_for_schain_left(
         once=True
     )
     keep_tasks_running_mock.assert_not_called()
+
+
+def test_run_pipelines():
+    def simple_pipeline(index: int):
+        logging.info('Running simple pipeline %d', index)
+        time.sleep(1)
+        logging.info('Finishing simple pipeline %d', index)
+
+    def stuck_pipeline(index: int):
+        logging.info('Running stuck pipeline %d', index)
+        while True:
+            logging.info('Stuck pipeline %d beat', index)
+            time.sleep(2)
+
+    run_pipelines([
+        Pipeline(name='healthy0', job=functools.partial(simple_pipeline, index=0)),
+        Pipeline(name='healthy1', job=functools.partial(simple_pipeline, index=1)),
+    ], once=True, stuck_timeout=5, shutdown_interval=10)
+
+    with pytest.raises(SystemExit):
+        run_pipelines([
+            Pipeline(name='healthy', job=functools.partial(simple_pipeline, index=0)),
+            Pipeline(name='stuck', job=functools.partial(stuck_pipeline, index=1))
+        ], stuck_timeout=5, shutdown_interval=10)
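Note: the mechanism this patch introduces is a queue-based heartbeat watchdog. Each worker thread pushes a timestamp onto its queue after every completed run, and the supervisor declares a pipeline stuck once the newest heartbeat is older than stuck_timeout. A minimal, self-contained sketch of that idea (all names here are illustrative, not part of the patch):

    import queue
    import threading
    import time

    def worker(beats: queue.Queue) -> None:
        for _ in range(3):          # three units of work, then go silent
            time.sleep(1)
            beats.put(time.time())  # heartbeat after each completed unit

    beats: queue.Queue = queue.Queue()
    threading.Thread(target=worker, args=(beats,), daemon=True).start()

    last_beat = time.time()
    STUCK_TIMEOUT = 5
    while True:
        if not beats.empty():
            last_beat = beats.get()      # drain one heartbeat per iteration
        if time.time() - last_beat > STUCK_TIMEOUT:
            print('pipeline is stuck')   # supervisor reacts here
            break
        time.sleep(0.5)

Run as-is, the worker beats three times over roughly three seconds, then falls silent, and the supervisor flags it after five quiet seconds.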
From 438d0626a8f761fe0edd9490416433e2c8cdd86f Mon Sep 17 00:00:00 2001
From: badrogger
Date: Tue, 30 Jul 2024 17:11:28 +0000
Subject: [PATCH 024/103] Increase log file size

---
 tools/configs/logs.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/configs/logs.py b/tools/configs/logs.py
index d21c8da41..0d9205d81 100644
--- a/tools/configs/logs.py
+++ b/tools/configs/logs.py
@@ -42,10 +42,10 @@
     REMOVED_CONTAINERS_FOLDER_NAME
 )

-LOG_FILE_SIZE_MB = 40
+LOG_FILE_SIZE_MB = 100
 LOG_FILE_SIZE_BYTES = LOG_FILE_SIZE_MB * 1000000

-LOG_BACKUP_COUNT = 10
+LOG_BACKUP_COUNT = 20

 ADMIN_LOG_FORMAT = '[%(asctime)s %(levelname)s][%(process)d][%(processName)s][%(threadName)s] - %(name)s:%(lineno)d - %(message)s'  # noqa
 API_LOG_FORMAT = '[%(asctime)s] %(process)d %(levelname)s %(url)s %(module)s: %(message)s'  # noqa
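Note on sizing: assuming these constants feed a RotatingFileHandler-style setup that keeps the active file plus LOG_BACKUP_COUNT backups, this bump raises the worst-case disk footprint per log stream from 40 MB * (10 + 1) = 440 MB to 100 MB * (20 + 1) = 2.1 GB. A quick check with the values from the patch:

    LOG_FILE_SIZE_BYTES = 100 * 1000000   # constants copied from the patch above
    LOG_BACKUP_COUNT = 20
    worst_case = LOG_FILE_SIZE_BYTES * (LOG_BACKUP_COUNT + 1)  # active file + backups
    print(worst_case)  # 2100000000 bytes, i.e. ~2.1 GB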
From ea46230d39e62461553e2f5b633c20130a294161 Mon Sep 17 00:00:00 2001
From: badrogger
Date: Tue, 30 Jul 2024 17:33:16 +0000
Subject: [PATCH 025/103] Fix skaled monitor

---
 core/schains/monitor/skaled_monitor.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/core/schains/monitor/skaled_monitor.py b/core/schains/monitor/skaled_monitor.py
index aac40e16a..58b43b06d 100644
--- a/core/schains/monitor/skaled_monitor.py
+++ b/core/schains/monitor/skaled_monitor.py
@@ -297,7 +297,7 @@ def no_config(status: Dict) -> bool:

 def get_skaled_monitor(
     action_manager: SkaledActionManager,
-    status: Dict,
+    check_status: Dict,
     schain_record: SChainRecord,
     skaled_status: SkaledStatus,
     automatic_repair: bool = True
@@ -309,32 +309,32 @@ def get_skaled_monitor(
     mon_type: Type[BaseSkaledMonitor] = RegularSkaledMonitor

     if SYNC_NODE:
-        if no_config(status):
+        if no_config(check_status):
             mon_type = NoConfigSkaledMonitor
-        if is_recreate_mode(status, schain_record):
+        if is_recreate_mode(check_status, schain_record):
             mon_type = RecreateSkaledMonitor
-        elif is_config_update_time(status, skaled_status):
+        elif is_config_update_time(check_status, skaled_status):
             mon_type = UpdateConfigSkaledMonitor
-        elif is_reload_group_mode(status, action_manager.upstream_finish_ts):
+        elif is_reload_group_mode(check_status, action_manager.upstream_finish_ts):
             mon_type = ReloadGroupSkaledMonitor
-        elif is_reload_ip_mode(status, action_manager.econfig.reload_ts):
+        elif is_reload_ip_mode(check_status, action_manager.econfig.reload_ts):
             mon_type = ReloadIpSkaledMonitor
         return mon_type

-    if no_config(status):
+    if no_config(check_status):
         mon_type = NoConfigSkaledMonitor
     elif is_backup_mode(schain_record):
         mon_type = BackupSkaledMonitor
-    elif is_repair_mode(schain_record, status, skaled_status, automatic_repair):
+    elif is_repair_mode(schain_record, check_status, skaled_status, automatic_repair):
         mon_type = RepairSkaledMonitor
-    elif is_recreate_mode(status, schain_record):
+    elif is_recreate_mode(check_status, schain_record):
         mon_type = RecreateSkaledMonitor
     elif is_new_node_mode(schain_record, action_manager.finish_ts):
         mon_type = NewNodeSkaledMonitor
-    elif is_config_update_time(status, skaled_status):
+    elif is_config_update_time(check_status, skaled_status):
         mon_type = UpdateConfigSkaledMonitor
-    elif is_reload_group_mode(status, action_manager.upstream_finish_ts):
+    elif is_reload_group_mode(check_status, action_manager.upstream_finish_ts):
         mon_type = ReloadGroupSkaledMonitor
-    elif is_reload_ip_mode(status, action_manager.econfig.reload_ts):
+    elif is_reload_ip_mode(check_status, action_manager.econfig.reload_ts):
         mon_type = ReloadIpSkaledMonitor

     return mon_type
From b5a7bfe970af632bbeda27de4bea59687e0894cd Mon Sep 17 00:00:00 2001
From: badrogger
Date: Tue, 30 Jul 2024 17:37:55 +0000
Subject: [PATCH 026/103] Switch to new pipeline runner in process_manager

---
 core/schains/monitor/main.py    | 123 +++++++++++++------------------
 core/schains/process_manager.py |   9 +--
 2 files changed, 55 insertions(+), 77 deletions(-)

diff --git a/core/schains/monitor/main.py b/core/schains/monitor/main.py
index 1892769db..af650f0db 100644
--- a/core/schains/monitor/main.py
+++ b/core/schains/monitor/main.py
@@ -21,11 +21,9 @@
 import logging
 import queue
 import random
-import sys
 import threading
 import time
-from typing import Callable, Dict, List, NamedTuple, Optional
-from concurrent.futures import Future, ThreadPoolExecutor
+from typing import Callable, Dict, NamedTuple, Optional
 from importlib import reload

 from skale import Skale, SkaleIma
@@ -40,7 +38,6 @@
 from core.schains.monitor import get_skaled_monitor, RegularConfigMonitor, SyncConfigMonitor
 from core.schains.monitor.action import ConfigActionManager, SkaledActionManager
 from core.schains.external_config import ExternalConfig, ExternalState
-from core.schains.task import keep_tasks_running, Task
 from core.schains.config.static_params import get_automatic_repair_option
 from core.schains.skaled_status import get_skaled_status
 from core.node import get_current_nodes
@@ -58,7 +55,7 @@
 SKALED_PIPELINE_SLEEP = 2
 CONFIG_PIPELINE_SLEEP = 3
-STUCK_TIMEOUT = 60 * 60 * 3
+STUCK_TIMEOUT = 60 * 60 * 2
 SHUTDOWN_INTERVAL = 60 * 10

 logger = logging.getLogger(__name__)
@@ -189,20 +186,19 @@ def post_monitor_sleep():
-def create_and_execute_tasks(
-    skale,
-    schain,
+def create_and_execute_pipelines(
+    skale: Skale,
+    schain: dict,
     node_config: NodeConfig,
     skale_ima: SkaleIma,
-    stream_version,
-    schain_record,
-    executor,
-    futures,
-    dutils,
-):
+    schain_record: SChainRecord,
+    dutils: Optional[DockerUtils] = None,
+) -> bool:
     reload(web3_request)
     name = schain['name']

+    stream_version = get_skale_node_version()
+
     is_rotation_active = skale.node_rotation.is_rotation_active(name)

     leaving_chain = not SYNC_NODE and not is_node_part_of_chain(skale, name, node_config.id)
@@ -222,13 +218,13 @@
     statsd_client.incr(f'admin.schain.monitor.{no_hyphens(name)}')
     statsd_client.gauge(f'admin.schain.monitor_last_seen.{no_hyphens(name)}', monitor_last_seen_ts)

-    tasks = []
+    pipelines = []
     if not leaving_chain:
-        logger.info('Adding config task to the pool')
-        tasks.append(
-            Task(
-                f'{name}-config',
-                functools.partial(
+        logger.info('Adding config pipelines to the pool')
+        pipelines.append(
+            Pipeline(
+                name='config',
+                job=functools.partial(
                     run_config_pipeline,
                     skale=skale,
                     skale_ima=skale_ima,
@@ -236,7 +232,6 @@
                     node_config=node_config,
                     stream_version=stream_version,
                 ),
-                sleep=CONFIG_PIPELINE_SLEEP,
             )
         )
     if schain_record.config_version != stream_version or (
@@ -244,55 +239,26 @@
     ):
         ConfigFileManager(name).remove_skaled_config()
     else:
-        logger.info('Adding skaled task to the pool')
-        tasks.append(
-            Task(
-                f'{name}-skaled',
-                functools.partial(
+        logger.info('Adding skaled pipeline to the pool')
+        pipelines.append(
+            Pipeline(
+                name='skaled',
+                job=functools.partial(
                     run_skaled_pipeline,
                     skale=skale,
                     schain=schain,
                     node_config=node_config,
                     dutils=dutils,
                 ),
-                sleep=SKALED_PIPELINE_SLEEP,
             )
         )

-    if len(tasks) == 0:
-        logger.warning('No tasks to run')
-    keep_tasks_running(executor, tasks, futures)
+    if len(pipelines) == 0:
+        logger.warning('No pipelines to run')
+        return False

-
-def run_monitor_for_schain(
-    skale, skale_ima, node_config: NodeConfig, schain, dutils=None, once=False
-):
-    stream_version = get_skale_node_version()
-    tasks_number = 2
-    with ThreadPoolExecutor(max_workers=tasks_number, thread_name_prefix='T') as executor:
-        futures: List[Optional[Future]] = [None for i in range(tasks_number)]
-        while True:
-            schain_record = SChainRecord.get_by_name(schain['name'])
-            try:
-                create_and_execute_tasks(
-                    skale,
-                    schain,
-                    node_config,
-                    skale_ima,
-                    stream_version,
-                    schain_record,
-                    executor,
-                    futures,
-                    dutils,
-                )
-                if once:
-                    return True
-                post_monitor_sleep()
-            except Exception:
-                logger.exception('Monitor iteration failed')
-                if once:
-                    return False
-                post_monitor_sleep()
+    run_pipelines(pipelines)
+    return True


 def run_pipelines(
@@ -310,8 +276,8 @@ def run_pipelines(
     threads = [
         threading.Thread(
             name=pipeline.name,
-            target=keep_pipeline, args=[heartbeat_queue, terminating_event, pipeline.job],
-            daemon=True
+            target=keep_pipeline,
+            args=[heartbeat_queue, terminating_event, pipeline.job]
         )
         for heartbeat_queue, terminating_event, pipeline in zip(
             heartbeat_queues, terminating_events, pipelines
@@ -326,29 +292,40 @@ def run_pipelines(
     stuck = False
     while not stuck:
         for pindex, heartbeat_queue in enumerate(heartbeat_queues):
             if not heartbeat_queue.empty():
                 heartbeat_ts[pindex] = heartbeat_queue.get()
-            if time.time() - heartbeat_ts[pindex] > stuck_timeout:
-                logger.info('Pipeline with number %d/%d stuck', pindex, len(pipelines))
+            ts = time.time()
+            if ts - heartbeat_ts[pindex] > stuck_timeout:
+                logger.warning(
+                    '%s pipeline is stuck (last heartbeat %d)',
+                    pipelines[pindex].name,
+                    heartbeat_ts[pindex],
+                )
                 stuck = True
                 break
         if once and all(ts > init_ts for ts in heartbeat_ts):
-            logger.info('Successfully completed required one run. Shutting down the process')
+            logger.info('Successfully completed requested single run')
             break

     logger.info('Terminating all pipelines')
     for event in terminating_events:
-        event.set()
+        if not event.is_set():
+            event.set()
     if stuck:
-        logger.info('Waiting for graceful completion interval')
-        time.sleep(shutdown_interval)
-        logger.info('Stuck was detected')
-        sys.exit(1)
+        logger.info('Joining threads with timeout')
+        for thread in threads:
+            thread.join(timeout=shutdown_interval)
+        logger.warning('Stuck pipeline was detected')
+    logger.info('Finishing with pipelines')


 def keep_pipeline(
-    heartbeat_queue: queue.Queue, terminating_event: threading.Event, pipeline: Callable
+    heartbeat_queue: queue.Queue, terminate: threading.Event, pipeline: Callable
 ) -> None:
-    while not terminating_event.is_set():
+    while not terminate.is_set():
         logger.info('Running pipeline')
-        pipeline()
+        try:
+            pipeline()
+        except Exception:
+            logger.exception('Pipeline run failed')
+            terminate.set()
         heartbeat_queue.put(time.time())
         post_monitor_sleep()
diff --git a/core/schains/process_manager.py b/core/schains/process_manager.py
index fddaa6a4d..190f47c75 100644
--- a/core/schains/process_manager.py
+++ b/core/schains/process_manager.py
@@ -24,7 +24,7 @@

 from skale import Skale

-from core.schains.monitor.main import run_monitor_for_schain
+from core.schains.monitor.main import create_and_execute_pipelines
 from core.schains.notifications import notify_if_not_enough_balance
 from core.schains.process_manager_helper import (
     terminate_stuck_schain_process, is_monitor_process_alive, terminate_process
@@ -77,12 +77,13 @@ def run_pm_schain(skale, skale_ima, node_config, schain: Dict) -> None:
         logger.info(f'{log_prefix} PID {schain_record.monitor_id} is not running, spawning...')
         process = Process(
             name=schain['name'],
-            target=run_monitor_for_schain,
+            target=create_and_execute_pipelines,
             args=(
                 skale,
-                skale_ima,
+                schain,
                 node_config,
-                schain
+                skale_ima,
+                schain_record
             )
         )
         process.start()
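Note: keep_pipeline now turns an exception in one pipeline into a shutdown request for its siblings, since every worker loops on a shared threading.Event and sets it when its job raises. A compact, runnable sketch of this cooperative-cancellation pattern (illustrative names, not the patched module itself):

    import threading

    def keep_running(terminate: threading.Event, job) -> None:
        while not terminate.is_set():
            try:
                job()
            except Exception:
                terminate.set()  # ask every sibling loop to wind down too

    terminate = threading.Event()

    def failing_job() -> None:
        raise RuntimeError('boom')

    worker = threading.Thread(target=keep_running, args=(terminate, failing_job))
    worker.start()
    worker.join()
    assert terminate.is_set()  # the failure propagated as a stop signal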
From a74f381af49c577f92736f971a8b76635c7d7c20 Mon Sep 17 00:00:00 2001
From: badrogger
Date: Fri, 2 Aug 2024 13:36:28 +0000
Subject: [PATCH 027/103] Add ProcessReport

---
 core/schains/checks.py                        |   2 +-
 core/schains/cleaner.py                       |   2 +-
 core/schains/monitor/main.py                  |  98 +-----------
 core/schains/monitor/pipeline.py              | 148 ++++++++++++++++++
 .../{process_manager_helper.py => process.py} |  74 +++++----
 core/schains/process_manager.py               |  18 ++-
 tests/schains/monitor/main_test.py            |  69 ++------
 tests/schains/monitor/process_test.py         |  34 ++++
 8 files changed, 263 insertions(+), 182 deletions(-)
 create mode 100644 core/schains/monitor/pipeline.py
 rename core/schains/{process_manager_helper.py => process.py} (64%)
 create mode 100644 tests/schains/monitor/process_test.py

diff --git a/core/schains/checks.py b/core/schains/checks.py
index 8f4b0d9ee..0202ed22d 100644
--- a/core/schains/checks.py
+++ b/core/schains/checks.py
@@ -41,7 +41,7 @@
 from core.schains.dkg.utils import get_secret_key_share_filepath
 from core.schains.firewall.types import IRuleController
 from core.schains.ima import get_ima_time_frame, get_migration_ts as get_ima_migration_ts
-from core.schains.process_manager_helper import is_monitor_process_alive
+from core.schains.process import is_monitor_process_alive
 from core.schains.rpc import (
     check_endpoint_alive,
     check_endpoint_blocks,
diff --git a/core/schains/cleaner.py b/core/schains/cleaner.py
index 985089db6..0be1454b1 100644
--- a/core/schains/cleaner.py
+++ b/core/schains/cleaner.py
@@ -35,7 +35,7 @@
     get_node_ips_from_config,
     get_own_ip_from_config
 )
-from core.schains.process_manager_helper import terminate_schain_process
+from core.schains.process import terminate_schain_process
 from core.schains.runner import get_container_name, is_exited
 from core.schains.external_config import ExternalConfig
 from core.schains.types import ContainerType
diff --git a/core/schains/monitor/main.py b/core/schains/monitor/main.py
index af650f0db..52922e3bf 100644
--- a/core/schains/monitor/main.py
+++ b/core/schains/monitor/main.py
@@ -19,11 +19,7 @@

 import functools
 import logging
-import queue
-import random
-import threading
-import time
-from typing import Callable, Dict, NamedTuple, Optional
+from typing import Dict, Optional
 from importlib import reload

 from skale import Skale, SkaleIma
@@ -33,12 +29,14 @@
 from core.node_config import NodeConfig
 from core.schains.checks import ConfigChecks, get_api_checks_status, TG_ALLOWED_CHECKS, SkaledChecks
 from core.schains.config.file_manager import ConfigFileManager
+from core.schains.config.static_params import get_automatic_repair_option
 from core.schains.firewall import get_default_rule_controller
 from core.schains.firewall.utils import get_sync_agent_ranges
+from core.schains.external_config import ExternalConfig, ExternalState
 from core.schains.monitor import get_skaled_monitor, RegularConfigMonitor, SyncConfigMonitor
 from core.schains.monitor.action import ConfigActionManager, SkaledActionManager
-from core.schains.external_config import ExternalConfig, ExternalState
-from core.schains.config.static_params import get_automatic_repair_option
+from core.schains.monitor.pipeline import Pipeline, run_pipelines
+from core.schains.process import ProcessReport
 from core.schains.skaled_status import get_skaled_status
 from core.node import get_current_nodes
@@ -53,19 +51,12 @@
 MIN_SCHAIN_MONITOR_SLEEP_INTERVAL = 20
 MAX_SCHAIN_MONITOR_SLEEP_INTERVAL = 40
-SKALED_PIPELINE_SLEEP = 2
-CONFIG_PIPELINE_SLEEP = 3
 STUCK_TIMEOUT = 60 * 60 * 2
 SHUTDOWN_INTERVAL = 60 * 10

 logger = logging.getLogger(__name__)


-class Pipeline(NamedTuple):
-    name: str
-    job: Callable
-
-
 def run_config_pipeline(
     skale: Skale, skale_ima: SkaleIma, schain: Dict, node_config: NodeConfig, stream_version: str
 ) -> None:
@@ -178,20 +169,13 @@ def run_skaled_pipeline(
     mon(skaled_am, skaled_checks).run()


-def post_monitor_sleep():
-    schain_monitor_sleep = random.randint(
-        MIN_SCHAIN_MONITOR_SLEEP_INTERVAL, MAX_SCHAIN_MONITOR_SLEEP_INTERVAL
-    )
-    logger.info('Monitor iteration completed, sleeping for %d', schain_monitor_sleep)
-    time.sleep(schain_monitor_sleep)
-
-
 def create_and_execute_pipelines(
     skale: Skale,
     schain: dict,
     node_config: NodeConfig,
     skale_ima: SkaleIma,
     schain_record: SChainRecord,
+    process_report: ProcessReport,
     dutils: Optional[DockerUtils] = None,
 ) -> bool:
     reload(web3_request)
@@ -259,73 +243,3 @@
     run_pipelines(pipelines)
     return True
-
-
-def run_pipelines(
-    pipelines: list[Pipeline],
-    once: bool = False,
-    stuck_timeout: int = STUCK_TIMEOUT,
-    shutdown_interval: int = SHUTDOWN_INTERVAL,
-) -> None:
-    init_ts = time.time()
-
-    heartbeat_queues = [queue.Queue() for _ in range(len(pipelines))]
-    terminating_events = [threading.Event() for _ in range(len(pipelines))]
-    heartbeat_ts = [init_ts for _ in range(len(pipelines))]
-
-    threads = [
-        threading.Thread(
-            name=pipeline.name,
-            target=keep_pipeline,
-            args=[heartbeat_queue, terminating_event, pipeline.job]
-        )
-        for heartbeat_queue, terminating_event, pipeline in zip(
-            heartbeat_queues, terminating_events, pipelines
-        )
-    ]
-
-    for th in threads:
-        th.start()
-
-    stuck = False
-    while not stuck:
-        for pindex, heartbeat_queue in enumerate(heartbeat_queues):
-            if not heartbeat_queue.empty():
-                heartbeat_ts[pindex] = heartbeat_queue.get()
-            ts = time.time()
-            if ts - heartbeat_ts[pindex] > stuck_timeout:
-                logger.warning(
-                    '%s pipeline is stuck (last heartbeat %d)',
-                    pipelines[pindex].name,
-                    heartbeat_ts[pindex],
-                )
-                stuck = True
-                break
-        if once and all(ts > init_ts for ts in heartbeat_ts):
-            logger.info('Successfully completed requested single run')
-            break
-
-    logger.info('Terminating all pipelines')
-    for event in terminating_events:
-        if not event.is_set():
-            event.set()
-    if stuck:
-        logger.info('Joining threads with timeout')
-        for thread in threads:
-            thread.join(timeout=shutdown_interval)
-        logger.warning('Stuck pipeline was detected')
-    logger.info('Finishing with pipelines')
-
-
-def keep_pipeline(
-    heartbeat_queue: queue.Queue, terminate: threading.Event, pipeline: Callable
-) -> None:
-    while not terminate.is_set():
-        logger.info('Running pipeline')
-        try:
-            pipeline()
-        except Exception:
-            logger.exception('Pipeline run failed')
-            terminate.set()
-        heartbeat_queue.put(time.time())
-        post_monitor_sleep()
diff --git a/core/schains/monitor/pipeline.py b/core/schains/monitor/pipeline.py
new file mode 100644
index 000000000..16f738cea
--- /dev/null
+++ b/core/schains/monitor/pipeline.py
@@ -0,0 +1,148 @@
+import logging
+import queue
+import random
+import sys
+import threading
+import time
+
+from typing import Callable, NamedTuple
+from core.schains.config.directory import schain_config_dir
+
+logger = logging.getLogger(__name__)
+
+
+MIN_SCHAIN_MONITOR_SLEEP_INTERVAL = 20
+MAX_SCHAIN_MONITOR_SLEEP_INTERVAL = 40
+
+SKALED_PIPELINE_SLEEP = 2
+CONFIG_PIPELINE_SLEEP = 3
+STUCK_TIMEOUT = 60 * 60 * 2
+SHUTDOWN_INTERVAL = 60 * 10
+
+
+class Pipeline(NamedTuple):
+    name: str
+    job: Callable
+
+
+# class Runner:
+#     def __init__(
+#         self,
+#         pipelines: list[Pipeline],
+#         reporting_queue: queue.Queue,
+#         stuck_timeout: int = STUCK_TIMEOUT,
+#         shutdown_interval: int = SHUTDOWN_INTERVAL
+#     ) -> None:
+#         self.pipelines = pipelines
+#         self.reporting_queue = reporting_queue
+#         self.stuck_timeout = stuck_timeout
+#         self.shutdown_interval = shutdown_interval
+#         self.number = len(pipelines)
+#
+#     def make_threads(self) -> None:
+#         init_ts = time.time()
+#         heartbeat_queues = [queue.Queue() for _ in range(self.number)]
+#         terminating_events = [threading.Event() for _ in range(self.number)]
+#         heartbeat_ts = [init_ts for _ in range(self.number)]
+#
+#         return [
+#             threading.Thread(
+#                 name=pipeline.name,
+#                 target=keep_pipeline,
+#                 args=[heartbeat_queue, terminating_event, pipeline.job],
+#             )
+#             for heartbeat_queue, terminating_event, pipeline in zip(
+#                 heartbeat_queues, terminating_events, self.pipelines
+#             )
+#         ]
+#
+#     def run_threads(self, threads: list[threading.Thread]) -> None:
+#         for th in threads:
+#             th.start()
+#
+#     def run(self, once: bool = False) -> None:
+#         threads = self.make_threads()
+#         self.run_threads(threads)
+
+
+def run_pipelines(
+    pipelines: list[Pipeline],
+    process_report: ProcessReport,
+    once: bool = False,
+    stuck_timeout: int = STUCK_TIMEOUT,
+    shutdown_interval: int = SHUTDOWN_INTERVAL,
+) -> None:
+    init_ts = time.time()
+
+    heartbeat_queues = [queue.Queue() for _ in range(len(pipelines))]
+    terminating_events = [threading.Event() for _ in range(len(pipelines))]
+    heartbeat_ts = [init_ts for _ in range(len(pipelines))]
+
+    threads = [
+        threading.Thread(
+            name=pipeline.name,
+            target=keep_pipeline,
+            args=[heartbeat_queue, terminating_event, pipeline.job],
+        )
+        for heartbeat_queue, terminating_event, pipeline in zip(
+            heartbeat_queues, terminating_events, pipelines
+        )
+    ]
+
+    for th in threads:
+        th.start()
+
+    stuck = False
+    while not stuck:
+        for pindex, heartbeat_queue in enumerate(heartbeat_queues):
+            if not heartbeat_queue.empty():
+                heartbeat_ts[pindex] = heartbeat_queue.get()
+            ts = int(time.time())
+            if ts - heartbeat_ts[pindex] > stuck_timeout:
+                logger.warning(
+                    '%s pipeline is stuck (last heartbeat %d)',
+                    pipelines[pindex].name,
+                    heartbeat_ts[pindex],
+                )
+                stuck = True
+                break
+        if once and all(ts > init_ts for ts in heartbeat_ts):
+            logger.info('Successfully completed requested single run')
+            break
+        ts = int(time.time())
+        process_report.ts = ts
+
+    logger.info('Terminating all pipelines')
+    for event in terminating_events:
+        if not event.is_set():
+            event.set()
+    if stuck:
+        logger.info('Joining threads with timeout')
+        for thread in threads:
+            thread.join(timeout=shutdown_interval)
+        process_report.ts = 0
+        logger.warning('Stuck pipeline was detected')
+
+    logger.info('Finishing with pipelines')
+
+
+def keep_pipeline(
+    reporting_queue: queue.Queue, terminate: threading.Event, pipeline: Callable
+) -> None:
+    while not terminate.is_set():
+        logger.info('Running pipeline')
+        try:
+            pipeline()
+        except Exception:
+            logger.exception('Pipeline run failed')
+            terminate.set()
+        reporting_queue.put(time.time())
+        sleep_for_a_while()
+
+
+def sleep_for_a_while():
+    schain_monitor_sleep = random.randint(
+        MIN_SCHAIN_MONITOR_SLEEP_INTERVAL, MAX_SCHAIN_MONITOR_SLEEP_INTERVAL
+    )
+    logger.info('Monitor iteration completed, sleeping for %d', schain_monitor_sleep)
+    time.sleep(schain_monitor_sleep)
diff --git a/core/schains/process_manager_helper.py b/core/schains/process.py
similarity index 64%
rename from core/schains/process_manager_helper.py
rename to core/schains/process.py
index 2128c7ba1..6e6317daf 100644
--- a/core/schains/process_manager_helper.py
+++ b/core/schains/process.py
@@ -19,12 +19,15 @@

 import logging
 import os
+import shutil
 import signal
-from datetime import datetime
+import json
+import pathlib

 import psutil

+from tools.configs.schains import SCHAINS_DIR_PATH
 from tools.helper import check_pid

@@ -34,28 +37,6 @@
 P_KILL_WAIT_TIMEOUT = 60


-def terminate_stuck_schain_process(skale, schain_record, schain):
-    """
-    This function terminates the process if last_seen time is less than
-    DKG timeout * TIMEOUT_COEFFICIENT
-    """
-    allowed_last_seen_time = _calc_allowed_last_seen_time(skale)
-    if not schain_record.monitor_last_seen:
-        logging.warning(f'schain: {schain["name"]}, monitor_last_seen is None, skipping...')
-        return
-    schain_monitor_last_seen = schain_record.monitor_last_seen.timestamp()
-    if allowed_last_seen_time > schain_monitor_last_seen:
-        logger.warning(f'schain: {schain["name"]}, pid {schain_record.monitor_id} last seen is \
-{schain_monitor_last_seen}, while max allowed last_seen is {allowed_last_seen_time}, pid \
-{schain_record.monitor_id} will be terminated now!')
-        terminate_schain_process(schain_record)
-
-
-def terminate_schain_process(schain_record):
-    log_msg = f'schain: {schain_record.name}'
-    terminate_process(schain_record.monitor_id, log_msg=log_msg)
-
-
 def terminate_process(pid, kill_timeout=P_KILL_WAIT_TIMEOUT, log_msg=''):
     log_prefix = f'pid: {pid} - '
     if log_msg != '':
@@ -79,14 +60,49 @@ def terminate_process(pid, kill_timeout=P_KILL_WAIT_TIMEOUT, log_msg=''):
         logging.exception(f'{log_prefix} - termination failed!')


+def terminate_schain_process(schain_record):
+    log_msg = f'schain: {schain_record.name}'
+    terminate_process(schain_record.monitor_id, log_msg=log_msg)
+
+
 def is_monitor_process_alive(monitor_id):
     """Checks that provided monitor_id is inited and alive"""
     return monitor_id != 0 and check_pid(monitor_id)


-def _calc_allowed_last_seen_time(skale):
-    dkg_timeout = skale.constants_holder.get_dkg_timeout()
-    allowed_diff = int(dkg_timeout * TIMEOUT_COEFFICIENT)
-    logger.info(f'dkg_timeout: {dkg_timeout}, TIMEOUT_COEFFICIENT: {TIMEOUT_COEFFICIENT}, \
-allowed_diff: {allowed_diff}')
-    return datetime.now().timestamp() - allowed_diff
+class ProcessReport:
+    REPORT_FILENAME = 'process.json'
+
+    def __init__(self, name: str) -> None:
+        self.path = pathlib.Path.joinpath(SCHAINS_DIR_PATH, name, self.REPORT_FILENAME)
+
+    @property
+    def ts(self) -> int:
+        return self.read()['ts']
+
+    @property
+    def pid(self) -> int:
+        return self.read()['pid']
+
+    @property
+    def _tmp_path(self) -> str:
+        path = pathlib.Path(self.path)
+        return path.with_stem('.tmp.' + path.stem)
+
+    def read(self) -> dict:
+        with open(self.path) as process_file:
+            data = json.load(process_file)
+        return data
+
+    def _save_tmp(self, pid: int, ts: int) -> None:
+        data = {'pid': pid, 'ts': ts}
+        with open(self._tmp_path, 'w') as tmp_file:
+            json.dump(data, tmp_file)
+
+    def _move(self) -> str:
+        if os.path.isfile(self._tmp_path):
+            shutil.move(self._tmp_path, self.path)
+
+    def update(self, pid: int, ts: int) -> None:
+        self._save_tmp(pid=pid, ts=ts)
+        self._move()
diff --git a/core/schains/process_manager.py b/core/schains/process_manager.py
index fddaa6a4d..c8208a36d 100644
--- a/core/schains/process_manager.py
+++ b/core/schains/process_manager.py
@@ -17,8 +17,9 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.

-import sys
 import logging
+import sys
+import time
 from typing import Dict

 from multiprocessing import Process
@@ -26,8 +27,10 @@
 from core.schains.monitor.main import create_and_execute_pipelines
 from core.schains.notifications import notify_if_not_enough_balance
-from core.schains.process_manager_helper import (
-    terminate_stuck_schain_process, is_monitor_process_alive, terminate_process
+from core.schains.process import (
+    is_monitor_process_alive,
+    terminate_process,
+    ProcessReport,
 )

 from web.models.schain import upsert_schain_record, SChainRecord
@@ -37,6 +40,9 @@
 logger = logging.getLogger(__name__)


+DKG_TIMEOUT_COEFFICIENT = 2.2
+
+
 def pm_signal_handler(*args):
     """
     This function is trigerred when SIGTERM signal is received by the main process of the app.
@@ -68,9 +74,13 @@ def run_pm_schain(skale, skale_ima, node_config, schain: Dict) -> None:
     schain_record = upsert_schain_record(schain['name'])
+    process_report = ProcessReport(schain['name'])
     log_prefix = f'sChain {schain["name"]} -'  # todo - move to logger formatter

-    terminate_stuck_schain_process(skale, schain_record, schain)
+    dkg_timeout = skale.constants_holder.get_dkg_timeout()
+    allowed_diff = int(dkg_timeout * DKG_TIMEOUT_COEFFICIENT)
+    if int(time.time()) - process_report.ts > allowed_diff:
+        terminate_process(process_report.pid)
     monitor_process_alive = is_monitor_process_alive(schain_record.monitor_id)

     if not monitor_process_alive:
diff --git a/tests/schains/monitor/main_test.py b/tests/schains/monitor/main_test.py
index 91da5b75c..0dcdcb3ef 100644
--- a/tests/schains/monitor/main_test.py
+++ b/tests/schains/monitor/main_test.py
@@ -1,18 +1,15 @@
 import functools
-import mock
 import logging
 import time
-from concurrent.futures import ThreadPoolExecutor
+from multiprocessing import Process

 import pytest

 from core.schains.firewall.types import IpRange
 from core.schains.firewall.utils import get_sync_agent_ranges
-from core.schains.monitor.main import Pipeline, run_monitor_for_schain, run_pipelines
-from core.schains.task import Task
+from core.schains.monitor.main import Pipeline, run_pipelines

 from tools.helper import is_node_part_of_chain
-from web.models.schain import upsert_schain_record


 @pytest.fixture
@@ -52,50 +49,6 @@ def test_is_node_part_of_chain(skale, schain_on_contracts, node_config):
     assert not chain_on_node


-def test_run_monitor_for_schain(
-    skale,
-    skale_ima,
-    schain_on_contracts,
-    node_config,
-    schain_db,
-    dutils
-):
-    with mock.patch('core.schains.monitor.main.keep_tasks_running') as keep_tasks_running_mock:
-        run_monitor_for_schain(
-            skale,
-            skale_ima,
-            node_config,
-            schain={'name': schain_db, 'partOfNode': 0, 'generation': 0},
-            dutils=dutils,
-            once=True
-        )
-        assert isinstance(keep_tasks_running_mock.call_args[0][0], ThreadPoolExecutor)
-        assert isinstance(keep_tasks_running_mock.call_args[0][1][0], Task)
-        assert isinstance(keep_tasks_running_mock.call_args[0][1][1], Task)
-        assert keep_tasks_running_mock.call_args[0][2] == [None, None]
-
-
-def test_run_monitor_for_schain_left(
-    skale,
-    skale_ima,
-    node_config,
-    schain_db,
-    dutils
-):
-    schain_not_exists = 'not-on-node'
-    upsert_schain_record(schain_not_exists)
-    with mock.patch('core.schains.monitor.main.keep_tasks_running') as keep_tasks_running_mock:
-        run_monitor_for_schain(
-            skale,
-            skale_ima,
-            node_config,
-            schain={'name': schain_not_exists, 'partOfNode': 0, 'generation': 0},
-            dutils=dutils,
-            once=True
-        )
-        keep_tasks_running_mock.assert_not_called()
-
-
 def test_run_pipelines():
     def simple_pipeline(index: int):
         logging.info('Running simple pipeline %d', index)
@@ -108,13 +61,19 @@ def test_run_pipelines():
     def stuck_pipeline(index: int):
         logging.info('Running stuck pipeline %d', index)
         while True:
             logging.info('Stuck pipeline %d beat', index)
             time.sleep(2)

+    process_report = ProcessReport(name=_schain_name)
+
-    run_pipelines([
+    target = functools.partial(run_pipelines, pipelines=[
         Pipeline(name='healthy0', job=functools.partial(simple_pipeline, index=0)),
         Pipeline(name='healthy1', job=functools.partial(simple_pipeline, index=1)),
-    ], once=True, stuck_timeout=5, shutdown_interval=10)
+    ], process_report=process_report, once=True, stuck_timeout=5, shutdown_interval=10)
+    monitor_process = Process(target=target)
+    monitor_process.start()
+    monitor_process.join()

-    with pytest.raises(SystemExit):
-        run_pipelines([
-            Pipeline(name='healthy', job=functools.partial(simple_pipeline, index=0)),
-            Pipeline(name='stuck', job=functools.partial(stuck_pipeline, index=1))
-        ], stuck_timeout=5, shutdown_interval=10)
+    run_pipelines([
+        Pipeline(name='healthy', job=functools.partial(simple_pipeline, index=0)),
+        Pipeline(name='stuck', job=functools.partial(stuck_pipeline, index=1))
+    ], stuck_timeout=5, shutdown_interval=10)
+
+    monitor_process = Process(target=target)
+    monitor_process.start()
+    monitor_process.join(timeout=50)
diff --git a/tests/schains/monitor/process_test.py b/tests/schains/monitor/process_test.py
new file mode 100644
index 000000000..a46572744
--- /dev/null
+++ b/tests/schains/monitor/process_test.py
@@ -0,0 +1,34 @@
+import os
+import shutil
+import time
+from pathlib import Path
+
+import pytest
+
+from core.schains.process import ProcessReport
+
+from tools.configs.schains import SCHAINS_DIR_PATH
+
+
+@pytest.fixture
+def tmp_dir():
+    schain_name = 'test'
+    path = os.path.join(SCHAINS_DIR_PATH, schain_name)
+    Path(path).mkdir()
+    try:
+        yield path
+    finally:
+        shutil.rmtree(path, ignore_errors=True)
+
+
+def test_process_report(tmp_dir):
+    path = os.path.join(tmp_dir, 'process.json')
+    report = ProcessReport(path)
+    with pytest.raises(FileNotFoundError):
+        assert report.ts == 0
+
+    ts = int(time.time())
+    pid = 10
+    report.update(pid=pid, ts=ts)
+    assert report.ts == ts
+    assert report.pid == pid
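Note: ProcessReport persists the monitor's pid and heartbeat timestamp with a write-to-temp-then-rename sequence, so a concurrent reader never observes a half-written JSON file. A stand-alone sketch of the same update protocol (paths are illustrative; the patch uses shutil.move, while os.replace shown here has the same effect when source and destination are on the same filesystem):

    import json
    import os

    def update_report(path: str, pid: int, ts: int) -> None:
        tmp_path = path + '.tmp'
        with open(tmp_path, 'w') as tmp_file:
            json.dump({'pid': pid, 'ts': ts}, tmp_file)  # stage the new report
        os.replace(tmp_path, path)  # atomic swap: readers see old or new, never partial

    update_report('/tmp/process.json', pid=os.getpid(), ts=0)
    print(json.load(open('/tmp/process.json')))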
From c40b4db18913409d73563475d4a7825ad8406349 Mon Sep 17 00:00:00 2001
From: badrogger
Date: Tue, 6 Aug 2024 18:15:05 +0000
Subject: [PATCH 028/103] Add process_manager tests

---
 core/schains/monitor/main.py          | 17 +++--
 core/schains/monitor/pipeline.py      |  3 +-
 core/schains/process.py               | 34 ++++++++--
 core/schains/process_manager.py       | 94 +++++++++++++--------------
 tests/schains/monitor/main_test.py    | 51 ++++++++++++---
 tests/schains/monitor/process_test.py | 10 ++-
 tests/schains/process_manager_test.py | 75 +++++++++++++++++++++
 7 files changed, 204 insertions(+), 80 deletions(-)
 create mode 100644 tests/schains/process_manager_test.py

diff --git a/core/schains/monitor/main.py b/core/schains/monitor/main.py
index 52922e3bf..ffefdad46 100644
--- a/core/schains/monitor/main.py
+++ b/core/schains/monitor/main.py
@@ -19,7 +19,7 @@

 import functools
 import logging
-from typing import Dict, Optional
+from typing import Optional
 from importlib import reload

 from skale import Skale, SkaleIma
@@ -45,7 +45,7 @@
 from tools.notifications.messages import notify_checks
 from tools.helper import is_node_part_of_chain, no_hyphens
 from tools.resources import get_statsd_client
-from web.models.schain import SChainRecord
+from web.models.schain import SChainRecord, upsert_schain_record


 MIN_SCHAIN_MONITOR_SLEEP_INTERVAL = 20
@@ -53,12 +53,13 @@
 STUCK_TIMEOUT = 60 * 60 * 2
 SHUTDOWN_INTERVAL = 60 * 10
+DKG_TIMEOUT_COEFFICIENT = 2.2

 logger = logging.getLogger(__name__)


 def run_config_pipeline(
-    skale: Skale, skale_ima: SkaleIma, schain: Dict, node_config: NodeConfig, stream_version: str
+    skale: Skale, skale_ima: SkaleIma, schain: dict, node_config: NodeConfig, stream_version: str
 ) -> None:
     name = schain['name']
     schain_record = SChainRecord.get_by_name(name)
@@ -119,7 +120,7 @@


 def run_skaled_pipeline(
-    skale: Skale, schain: Dict, node_config: NodeConfig, dutils: DockerUtils
+    skale: Skale, schain: dict, node_config: NodeConfig, dutils: DockerUtils
 ) -> None:
     name = schain['name']
     schain_record = SChainRecord.get_by_name(name)
@@ -169,12 +170,11 @@
     mon(skaled_am, skaled_checks).run()


-def create_and_execute_pipelines(
+def start_monitor(
     skale: Skale,
     schain: dict,
     node_config: NodeConfig,
     skale_ima: SkaleIma,
-    schain_record: SChainRecord,
     process_report: ProcessReport,
     dutils: Optional[DockerUtils] = None,
 ) -> bool:
@@ -182,6 +182,7 @@
     reload(web3_request)
     name = schain['name']

     stream_version = get_skale_node_version()
+    schain_record = upsert_schain_record(name)

     is_rotation_active = skale.node_rotation.is_rotation_active(name)
@@ -206,6 +206,8 @@
     pipelines = []
     if not leaving_chain:
         logger.info('Adding config pipelines to the pool')
+        dkg_timeout = skale.constants_holder.get_dkg_timeout()
+        stuck_timeout = int(dkg_timeout * DKG_TIMEOUT_COEFFICIENT)
         pipelines.append(
             Pipeline(
                 name='config',
@@ -241,5 +243,5 @@
         logger.warning('No pipelines to run')
         return False

-    run_pipelines(pipelines)
+    run_pipelines(pipelines=pipelines, process_report=process_report)
     return True
diff --git a/core/schains/monitor/pipeline.py b/core/schains/monitor/pipeline.py
index 16f738cea..cf9f73ae7 100644
--- a/core/schains/monitor/pipeline.py
+++ b/core/schains/monitor/pipeline.py
@@ -1,12 +1,11 @@
 import logging
 import queue
 import random
-import sys
 import threading
 import time

 from typing import Callable, NamedTuple
-from core.schains.config.directory import schain_config_dir
+from core.schains.process import ProcessReport

 logger = logging.getLogger(__name__)
diff --git a/core/schains/process.py b/core/schains/process.py
index 6e6317daf..734bd7d8a 100644
--- a/core/schains/process.py
+++ b/core/schains/process.py
@@ -74,35 +74,55 @@ class ProcessReport:
     REPORT_FILENAME = 'process.json'

     def __init__(self, name: str) -> None:
-        self.path = pathlib.Path.joinpath(SCHAINS_DIR_PATH, name, self.REPORT_FILENAME)
+        self.path = pathlib.Path(SCHAINS_DIR_PATH).joinpath(name, self.REPORT_FILENAME)
+
+    def is_exist(self) -> bool:
+        return os.path.isfile(self.path)

     @property
     def ts(self) -> int:
         return self.read()['ts']

+    @ts.setter
+    def ts(self, value: int) -> None:
+        report = {}
+        if self.is_exist():
+            report = self.read()
+        report['ts'] = value
+        self._save_tmp(report)
+        self._move()
+
     @property
     def pid(self) -> int:
         return self.read()['pid']

+    @pid.setter
+    def pid(self, value: int) -> None:
+        report = {}
+        if self.is_exist():
+            report = self.read()
+        report['pid'] = value
+        self._save_tmp(report)
+        self._move()
+
     @property
     def _tmp_path(self) -> str:
-        path = pathlib.Path(self.path)
-        return path.with_stem('.tmp.' + path.stem)
+        return self.path.with_stem('.tmp.' + self.path.stem)

     def read(self) -> dict:
         with open(self.path) as process_file:
             data = json.load(process_file)
         return data

-    def _save_tmp(self, pid: int, ts: int) -> None:
-        data = {'pid': pid, 'ts': ts}
+    def _save_tmp(self, report: dict) -> None:
         with open(self._tmp_path, 'w') as tmp_file:
-            json.dump(data, tmp_file)
+            json.dump(report, tmp_file)

     def _move(self) -> str:
         if os.path.isfile(self._tmp_path):
             shutil.move(self._tmp_path, self.path)

     def update(self, pid: int, ts: int) -> None:
-        self._save_tmp(pid=pid, ts=ts)
+        report = {'pid': pid, 'ts': ts}
+        self._save_tmp(report=report)
         self._move()
diff --git a/core/schains/process_manager.py b/core/schains/process_manager.py
index c8208a36d..549f492cd 100644
--- a/core/schains/process_manager.py
+++ b/core/schains/process_manager.py
@@ -18,14 +18,14 @@
 # along with this program. If not, see <https://www.gnu.org/licenses/>.

 import logging
-import sys
 import time

 from multiprocessing import Process
-from typing import Dict
+from typing import Optional

-from skale import Skale
+from skale import Skale, SkaleIma

-from core.schains.monitor.main import create_and_execute_pipelines
+from core.node_config import NodeConfig
+from core.schains.monitor.main import start_monitor
 from core.schains.notifications import notify_if_not_enough_balance
 from core.schains.process import (
     is_monitor_process_alive,
     terminate_process,
     ProcessReport,
 )

-from web.models.schain import upsert_schain_record, SChainRecord
 from tools.str_formatters import arguments_list_string


 logger = logging.getLogger(__name__)


-def pm_signal_handler(*args):
-    """
-    This function is trigerred when SIGTERM signal is received by the main process of the app.
-    The purpose of the process manager signal handler is to forward SIGTERM signal to all sChain
-    processes so they can gracefully save DKG results before
-    """
-    records = SChainRecord.select()
-    print(f'schain_records: {len(records)}')
-    print(f'schain_records: {records}')
-    for r in records:
-        logger.warning(f'Sending SIGTERM to {r.name}, {r.monitor_id}')
-        terminate_process(r.monitor_id)
-    logger.warning('All sChain processes stopped, exiting...')
-    sys.exit(0)
-
-
-def run_process_manager(skale, skale_ima, node_config):
-    # signal.signal(signal.SIGTERM, pm_signal_handler)
+def run_process_manager(skale: Skale, skale_ima: SkaleIma, node_config: NodeConfig) -> None:
     logger.info('Process manager started')
     node_id = node_config.id
     node_info = node_config.all()
@@ -68,9 +74,13 @@
     logger.info('Process manager procedure finished')


-def run_pm_schain(skale, skale_ima, node_config, schain: Dict) -> None:
-    schain_record = upsert_schain_record(schain['name'])
-    process_report = ProcessReport(schain['name'])
-    log_prefix = f'sChain {schain["name"]} -'  # todo - move to logger formatter
-
-    dkg_timeout = skale.constants_holder.get_dkg_timeout()
-    allowed_diff = int(dkg_timeout * DKG_TIMEOUT_COEFFICIENT)
-    if int(time.time()) - process_report.ts > allowed_diff:
-        terminate_process(process_report.pid)
-    monitor_process_alive = is_monitor_process_alive(schain_record.monitor_id)
+def run_pm_schain(
+    skale: Skale,
+    skale_ima: SkaleIma,
+    node_config: NodeConfig,
+    schain: dict,
+    timeout: Optional[int] = None,
+) -> None:
+    log_prefix = f'sChain {schain["name"]} -'

-    if not monitor_process_alive:
-        logger.info(f'{log_prefix} PID {schain_record.monitor_id} is not running, spawning...')
+    if timeout is not None:
+        allowed_diff = timeout
+    else:
+        dkg_timeout = skale.constants_holder.get_dkg_timeout()
+        allowed_diff = timeout or int(dkg_timeout * DKG_TIMEOUT_COEFFICIENT)
+
+    process_report = ProcessReport(schain['name'])
+    init_ts = int(time.time())
+    if process_report.is_exist():
+        if init_ts - process_report.ts > allowed_diff:
+            logger.info('%s Terminating process: PID = %d', log_prefix, process_report.pid)
+            terminate_process(process_report.pid)
+        else:
+            pid = process_report.pid
+            logger.info('%s Process is running: PID = %d', log_prefix, pid)
+
+    if not process_report.is_exist() or not is_monitor_process_alive(process_report.pid):
+        process_report.ts = init_ts
         process = Process(
             name=schain['name'],
-            target=create_and_execute_pipelines,
-            args=(
-                skale,
-                schain,
-                node_config,
-                skale_ima,
-                schain_record
-            )
+            target=start_monitor,
+            args=(skale, schain, node_config, skale_ima, process_report),
         )
         process.start()
-        schain_record.set_monitor_id(process.ident)
-        logger.info(f'{log_prefix} Process started: PID = {process.ident}')
-    else:
-        logger.info(f'{log_prefix} Process is running: PID = {schain_record.monitor_id}')
+        pid = process.ident
+        process_report.pid = pid
+        logger.info('%s Process started: PID = %d', log_prefix, pid)


 def fetch_schains_to_monitor(skale: Skale, node_id: int) -> list:
@@ -114,9 +103,16 @@
     active_schains = list(filter(lambda schain: schain['active'], schains))
     schains_holes = len(schains) - len(active_schains)
     logger.info(
-        arguments_list_string({'Node ID': node_id, 'sChains on node': active_schains,
-                               'Number of sChains on node': len(active_schains),
-                               'Empty sChain structs': schains_holes}, 'Monitoring sChains'))
+        arguments_list_string(
+            {
+                'Node ID': node_id,
+                'sChains on node': active_schains,
+                'Number of sChains on node': len(active_schains),
+                'Empty sChain structs': schains_holes,
+            },
+            'Monitoring sChains',
+        )
+    )
     return active_schains
diff --git a/tests/schains/monitor/main_test.py b/tests/schains/monitor/main_test.py
index 0dcdcb3ef..da003fc33 100644
--- a/tests/schains/monitor/main_test.py
+++ b/tests/schains/monitor/main_test.py
@@ -1,5 +1,8 @@
 import functools
 import logging
+import os
+import pathlib
+import shutil
 import time
 from multiprocessing import Process

 import pytest

 from core.schains.firewall.types import IpRange
 from core.schains.firewall.utils import get_sync_agent_ranges
 from core.schains.monitor.main import Pipeline, run_pipelines
+from core.schains.process import ProcessReport, terminate_process
+from tools.configs.schains import SCHAINS_DIR_PATH
 from tools.helper import is_node_part_of_chain


+@pytest.fixture
+def tmp_dir(_schain_name):
+    path = os.path.join(SCHAINS_DIR_PATH, _schain_name)
+    pathlib.Path(path).mkdir()
+    try:
+        yield path
+    finally:
+        shutil.rmtree(path, ignore_errors=True)
+
+
 @pytest.fixture
 def sync_ranges(skale):
     skale.sync_manager.grant_sync_manager_role(skale.wallet.address)
@@ -63,7 +63,7 @@ def test_is_node_part_of_chain(skale, schain_on_contracts, node_config):
     assert not chain_on_node


-def test_run_pipelines():
+def test_run_pipelines(tmp_dir, _schain_name):
     def simple_pipeline(index: int):
         logging.info('Running simple pipeline %d', index)
         time.sleep(1)
@@ -75,38 +75,ff @@
     def stuck_pipeline(index: int):
         logging.info('Running stuck pipeline %d', index)
         while True:
             logging.info('Stuck pipeline %d beat', index)
             time.sleep(2)

     process_report = ProcessReport(name=_schain_name)

     target = functools.partial(run_pipelines, pipelines=[
         Pipeline(name='healthy0', job=functools.partial(simple_pipeline, index=0)),
         Pipeline(name='healthy1', job=functools.partial(simple_pipeline, index=1)),
     ], process_report=process_report, once=True, stuck_timeout=5, shutdown_interval=10)

+    terminated = False
     monitor_process = Process(target=target)
-    monitor_process.start()
-    monitor_process.join()
+    try:
+        monitor_process.start()
+        monitor_process.join()
+    finally:
+        if monitor_process.is_alive():
+            terminated = True
+            terminate_process(monitor_process.ident)
+    assert not terminated

-    run_pipelines([
+    target = functools.partial(run_pipelines, pipelines=[
         Pipeline(name='healthy', job=functools.partial(simple_pipeline, index=0)),
         Pipeline(name='stuck', job=functools.partial(stuck_pipeline, index=1))
-    ], stuck_timeout=5, shutdown_interval=10)
+    ], process_report=process_report, stuck_timeout=5, shutdown_interval=10)

     monitor_process = Process(target=target)
-    monitor_process.start()
-    monitor_process.join(timeout=50)
+    terminated = False
+
+    try:
+        monitor_process.start()
+        monitor_process.join(timeout=50)
+    finally:
+        if monitor_process.is_alive():
+            terminated = True
+            terminate_process(monitor_process.ident)
+
+    assert terminated
diff --git a/tests/schains/monitor/process_test.py b/tests/schains/monitor/process_test.py
index a46572744..d5b495d03 100644
--- a/tests/schains/monitor/process_test.py
+++ b/tests/schains/monitor/process_test.py
@@ -11,9 +11,8 @@


 @pytest.fixture
-def tmp_dir():
-    schain_name = 'test'
-    path = os.path.join(SCHAINS_DIR_PATH, schain_name)
+def tmp_dir(_schain_name):
+    path = os.path.join(SCHAINS_DIR_PATH, _schain_name)
     Path(path).mkdir()
     try:
         yield path
@@ -21,9 +20,8 @@
         shutil.rmtree(path, ignore_errors=True)


-def test_process_report(tmp_dir):
-    path = os.path.join(tmp_dir, 'process.json')
-    report = ProcessReport(path)
+def test_process_report(_schain_name, tmp_dir):
+    report = ProcessReport(_schain_name)
     with pytest.raises(FileNotFoundError):
         assert report.ts == 0

     ts = int(time.time())
     pid = 10
     report.update(pid=pid, ts=ts)
     assert report.ts == ts
     assert report.pid == pid
diff --git a/tests/schains/process_manager_test.py b/tests/schains/process_manager_test.py
new file mode 100644
index 000000000..80171e749
--- /dev/null
+++ b/tests/schains/process_manager_test.py
@@ -0,0 +1,75 @@
+import mock
+import os
+import pathlib
+import shutil
+import time
+
+import psutil
+import pytest
+
+from core.schains.process import ProcessReport, terminate_process
+from core.schains.process_manager import run_pm_schain
+from tools.configs.schains import SCHAINS_DIR_PATH
+
+
+@pytest.fixture
+def tmp_dir(_schain_name):
+    path = os.path.join(SCHAINS_DIR_PATH, _schain_name)
+    pathlib.Path(path).mkdir()
+    try:
+        yield path
+    finally:
+        shutil.rmtree(path, ignore_errors=True)
+
+
+def test_run_pm_schain(tmp_dir, skale, skale_ima, node_config, _schain_name):
+    def target_regular_mock(*args, **kwargs):
+        process_report = args[-1]
+        iterations = 5
+        for _ in range(iterations):
+            process_report.ts = int(time.time())
+            time.sleep(1)
+
+    def target_stuck_mock(*args, **kwargs):
+        iterations = 10000
+        for _ in range(iterations):
+            time.sleep(1)
+
+    schain = {'name': _schain_name}
+
+    timeout = 7
+
+    with mock.patch('core.schains.process_manager.start_monitor', target_regular_mock):
+        run_pm_schain(skale, skale_ima, node_config, schain, timeout=timeout)
+        pid = ProcessReport(_schain_name).pid
+        assert psutil.Process(pid).is_running()
+
+        start_ts = int(time.time())
+
+        while int(time.time()) - start_ts < 2 * timeout:
+            time.sleep(1)
+        assert psutil.Process(pid).status() == 'zombie'
+
+    with mock.patch('core.schains.process_manager.start_monitor', target_stuck_mock):
+        run_pm_schain(skale, skale_ima, node_config, schain, timeout=timeout)
+
+        pid = ProcessReport(_schain_name).pid
+
+        assert psutil.Process(pid).is_running()
+
+        start_ts = int(time.time())
+
+        while int(time.time()) - start_ts < 2 * timeout:
+            try:
+                psutil.Process(pid).is_running()
+            except psutil.NoSuchProcess:
+                break
+            with mock.patch('core.schains.process_manager.start_monitor', target_stuck_mock):
+                run_pm_schain(skale, skale_ima, node_config, schain, timeout=timeout)
+            time.sleep(1)
+
+        with pytest.raises(psutil.NoSuchProcess):
+            psutil.Process(pid).is_running()
+
+    pid = ProcessReport(_schain_name).pid
+    terminate_process(pid)
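Note: the new tests rely on psutil to probe what happened to the spawned monitor. On Linux, a child whose target has returned but that was never joined shows up as a zombie, while a terminated and reaped one is gone entirely. A small sketch of the zombie probe (assuming psutil is installed, as in the test requirements):

    import multiprocessing
    import time

    import psutil

    proc = multiprocessing.Process(target=lambda: None)
    proc.start()
    time.sleep(1)  # let the child exit without being joined
    assert psutil.Process(proc.pid).status() == psutil.STATUS_ZOMBIE
    proc.join()    # reaping the child removes the zombie entry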
From 54d1201e871b19c950158c105a36462baa477889 Mon Sep 17 00:00:00 2001
From: badrogger
Date: Tue, 6 Aug 2024 18:15:52 +0000
Subject: [PATCH 029/103] Remove redundant steps in test cleanup

---
 scripts/helper.sh | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/scripts/helper.sh b/scripts/helper.sh
index b8f15641a..4e7ee18cc 100644
--- a/scripts/helper.sh
+++ b/scripts/helper.sh
@@ -31,11 +31,9 @@ export_test_env () {

 tests_cleanup () {
     export_test_env
-    docker rm -f skale_schain_test && docker volume rm test || true
-    sudo rm -r tests/skale-data/lib || true
+    rm -r tests/skale-data/lib || true
     rm tests/skale-data/node_data/node_config.json || true
     docker rm -f sgx-simulator || true
-    docker rm -f skale_schain_test1 skale_schain_test2 skale_schain_test3 || true
    find . -name \*.pyc -delete || true
     mkdir -p $SGX_CERTIFICATES_FOLDER || true
     rm -rf $SGX_CERTIFICATES_FOLDER/sgx.* || true

From 396a846a83f8c7510de86da832ce478c90ca8005 Mon Sep 17 00:00:00 2001
From: badrogger
Date: Tue, 6 Aug 2024 19:08:01 +0000
Subject: [PATCH 030/103] Remove DKG_TIMEOUT_COEFFICIENT duplicate

---
 core/schains/monitor/main.py    | 9 +++++----
 core/schains/process_manager.py | 5 +----
 tools/configs/schains.py        | 2 ++
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/core/schains/monitor/main.py b/core/schains/monitor/main.py
index ffefdad46..9196921bd 100644
--- a/core/schains/monitor/main.py
+++ b/core/schains/monitor/main.py
@@ -42,6 +42,7 @@

 from tools.docker_utils import DockerUtils
 from tools.configs import SYNC_NODE
+from tools.configs.schains import DKG_TIMEOUT_COEFFICIENT
 from tools.notifications.messages import notify_checks
 from tools.helper import is_node_part_of_chain, no_hyphens
 from tools.resources import get_statsd_client
@@ -53,7 +54,6 @@

 STUCK_TIMEOUT = 60 * 60 * 2
 SHUTDOWN_INTERVAL = 60 * 10
-DKG_TIMEOUT_COEFFICIENT = 2.2

 logger = logging.getLogger(__name__)
@@ -184,6 +184,9 @@ def start_monitor(
     stream_version = get_skale_node_version()
     schain_record = upsert_schain_record(name)

+    dkg_timeout = skale.constants_holder.get_dkg_timeout()
+    stuck_timeout = int(dkg_timeout * DKG_TIMEOUT_COEFFICIENT)
+
     is_rotation_active = skale.node_rotation.is_rotation_active(name)

     leaving_chain = not SYNC_NODE and not is_node_part_of_chain(skale, name, node_config.id)
@@ -206,8 +209,6 @@
     pipelines = []
     if not leaving_chain:
         logger.info('Adding config pipelines to the pool')
-        dkg_timeout = skale.constants_holder.get_dkg_timeout()
-        stuck_timeout = int(dkg_timeout * DKG_TIMEOUT_COEFFICIENT)
         pipelines.append(
             Pipeline(
                 name='config',
@@ -244,5 +245,5 @@
         logger.warning('No pipelines to run')
         return False

-    run_pipelines(pipelines=pipelines, process_report=process_report)
+    run_pipelines(pipelines=pipelines, process_report=process_report, stuck_timeout=stuck_timeout)
     return True
diff --git a/core/schains/process_manager.py b/core/schains/process_manager.py
index 549f492cd..a7785af1b 100644
--- a/core/schains/process_manager.py
+++ b/core/schains/process_manager.py
@@ -34,14 +34,11 @@
 )

 from tools.str_formatters import arguments_list_string
-
+from tools.configs.schains import DKG_TIMEOUT_COEFFICIENT

 logger = logging.getLogger(__name__)


-DKG_TIMEOUT_COEFFICIENT = 2.2
-
-
 def run_process_manager(skale: Skale, skale_ima: SkaleIma, node_config: NodeConfig) -> None:
     logger.info('Process manager started')
     node_id = node_config.id
diff --git a/tools/configs/schains.py b/tools/configs/schains.py
index 566709ca8..390275471 100644
--- a/tools/configs/schains.py
+++ b/tools/configs/schains.py
@@ -53,3 +53,5 @@
 RPC_CHECK_TIMEOUT_STEP = 10

 MAX_CONSENSUS_STORAGE_INF_VALUE = 1000000000000000000
+
+DKG_TIMEOUT_COEFFICIENT = 2.2
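Note on the threshold this constant sets: the liveness cutoff is tied directly to the on-chain DKG timeout. With DKG_TIMEOUT_COEFFICIENT = 2.2 and, say, a 180-second dkg_timeout (an illustrative figure only; the real value comes from skale.constants_holder.get_dkg_timeout()), a monitor process is declared stuck after int(180 * 2.2) = 396 seconds without a heartbeat, which leaves a full DKG round plus margin before termination.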
badrogger Date: Wed, 7 Aug 2024 11:52:55 +0000 Subject: [PATCH 031/103] Update IMA agent versions in containers.json --- tests/skale-data/config/containers.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/skale-data/config/containers.json b/tests/skale-data/config/containers.json index 3561d2539..3e0221bd6 100644 --- a/tests/skale-data/config/containers.json +++ b/tests/skale-data/config/containers.json @@ -31,8 +31,8 @@ }, "ima": { "name": "skalenetwork/ima", - "version": "2.0.0-develop.3", - "new_version": "2.0.0-beta.9", + "version": "2.1.0-beta.3", + "new_version": "2.1.0", "custom_args": {}, "args": { "restart_policy": { From 563a67751274e4c1bf305dc3325360bc3f9d73ff Mon Sep 17 00:00:00 2001 From: badrogger Date: Wed, 7 Aug 2024 17:00:24 +0000 Subject: [PATCH 032/103] Fix IMA migration tests --- tests/schains/monitor/action/skaled_action_test.py | 8 ++++---- tests/skale-data/config/containers.json | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/schains/monitor/action/skaled_action_test.py b/tests/schains/monitor/action/skaled_action_test.py index 3281ec6bd..22f59f876 100644 --- a/tests/schains/monitor/action/skaled_action_test.py +++ b/tests/schains/monitor/action/skaled_action_test.py @@ -282,7 +282,7 @@ def test_ima_container_action_from_scratch( container_name = containers[0].name assert container_name == f'skale_ima_{skaled_am.name}' image = dutils.get_container_image_name(container_name) - assert image == 'skalenetwork/ima:2.0.0-beta.9' + assert image == 'skalenetwork/ima:2.1.0' # @pytest.mark.skip('Docker API GA issues need to be resolved') @@ -304,8 +304,8 @@ def test_ima_container_action_image_pulling( container_name = containers[0].name assert container_name == f'skale_ima_{skaled_am.name}' image = dutils.get_container_image_name(container_name) - assert image == 'skalenetwork/ima:2.0.0-develop.3' - assert dutils.pulled('skalenetwork/ima:2.0.0-beta.9') + assert image == 'skalenetwork/ima:2.1.0-beta.3' + assert dutils.pulled('skalenetwork/ima:2.1.0') def test_ima_container_action_image_migration( container_name = containers[0].name assert container_name == f'skale_ima_{skaled_am.name}' image = dutils.get_container_image_name(container_name) - assert image == 'skalenetwork/ima:2.0.0-beta.9' + assert image == 'skalenetwork/ima:2.1.0' def test_ima_container_action_time_frame_migration( diff --git a/tests/skale-data/config/containers.json b/tests/skale-data/config/containers.json index 3e0221bd6..41f3f514e 100644 --- a/tests/skale-data/config/containers.json +++ b/tests/skale-data/config/containers.json @@ -1,7 +1,7 @@ { "schain": { "name": "skalenetwork/schain", - "version": "3.16.1", + "version": "3.19.0", "custom_args": { "ulimits_list": [ { From da438179ece4e9bb945a776ce8c557316e262af0 Mon Sep 17 00:00:00 2001 From: badrogger Date: Wed, 7 Aug 2024 17:06:55 +0000 Subject: [PATCH 033/103] Fix routes tests --- tests/routes/schains_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/routes/schains_test.py b/tests/routes/schains_test.py index 329987985..e9dd2218e 100644 --- a/tests/routes/schains_test.py +++ b/tests/routes/schains_test.py @@ -172,8 +172,8 @@ def test_get_schain( def test_schain_containers_versions(skale_bp): - expected_skaled_version = '3.16.1' - expected_ima_version = '2.0.0-beta.9' + expected_skaled_version = '3.19.0' + expected_ima_version = '2.1.0' data = get_bp_data(skale_bp, get_api_url( BLUEPRINT_NAME, 
'container-versions')) assert data == { From 6fdce4e616e4742774a87728f4dd5bd73af18e14 Mon Sep 17 00:00:00 2001 From: badrogger Date: Wed, 7 Aug 2024 18:10:59 +0000 Subject: [PATCH 034/103] Remove locking from SChainRecord --- web/models/schain.py | 61 +++++++++++++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 21 deletions(-) diff --git a/web/models/schain.py b/web/models/schain.py index a7f67eb79..f60e97e72 100644 --- a/web/models/schain.py +++ b/web/models/schain.py @@ -17,12 +17,13 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +import functools import logging -import threading +import time from datetime import datetime from peewee import (CharField, DateTimeField, - IntegrityError, IntegerField, BooleanField) + IntegrityError, IntegerField, BooleanField, OperationalError) from core.schains.dkg.structures import DKGStatus from web.models.base import BaseModel @@ -30,10 +31,31 @@ logger = logging.getLogger(__name__) DEFAULT_CONFIG_VERSION = '0.0.0' +RETRY_ATTEMPTS = 10 +TIMEOUTS = [2 ** p for p in range(RETRY_ATTEMPTS)] + + +def operational_error_retry(func): + @functools.wraps(func) + def wrapper(cls, *args, **kwargs): + result, error = None, None + for i, timeout in enumerate(TIMEOUTS): + try: + result = func(cls, *args, **kwargs) + except OperationalError as e: + logger.exception('DB operational error') + error = e + time.sleep(timeout) + else: + error = None + break + if error is not None: + raise error + return result + return wrapper class SChainRecord(BaseModel): - _lock = threading.Lock() name = CharField(unique=True) added_at = DateTimeField() dkg_status = IntegerField() @@ -70,6 +92,7 @@ def add(cls, name): return (None, err) @classmethod + @operational_error_retry def get_by_name(cls, name): return cls.get(cls.name == name) @@ -107,10 +130,6 @@ def to_dict(cls, record): 'failed_rpc_count': record.failed_rpc_count } - def upload(self, *args, **kwargs) -> None: - with SChainRecord._lock: - self.save(*args, **kwargs) - def dkg_started(self): self.set_dkg_status(DKGStatus.IN_PROGRESS) @@ -126,66 +145,66 @@ def dkg_done(self): def set_dkg_status(self, val: DKGStatus) -> None: logger.info(f'Changing DKG status for {self.name} to {val.name}') self.dkg_status = val - self.upload() + self.save() def set_deleted(self): self.is_deleted = True - self.upload() + self.save() def set_first_run(self, val): logger.info(f'Changing first_run for {self.name} to {val}') self.first_run = val - self.upload(only=[SChainRecord.first_run]) + self.save(only=[SChainRecord.first_run]) def set_backup_run(self, val): logger.info(f'Changing backup_run for {self.name} to {val}') self.backup_run = val - self.upload(only=[SChainRecord.backup_run]) + self.save(only=[SChainRecord.backup_run]) def set_repair_mode(self, value): logger.info(f'Changing repair_mode for {self.name} to {value}') self.repair_mode = value - self.upload() + self.save() def set_new_schain(self, value): logger.info(f'Changing new_schain for {self.name} to {value}') self.new_schain = value - self.upload() + self.save() def set_needs_reload(self, value): logger.info(f'Changing needs_reload for {self.name} to {value}') self.needs_reload = value - self.upload() + self.save() def set_monitor_last_seen(self, value): logger.info(f'Changing monitor_last_seen for {self.name} to {value}') self.monitor_last_seen = value - self.upload() + self.save() def set_monitor_id(self, value): logger.info(f'Changing monitor_id for {self.name} to {value}') self.monitor_id = 
value - self.upload() + self.save() def set_config_version(self, value): logger.info(f'Changing config_version for {self.name} to {value}') self.config_version = value - self.upload() + self.save() def set_restart_count(self, value: int) -> None: logger.info(f'Changing restart count for {self.name} to {value}') self.restart_count = value - self.upload() + self.save() def set_failed_rpc_count(self, value: int) -> None: logger.info(f'Changing failed rpc count for {self.name} to {value}') self.failed_rpc_count = value - self.upload() + self.save() def set_snapshot_from(self, value: str) -> None: logger.info(f'Changing snapshot from for {self.name} to {value}') self.snapshot_from = value - self.upload() + self.save() def reset_failed_counters(self) -> None: logger.info(f'Resetting failed counters for {self.name}') @@ -203,7 +222,7 @@ def is_dkg_done(self) -> bool: def set_sync_config_run(self, value): logger.info(f'Changing sync_config_run for {self.name} to {value}') self.sync_config_run = value - self.upload() + self.save() def is_dkg_unsuccessful(self) -> bool: return self.dkg_status in [ From 6736628c4a7fc321f9d6efdf526b325ccb38cb35 Mon Sep 17 00:00:00 2001 From: badrogger Date: Wed, 7 Aug 2024 18:11:34 +0000 Subject: [PATCH 035/103] Switch to info log level for gunicorn --- gunicorn.conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gunicorn.conf.py b/gunicorn.conf.py index e301f4843..51958e107 100644 --- a/gunicorn.conf.py +++ b/gunicorn.conf.py @@ -1,4 +1,4 @@ bind = "127.0.0.1:3007" workers = 2 timeout = 1000 -loglevel = "debug" +loglevel = "info" From 2899cfb8223eee34e5d6a47e244f40f35b78ee27 Mon Sep 17 00:00:00 2001 From: badrogger Date: Thu, 15 Aug 2024 13:36:20 +0000 Subject: [PATCH 036/103] Add generate_schain_allocation from helper-scripts --- tools/schain_allocation.py | 201 +++++++++++++++++++++++++++++++++++++ 1 file changed, 201 insertions(+) create mode 100644 tools/schain_allocation.py diff --git a/tools/schain_allocation.py b/tools/schain_allocation.py new file mode 100644 index 000000000..3fc131261 --- /dev/null +++ b/tools/schain_allocation.py @@ -0,0 +1,201 @@ +# -*- coding: utf-8 -*- +# +# This file is part of SKALE.py +# +# Copyright (C) 2021-Present SKALE Labs +# +# SKALE.py is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# SKALE.py is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with SKALE.py. If not, see . 
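+# Illustration of the sizing arithmetic defined in this module (a sketch for
+# reference, not used by the code): calc_disk_factor(MEDIUM_DIVIDER) evaluates
+# to floor((1 - 1 / 9) * 1000) / 1000 == 0.888, i.e. roughly 88.8% of the raw
+# disk is treated as allocatable and the remainder is kept as headroom. The
+# allocatable space is then rounded down to whole VOLUME_CHUNK blocks
+# (512 * SMALL_DIVIDER == 65536 bytes) before being divided per chain size.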
+ +import os +import yaml +import math + + +def calc_disk_factor(divider, decimals=3): + factor = 10 ** decimals + disk_factor_raw = 1 - (1 / (divider + 1)) + return math.floor(disk_factor_raw * factor) / factor + + +LARGE_DIVIDER = 1 +MEDIUM_DIVIDER = 8 +TEST_DIVIDER = 8 +SMALL_DIVIDER = 128 + +VOLUME_CHUNK = 512 * SMALL_DIVIDER +DISK_FACTOR = calc_disk_factor(MEDIUM_DIVIDER) + + +class Alloc: + def to_dict(self): + return self.values + + +class ResourceAlloc(Alloc): + def __init__(self, value, fractional=False): + self.values = { + 'test4': value / TEST_DIVIDER, + 'test': value / TEST_DIVIDER, + 'small': value / SMALL_DIVIDER, + 'medium': value / MEDIUM_DIVIDER, + 'large': value / LARGE_DIVIDER + } + if not fractional: + for k in self.values: + self.values[k] = int(self.values[k]) + + +class DiskResourceAlloc(Alloc): + def __init__(self, value, fractional=False): + self.values = { + 'test4': value / TEST_DIVIDER, + 'test': value / TEST_DIVIDER, + 'small': value / SMALL_DIVIDER, + 'medium': value / MEDIUM_DIVIDER, + 'large': value / LARGE_DIVIDER + } + if not fractional: + for k in self.values: + self.values[k] = int(self.values[k]) + + +class SChainVolumeAlloc(Alloc): + def __init__(self, disk_alloc_dict: dict, proportions: dict): + self.values = {} + for size_name in disk_alloc_dict: + self.values[size_name] = {} + for key, value in proportions.items(): + lim = int(value * disk_alloc_dict[size_name]) + self.values[size_name][key] = lim + + +class LevelDBAlloc(Alloc): + def __init__(self, disk_alloc_dict: dict, proportions: dict): + self.values = {} + for size_name in disk_alloc_dict: + self.values[size_name] = {} + for key, value in proportions.items(): + lim = int(value * disk_alloc_dict[size_name]['max_skaled_leveldb_storage_bytes']) # noqa + self.values[size_name][key] = lim + + +def calculate_free_disk_space(disk_size: int) -> int: + return int(disk_size * DISK_FACTOR) // VOLUME_CHUNK * VOLUME_CHUNK + + +def calculate_shared_space_size(disk_size: int, shared_space_coefficient: float) -> int: + return int(disk_size * (1 - DISK_FACTOR) * shared_space_coefficient) // VOLUME_CHUNK * VOLUME_CHUNK # noqa + + +def safe_load_yaml(filepath): + with open(filepath, 'r') as stream: + try: + return yaml.safe_load(stream) + except yaml.YAMLError as exc: + print(exc) + + +def save_yaml(filepath, data, comments=None): + with open(filepath, 'w') as outfile: + if comments: + outfile.write(comments) + yaml.dump(data, outfile, default_flow_style=False) + + +def generate_disk_alloc(configs: dict, + env_type_name: str, + schain_allocation: dict) -> DiskResourceAlloc: + """Generates disk allocation for the provided env type""" + disk_size_bytes = configs['envs'][env_type_name]['server']['disk'] # noqa + free_disk_space = calculate_free_disk_space(disk_size_bytes) + disk_alloc = DiskResourceAlloc(free_disk_space) + schain_allocation[env_type_name]['disk'] = disk_alloc.to_dict() + return disk_alloc + + +def generate_volume_alloc(configs: dict, env_type_name: str, + schain_allocation: dict, + disk_alloc: ResourceAlloc) -> SChainVolumeAlloc: + """Generates volume partitioning """ + """for the provided env type and disk allocation""" + proportions = configs['common']['schain']['volume_limits'] + volume_alloc = SChainVolumeAlloc(disk_alloc.to_dict(), proportions) + schain_allocation[env_type_name]['volume_limits'] = volume_alloc.to_dict() + return volume_alloc + + +def generate_leveldb_alloc(configs: dict, + env_type_name: str, schain_allocation: dict, + volume_alloc: SChainVolumeAlloc) -> LevelDBAlloc: + 
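+    # Worked example with the 0.6 / 0.2 proportions from static_params.yaml:
+    # max_skaled_leveldb_storage_bytes == 63269997772 (mainnet medium) yields
+    # contract_storage == 37961998663 and db_storage == 12653999554.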
"""Generates LevelDB partitioning """ + """for the provided env type and volume partitioning""" + leveldb_proportions = configs['common']['schain']['leveldb_limits'] + leveldb_alloc = LevelDBAlloc(volume_alloc.to_dict(), leveldb_proportions) + schain_allocation[env_type_name]['leveldb_limits'] = leveldb_alloc.to_dict() + return leveldb_alloc + + +def generate_shared_space_value( + configs: dict, + env_type_name: str, + schain_allocation: dict +) -> int: + disk_size_bytes = configs['envs'][env_type_name]['server']['disk'] # noqa + + shared_space_coefficient = configs['common']['schain']['shared_space_coefficient'] # noqa + shared_space_size_bytes = calculate_shared_space_size(disk_size_bytes, shared_space_coefficient) + + schain_allocation[env_type_name]['shared_space'] = shared_space_size_bytes # noqa + return shared_space_size_bytes + + +def generate_schain_allocation(skale_node_path: str) -> dict: + configs_filepath = os.path.join(skale_node_path, 'static_params.yaml') + schain_allocation_filepath = os.path.join(skale_node_path, 'schain_allocation.yml') + configs = safe_load_yaml(configs_filepath) + + schain_allocation = {} + for env_type_name in configs['envs']: + schain_allocation[env_type_name] = {} + disk_alloc = generate_disk_alloc( + configs, env_type_name, schain_allocation) + volume_alloc = generate_volume_alloc( + configs, env_type_name, schain_allocation, disk_alloc) + generate_leveldb_alloc( + configs, env_type_name, schain_allocation, volume_alloc) + generate_shared_space_value( + configs, env_type_name, schain_allocation) + + return schain_allocation + + +def save_allocation(allocation: dict, allocation_filepath: str) -> None: + save_yaml( + filepath=allocation_filepath, + data=allocation, + comments='# DO NOT MODIFY THIS FILE MANUALLY!\n# Use generate_schain_allocation.py script from helper-scripts repo.\n\n' # noqa + ) + + +def main(): + skale_node_path = os.environ['SKALE_NODE_PATH'] + allocation = generate_schain_allocation(skale_node_path) + print('Generated allocation') + allocation_filepath = os.path.join(skale_node_path, 'schain_allocation_new.yml') + save_allocation(allocation, allocation_filepath) + print(f'Results saved to {allocation_filepath}') + + +if __name__ == "__main__": + main() From 3dc3869588f259d3cccea6d9c1fa24327df9fdb7 Mon Sep 17 00:00:00 2001 From: badrogger Date: Thu, 15 Aug 2024 16:03:50 +0000 Subject: [PATCH 037/103] Add schain allocation tests --- tests/schain_allocation_test.py | 109 ++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 tests/schain_allocation_test.py diff --git a/tests/schain_allocation_test.py b/tests/schain_allocation_test.py new file mode 100644 index 000000000..fae7f3e95 --- /dev/null +++ b/tests/schain_allocation_test.py @@ -0,0 +1,109 @@ +from tools.configs import CONFIG_FOLDER +from tools.schain_allocation import generate_schain_allocation + + +def test_schain_allocation(): + allocation = generate_schain_allocation(CONFIG_FOLDER) + + # devnet + volume_limits = allocation['devnet']['volume_limits'] + assert volume_limits['large'] == { + 'max_consensus_storage_bytes': 21311992627, + 'max_file_storage_bytes': 21311992627, + 'max_reserved_storage_bytes': 7103997542, + 'max_skaled_leveldb_storage_bytes': 21311992627, + } + + assert volume_limits['medium'] == { + 'max_consensus_storage_bytes': 2663999078, + 'max_file_storage_bytes': 2663999078, + 'max_reserved_storage_bytes': 887999692, + 'max_skaled_leveldb_storage_bytes': 2663999078, + } + assert volume_limits['small'] == { + 
'max_consensus_storage_bytes': 166499942, + 'max_file_storage_bytes': 166499942, + 'max_reserved_storage_bytes': 55499980, + 'max_skaled_leveldb_storage_bytes': 166499942, + } + assert volume_limits['test'] == { + 'max_consensus_storage_bytes': 2663999078, + 'max_file_storage_bytes': 2663999078, + 'max_reserved_storage_bytes': 887999692, + 'max_skaled_leveldb_storage_bytes': 2663999078, + } + assert volume_limits['test4'] == { + 'max_consensus_storage_bytes': 2663999078, + 'max_file_storage_bytes': 2663999078, + 'max_reserved_storage_bytes': 887999692, + 'max_skaled_leveldb_storage_bytes': 2663999078, + } + + # mainnet + volume_limits = allocation['mainnet']['volume_limits'] + assert volume_limits['large'] == { + 'max_consensus_storage_bytes': 506159982182, + 'max_file_storage_bytes': 506159982182, + 'max_reserved_storage_bytes': 168719994060, + 'max_skaled_leveldb_storage_bytes': 506159982182, + } + + assert volume_limits['medium'] == { + 'max_consensus_storage_bytes': 63269997772, + 'max_file_storage_bytes': 63269997772, + 'max_reserved_storage_bytes': 21089999257, + 'max_skaled_leveldb_storage_bytes': 63269997772, + } + assert volume_limits['small'] == { + 'max_consensus_storage_bytes': 3954374860, + 'max_file_storage_bytes': 3954374860, + 'max_reserved_storage_bytes': 1318124953, + 'max_skaled_leveldb_storage_bytes': 3954374860, + } + assert volume_limits['test'] == { + 'max_consensus_storage_bytes': 63269997772, + 'max_file_storage_bytes': 63269997772, + 'max_reserved_storage_bytes': 21089999257, + 'max_skaled_leveldb_storage_bytes': 63269997772, + } + assert volume_limits['test4'] == { + 'max_consensus_storage_bytes': 63269997772, + 'max_file_storage_bytes': 63269997772, + 'max_reserved_storage_bytes': 21089999257, + 'max_skaled_leveldb_storage_bytes': 63269997772, + } + + # testnet + volume_limits = allocation['testnet']['volume_limits'] + assert volume_limits['large'] == { + 'max_consensus_storage_bytes': 53279981568, + 'max_file_storage_bytes': 53279981568, + 'max_reserved_storage_bytes': 17759993856, + 'max_skaled_leveldb_storage_bytes': 53279981568, + } + assert volume_limits['medium'] == { + 'max_consensus_storage_bytes': 6659997696, + 'max_file_storage_bytes': 6659997696, + 'max_reserved_storage_bytes': 2219999232, + 'max_skaled_leveldb_storage_bytes': 6659997696, + } + assert volume_limits['small'] == { + 'max_consensus_storage_bytes': 416249856, + 'max_file_storage_bytes': 416249856, + 'max_reserved_storage_bytes': 138749952, + 'max_skaled_leveldb_storage_bytes': 416249856, + } + assert volume_limits['test'] == { + 'max_consensus_storage_bytes': 6659997696, + 'max_file_storage_bytes': 6659997696, + 'max_reserved_storage_bytes': 2219999232, + 'max_skaled_leveldb_storage_bytes': 6659997696, + } + assert volume_limits['test4'] == { + 'max_consensus_storage_bytes': 6659997696, + 'max_file_storage_bytes': 6659997696, + 'max_reserved_storage_bytes': 2219999232, + 'max_skaled_leveldb_storage_bytes': 6659997696, + } + + assert allocation['qanet']['volume_limits'] == allocation['testnet']['volume_limits'] From be916dc2f064f6f1fa7635cd3671fe3abe8de358 Mon Sep 17 00:00:00 2001 From: badrogger Date: Thu, 15 Aug 2024 18:03:15 +0000 Subject: [PATCH 038/103] Add allocation types to static_params.yaml --- tests/skale-data/config/static_params.yaml | 246 +++++++++++++++------ 1 file changed, 180 insertions(+), 66 deletions(-) diff --git a/tests/skale-data/config/static_params.yaml b/tests/skale-data/config/static_params.yaml index 03fafefff..4e0846096 100644 --- 
a/tests/skale-data/config/static_params.yaml +++ b/tests/skale-data/config/static_params.yaml @@ -7,10 +7,31 @@ common: skaled: 0.8 ima: 0.2 volume_limits: - max_consensus_storage_bytes: 0.3 - max_skaled_leveldb_storage_bytes: 0.3 - max_file_storage_bytes: 0.3 - max_reserved_storage_bytes: 0.1 + default: + max_consensus_storage_bytes: 0.3 + max_skaled_leveldb_storage_bytes: 0.3 + max_file_storage_bytes: 0.3 + max_reserved_storage_bytes: 0.1 + no_filestorage: + max_consensus_storage_bytes: 0.45 + max_skaled_leveldb_storage_bytes: 0.45 + max_file_storage_bytes: 0.0 + max_reserved_storage_bytes: 0.1 + max_contract_storage: + max_consensus_storage_bytes: 0.135 + max_skaled_leveldb_storage_bytes: 0.765 + max_file_storage_bytes: 0.0 + max_reserved_storage_bytes: 0.1 + max_filestorage: + max_consensus_storage_bytes: 0.135 + max_skaled_leveldb_storage_bytes: 0.135 + max_file_storage_bytes: 0.63 + max_reserved_storage_bytes: 0.1 + max_consensus_db: + max_consensus_storage_bytes: 0.72 + max_skaled_leveldb_storage_bytes: 0.18 + max_file_storage_bytes: 0.0 + max_reserved_storage_bytes: 0.1 leveldb_limits: contract_storage: 0.6 db_storage: 0.2 # leveldb may use x2 storage, so 0.4 divided by 2, actually using 0.4 @@ -37,18 +58,37 @@ envs: docker-compose: 1.27.4 schain: - contractStorageZeroValuePatchTimestamp: - default: 800000 - test-schain: 1500000 - revertableFSPatchTimestamp: 1000000 - contractStoragePatchTimestamp: 1000000 - snapshotIntervalSec: 0 + contractStorageZeroValuePatchTimestamp: 1681128000 + revertableFSPatchTimestamp: 1681473600 + contractStoragePatchTimestamp: 1681732800 + verifyDaSigsPatchTimestamp: 1681300800 + storageDestructionPatchTimestamp: 1703851200 + powCheckPatchTimestamp: 1703592000 + skipInvalidTransactionsPatchTimestamp: 1703764800 + pushZeroPatchTimestamp: 1712142000 + precompiledConfigPatchTimestamp: 1712314800 + correctForkInPowPatchTimestamp: 1711969200 + EIP1559TransactionsPatchTimestamp: 1722942000 + fastConsensusPatchTimestamp: 1723114800 + flexibleDeploymentPatchTimestamp: + default: 0 + honorable-steel-rasalhague: 1723460400 + elated-tan-skat: 1723460400 + green-giddy-denebola: 1723460400 + parallel-stormy-spica: 1723460400 + verifyBlsSyncPatchTimestamp: 1722855600 + snapshotIntervalSec: 86400 emptyBlockIntervalMs: 10000 snapshotDownloadTimeout: 18000 snapshotDownloadInactiveTimeout: 120 + ima: + time_frame: + before: 1800 + after: 900 + schain_cmd: - ["-v 3", "--web3-trace", "--enable-debug-behavior-apis", "--aa no"] + ["-v 2", "--aa no"] node: common: @@ -63,35 +103,45 @@ envs: collectionQueueSize: 2 collectionDuration: 10 transactionQueueSize: 1000 + transactionQueueLimitBytes: 69206016 + futureTransactionQueueLimitBytes: 140509184 maxOpenLeveldbFiles: 25 medium: minCacheSize: 8000000 maxCacheSize: 16000000 collectionQueueSize: 20 collectionDuration: 60 - transactionQueueSize: 100000 - maxOpenLeveldbFiles: 256 + transactionQueueSize: 1000 + transactionQueueLimitBytes: 69206016 + futureTransactionQueueLimitBytes: 140509184 + maxOpenLeveldbFiles: 1000 large: minCacheSize: 8000000 maxCacheSize: 16000000 collectionQueueSize: 20 collectionDuration: 60 - transactionQueueSize: 100000 - maxOpenLeveldbFiles: 256 + transactionQueueSize: 1000 + transactionQueueLimitBytes: 69206016 + futureTransactionQueueLimitBytes: 140509184 + maxOpenLeveldbFiles: 1000 test: minCacheSize: 8000000 maxCacheSize: 16000000 collectionQueueSize: 20 collectionDuration: 60 - transactionQueueSize: 100000 - maxOpenLeveldbFiles: 256 + transactionQueueSize: 1000 + transactionQueueLimitBytes: 69206016 
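+        # 69206016 bytes == 66 MiB; futureTransactionQueueLimitBytes below (140509184) == 134 MiB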
+ futureTransactionQueueLimitBytes: 140509184 + maxOpenLeveldbFiles: 1000 test4: minCacheSize: 8000000 maxCacheSize: 16000000 collectionQueueSize: 20 collectionDuration: 60 - transactionQueueSize: 100000 - maxOpenLeveldbFiles: 256 + transactionQueueSize: 1000 + transactionQueueLimitBytes: 69206016 + futureTransactionQueueLimitBytes: 140509184 + maxOpenLeveldbFiles: 1000 testnet: server: @@ -114,18 +164,32 @@ envs: docker-compose: 1.27.4 schain: - contractStorageZeroValuePatchTimestamp: - default: 800000 - test-schain: 1500000 - revertableFSPatchTimestamp: 1000000 - contractStoragePatchTimestamp: 1000000 - snapshotIntervalSec: 0 + contractStorageZeroValuePatchTimestamp: 1678100400 + revertableFSPatchTimestamp: 1678100400 + contractStoragePatchTimestamp: 1678100400 + verifyDaSigsPatchTimestamp: 1678100400 + storageDestructionPatchTimestamp: 1702393200 + powCheckPatchTimestamp: 1702296000 + skipInvalidTransactionsPatchTimestamp: 1702382400 + pushZeroPatchTimestamp: 1710331200 + precompiledConfigPatchTimestamp: 1710331200 + correctForkInPowPatchTimestamp: 1710331200 + EIP1559TransactionsPatchTimestamp: 1721818800 + fastConsensusPatchTimestamp: 1721822400 + flexibleDeploymentPatchTimestamp: 1721826000 + verifyBlsSyncPatchTimestamp: 1721829600 + snapshotIntervalSec: 86400 emptyBlockIntervalMs: 10000 snapshotDownloadTimeout: 18000 snapshotDownloadInactiveTimeout: 120 + ima: + time_frame: + before: 1800 + after: 900 + schain_cmd: - ["-v 3", "--web3-trace", "--enable-debug-behavior-apis", "--aa no"] + ["-v 2", "--aa no"] node: common: @@ -140,35 +204,45 @@ envs: collectionQueueSize: 2 collectionDuration: 10 transactionQueueSize: 1000 + transactionQueueLimitBytes: 69206016 + futureTransactionQueueLimitBytes: 140509184 maxOpenLeveldbFiles: 25 medium: minCacheSize: 8000000 maxCacheSize: 16000000 collectionQueueSize: 20 collectionDuration: 60 - transactionQueueSize: 100000 - maxOpenLeveldbFiles: 256 + transactionQueueSize: 1000 + transactionQueueLimitBytes: 69206016 + futureTransactionQueueLimitBytes: 140509184 + maxOpenLeveldbFiles: 1000 large: minCacheSize: 8000000 maxCacheSize: 16000000 collectionQueueSize: 20 collectionDuration: 60 - transactionQueueSize: 100000 - maxOpenLeveldbFiles: 256 + transactionQueueSize: 1000 + transactionQueueLimitBytes: 69206016 + futureTransactionQueueLimitBytes: 140509184 + maxOpenLeveldbFiles: 1000 test: minCacheSize: 8000000 maxCacheSize: 16000000 collectionQueueSize: 20 collectionDuration: 60 - transactionQueueSize: 100000 - maxOpenLeveldbFiles: 256 + transactionQueueSize: 1000 + transactionQueueLimitBytes: 69206016 + futureTransactionQueueLimitBytes: 140509184 + maxOpenLeveldbFiles: 1000 test4: minCacheSize: 8000000 maxCacheSize: 16000000 collectionQueueSize: 20 collectionDuration: 60 - transactionQueueSize: 100000 - maxOpenLeveldbFiles: 256 + transactionQueueSize: 1000 + transactionQueueLimitBytes: 69206016 + futureTransactionQueueLimitBytes: 140509184 + maxOpenLeveldbFiles: 1000 qanet: server: @@ -191,64 +265,87 @@ envs: docker-compose: 1.27.4 schain: - contractStorageZeroValuePatchTimestamp: - default: 800000 - test-schain: 1500000 - revertableFSPatchTimestamp: 1000000 - contractStoragePatchTimestamp: 1000000 - snapshotIntervalSec: 0 + contractStorageZeroValuePatchTimestamp: 1691146800 + revertableFSPatchTimestamp: 1691146800 + contractStoragePatchTimestamp: 1691146800 + verifyDaSigsPatchTimestamp: 1691146800 + storageDestructionPatchTimestamp: 1699618500 + powCheckPatchTimestamp: 1699625700 + skipInvalidTransactionsPatchTimestamp: 1699632900 + 
pushZeroPatchTimestamp: 1712142000 + precompiledConfigPatchTimestamp: 1712314800 + correctForkInPowPatchTimestamp: 1711969200 + EIP1559TransactionsPatchTimestamp: 0 + fastConsensusPatchTimestamp: 0 + flexibleDeploymentPatchTimestamp: 0 + verifyBlsSyncPatchTimestamp: 0 + snapshotIntervalSec: 3600 emptyBlockIntervalMs: 10000 snapshotDownloadTimeout: 18000 snapshotDownloadInactiveTimeout: 120 + ima: + time_frame: + before: 1800 + after: 900 + schain_cmd: - ["-v 3", "--web3-trace", "--enable-debug-behavior-apis", "--aa no"] + ["-v 2", "--aa no"] node: - admin: - automatic_repair: false common: bindIP: "0.0.0.0" logLevel: "info" logLevelConfig: "info" pg-threads: 10 pg-threads-limit: 10 + admin: + automatic_repair: false small: minCacheSize: 1000000 maxCacheSize: 2000000 collectionQueueSize: 2 collectionDuration: 10 transactionQueueSize: 1000 + transactionQueueLimitBytes: 69206016 + futureTransactionQueueLimitBytes: 140509184 maxOpenLeveldbFiles: 25 medium: minCacheSize: 8000000 maxCacheSize: 16000000 collectionQueueSize: 20 collectionDuration: 60 - transactionQueueSize: 100000 - maxOpenLeveldbFiles: 256 + transactionQueueSize: 1000 + transactionQueueLimitBytes: 69206016 + futureTransactionQueueLimitBytes: 140509184 + maxOpenLeveldbFiles: 1000 large: minCacheSize: 8000000 maxCacheSize: 16000000 collectionQueueSize: 20 collectionDuration: 60 - transactionQueueSize: 100000 - maxOpenLeveldbFiles: 256 + transactionQueueSize: 1000 + transactionQueueLimitBytes: 69206016 + futureTransactionQueueLimitBytes: 140509184 + maxOpenLeveldbFiles: 1000 test: minCacheSize: 8000000 maxCacheSize: 16000000 collectionQueueSize: 20 collectionDuration: 60 - transactionQueueSize: 100000 - maxOpenLeveldbFiles: 256 + transactionQueueSize: 1000 + transactionQueueLimitBytes: 69206016 + futureTransactionQueueLimitBytes: 140509184 + maxOpenLeveldbFiles: 1000 test4: minCacheSize: 8000000 maxCacheSize: 16000000 collectionQueueSize: 20 collectionDuration: 60 - transactionQueueSize: 100000 - maxOpenLeveldbFiles: 256 - + transactionQueueSize: 1000 + transactionQueueLimitBytes: 69206016 + futureTransactionQueueLimitBytes: 140509184 + maxOpenLeveldbFiles: 1000 devnet: server: @@ -271,12 +368,21 @@ envs: docker-compose: 1.27.4 schain: - contractStorageZeroValuePatchTimestamp: - default: 800000 - test-schain: 1500000 + contractStorageZeroValuePatchTimestamp: 1000000 revertableFSPatchTimestamp: 1000000 contractStoragePatchTimestamp: 1000000 - snapshotIntervalSec: 0 + verifyDaSigsPatchTimestamp: 1000000 + storageDestructionPatchTimestamp: 1000000 + powCheckPatchTimestamp: 1000000 + skipInvalidTransactionsPatchTimestamp: 1000000 + pushZeroPatchTimestamp: 1712142000 + precompiledConfigPatchTimestamp: 1712314800 + correctForkInPowPatchTimestamp: 1711969200 + EIP1559TransactionsPatchTimestamp: 0 + fastConsensusPatchTimestamp: 0 + flexibleDeploymentPatchTimestamp: 0 + verifyBlsSyncPatchTimestamp: 0 + snapshotIntervalSec: 3600 emptyBlockIntervalMs: 10000 snapshotDownloadTimeout: 18000 snapshotDownloadInactiveTimeout: 120 @@ -290,8 +396,6 @@ envs: ["-v 3", "--web3-trace", "--enable-debug-behavior-apis", "--aa no"] node: - admin: - automatic_repair: true common: bindIP: "0.0.0.0" logLevel: "info" @@ -304,32 +408,42 @@ envs: collectionQueueSize: 2 collectionDuration: 10 transactionQueueSize: 1000 + transactionQueueLimitBytes: 69206016 + futureTransactionQueueLimitBytes: 140509184 maxOpenLeveldbFiles: 25 medium: minCacheSize: 8000000 maxCacheSize: 16000000 collectionQueueSize: 20 collectionDuration: 60 - transactionQueueSize: 100000 - 
maxOpenLeveldbFiles: 256 + transactionQueueSize: 1000 + transactionQueueLimitBytes: 69206016 + futureTransactionQueueLimitBytes: 140509184 + maxOpenLeveldbFiles: 1000 large: minCacheSize: 8000000 maxCacheSize: 16000000 collectionQueueSize: 20 collectionDuration: 60 - transactionQueueSize: 100000 - maxOpenLeveldbFiles: 256 + transactionQueueSize: 1000 + transactionQueueLimitBytes: 69206016 + futureTransactionQueueLimitBytes: 140509184 + maxOpenLeveldbFiles: 1000 test: minCacheSize: 8000000 maxCacheSize: 16000000 collectionQueueSize: 20 collectionDuration: 60 - transactionQueueSize: 100000 - maxOpenLeveldbFiles: 256 + transactionQueueSize: 1000 + transactionQueueLimitBytes: 69206016 + futureTransactionQueueLimitBytes: 140509184 + maxOpenLeveldbFiles: 1000 test4: minCacheSize: 8000000 maxCacheSize: 16000000 collectionQueueSize: 20 collectionDuration: 60 - transactionQueueSize: 100000 - maxOpenLeveldbFiles: 256 + transactionQueueSize: 1000 + transactionQueueLimitBytes: 69206016 + futureTransactionQueueLimitBytes: 140509184 + maxOpenLeveldbFiles: 1000 From 787131f53a98ca6c1baee0d6f2a4693eb5d61e09 Mon Sep 17 00:00:00 2001 From: badrogger Date: Thu, 15 Aug 2024 18:05:47 +0000 Subject: [PATCH 039/103] Modify schain allocation generator --- tests/schain_allocation_test.py | 209 +++++++++++++++++--------------- tools/schain_allocation.py | 18 +-- 2 files changed, 122 insertions(+), 105 deletions(-) diff --git a/tests/schain_allocation_test.py b/tests/schain_allocation_test.py index fae7f3e95..7dba03849 100644 --- a/tests/schain_allocation_test.py +++ b/tests/schain_allocation_test.py @@ -1,109 +1,122 @@ +import pytest from tools.configs import CONFIG_FOLDER from tools.schain_allocation import generate_schain_allocation +EXPECTED_SCHAIN_ALLOCATION = [ + ( + 'mainnet', + 'medium', + 'default', + { + 'max_consensus_storage_bytes': 63269997772, + 'max_file_storage_bytes': 63269997772, + 'max_reserved_storage_bytes': 21089999257, + 'max_skaled_leveldb_storage_bytes': 63269997772, + }, + ), + ( + 'mainnet', + 'medium', + 'no_filestorage', + { + 'max_consensus_storage_bytes': 94904996659, + 'max_file_storage_bytes': 0, + 'max_reserved_storage_bytes': 21089999257, + 'max_skaled_leveldb_storage_bytes': 94904996659, + }, + ), + ( + 'mainnet', + 'medium', + 'max_contract_storage', + { + 'max_consensus_storage_bytes': 28471498997, + 'max_file_storage_bytes': 0, + 'max_reserved_storage_bytes': 21089999257, + 'max_skaled_leveldb_storage_bytes': 161338494320, + }, + ), + ( + 'mainnet', + 'medium', + 'max_consensus_db', + { + 'max_consensus_storage_bytes': 151847994654, + 'max_file_storage_bytes': 0, + 'max_reserved_storage_bytes': 21089999257, + 'max_skaled_leveldb_storage_bytes': 37961998663, + }, + ), + ( + 'mainnet', + 'medium', + 'max_filestorage', + { + 'max_consensus_storage_bytes': 28471498997, + 'max_file_storage_bytes': 132866995322, + 'max_reserved_storage_bytes': 21089999257, + 'max_skaled_leveldb_storage_bytes': 28471498997, + }, + ), +] -def test_schain_allocation(): - allocation = generate_schain_allocation(CONFIG_FOLDER) - # devnet - volume_limits = allocation['devnet']['volume_limits'] - assert volume_limits['large'] == { - 'max_consensus_storage_bytes': 21311992627, - 'max_file_storage_bytes': 21311992627, - 'max_reserved_storage_bytes': 7103997542, - 'max_skaled_leveldb_storage_bytes': 21311992627, - } +EXPECTED_LEVELDB_ALLOCATION = [ + ( + 'mainnet', + 'medium', + 'default', + {'contract_storage': 37961998663, 'db_storage': 12653999554}, + ), + ( + 'mainnet', + 'medium', + 'no_filestorage', + 
{'contract_storage': 56942997995, 'db_storage': 18980999331}, + ), + ( + 'mainnet', + 'medium', + 'max_contract_storage', + {'contract_storage': 96803096592, 'db_storage': 32267698864}, + ), + ( + 'mainnet', + 'medium', + 'max_consensus_db', + {'contract_storage': 22777199197, 'db_storage': 7592399732}, + ), + ( + 'mainnet', + 'medium', + 'max_filestorage', + {'contract_storage': 17082899398, 'db_storage': 5694299799}, + ), +] - assert volume_limits['medium'] == { - 'max_consensus_storage_bytes': 2663999078, - 'max_file_storage_bytes': 2663999078, - 'max_reserved_storage_bytes': 887999692, - 'max_skaled_leveldb_storage_bytes': 2663999078, - } - assert volume_limits['small'] == { - 'max_consensus_storage_bytes': 166499942, - 'max_file_storage_bytes': 166499942, - 'max_reserved_storage_bytes': 55499980, - 'max_skaled_leveldb_storage_bytes': 166499942, - } - assert volume_limits['test'] == { - 'max_consensus_storage_bytes': 2663999078, - 'max_file_storage_bytes': 2663999078, - 'max_reserved_storage_bytes': 887999692, - 'max_skaled_leveldb_storage_bytes': 2663999078, - } - assert volume_limits['test4'] == { - 'max_consensus_storage_bytes': 2663999078, - 'max_file_storage_bytes': 2663999078, - 'max_reserved_storage_bytes': 887999692, - 'max_skaled_leveldb_storage_bytes': 2663999078, - } - # mainnet - volume_limits = allocation['mainnet']['volume_limits'] - assert volume_limits['large'] == { - 'max_consensus_storage_bytes': 506159982182, - 'max_file_storage_bytes': 506159982182, - 'max_reserved_storage_bytes': 168719994060, - 'max_skaled_leveldb_storage_bytes': 506159982182, - } +@pytest.fixture(scope='module') +def schain_allocation(): + return generate_schain_allocation(CONFIG_FOLDER) - assert volume_limits['medium'] == { - 'max_consensus_storage_bytes': 63269997772, - 'max_file_storage_bytes': 63269997772, - 'max_reserved_storage_bytes': 21089999257, - 'max_skaled_leveldb_storage_bytes': 63269997772, - } - assert volume_limits['small'] == { - 'max_consensus_storage_bytes': 3954374860, - 'max_file_storage_bytes': 3954374860, - 'max_reserved_storage_bytes': 1318124953, - 'max_skaled_leveldb_storage_bytes': 3954374860, - } - assert volume_limits['test'] == { - 'max_consensus_storage_bytes': 63269997772, - 'max_file_storage_bytes': 63269997772, - 'max_reserved_storage_bytes': 21089999257, - 'max_skaled_leveldb_storage_bytes': 63269997772, - } - assert volume_limits['test4'] == { - 'max_consensus_storage_bytes': 63269997772, - 'max_file_storage_bytes': 63269997772, - 'max_reserved_storage_bytes': 21089999257, - 'max_skaled_leveldb_storage_bytes': 63269997772, - } - # testnet - volume_limits = allocation['testnet']['volume_limits'] - assert volume_limits['large'] == { - 'max_consensus_storage_bytes': 53279981568, - 'max_file_storage_bytes': 53279981568, - 'max_reserved_storage_bytes': 17759993856, - 'max_skaled_leveldb_storage_bytes': 53279981568, - } - assert volume_limits['medium'] == { - 'max_consensus_storage_bytes': 6659997696, - 'max_file_storage_bytes': 6659997696, - 'max_reserved_storage_bytes': 2219999232, - 'max_skaled_leveldb_storage_bytes': 6659997696, - } - assert volume_limits['small'] == { - 'max_consensus_storage_bytes': 416249856, - 'max_file_storage_bytes': 416249856, - 'max_reserved_storage_bytes': 138749952, - 'max_skaled_leveldb_storage_bytes': 416249856, - } - assert volume_limits['test'] == { - 'max_consensus_storage_bytes': 6659997696, - 'max_file_storage_bytes': 6659997696, - 'max_reserved_storage_bytes': 2219999232, - 'max_skaled_leveldb_storage_bytes': 6659997696, - } - 
assert volume_limits['test4'] == { - 'max_consensus_storage_bytes': 6659997696, - 'max_file_storage_bytes': 6659997696, - 'max_reserved_storage_bytes': 2219999232, - 'max_skaled_leveldb_storage_bytes': 6659997696, - } +@pytest.mark.parametrize( + 'network_type,size_name,allocation_type,expected', EXPECTED_SCHAIN_ALLOCATION +) +def test_schain_allocation(network_type, size_name, allocation_type, expected, schain_allocation): + volume_limits = schain_allocation[network_type]['volume_limits'] + assert volume_limits[size_name][allocation_type] == expected + +@pytest.mark.parametrize( + 'network_type,size_name,allocation_type,expected', EXPECTED_LEVELDB_ALLOCATION +) +def test_leveldb_allocation(network_type, size_name, allocation_type, expected, schain_allocation): + leveldb_limits = schain_allocation[network_type]['leveldb_limits'] + assert leveldb_limits[size_name][allocation_type] == expected + + +def test_schain_allocation_testnet(schain_allocation): + allocation = schain_allocation assert allocation['qanet']['volume_limits'] == allocation['testnet']['volume_limits'] + assert allocation['qanet']['leveldb_limits'] == allocation['testnet']['leveldb_limits'] diff --git a/tools/schain_allocation.py b/tools/schain_allocation.py index 3fc131261..932e6472e 100644 --- a/tools/schain_allocation.py +++ b/tools/schain_allocation.py @@ -75,9 +75,11 @@ def __init__(self, disk_alloc_dict: dict, proportions: dict): self.values = {} for size_name in disk_alloc_dict: self.values[size_name] = {} - for key, value in proportions.items(): - lim = int(value * disk_alloc_dict[size_name]) - self.values[size_name][key] = lim + for allocation_type, distribution in proportions.items(): + self.values[size_name][allocation_type] = {} + for key, value in distribution.items(): + lim = int(value * disk_alloc_dict[size_name]) + self.values[size_name][allocation_type].update({key: lim}) class LevelDBAlloc(Alloc): @@ -85,9 +87,11 @@ def __init__(self, disk_alloc_dict: dict, proportions: dict): self.values = {} for size_name in disk_alloc_dict: self.values[size_name] = {} - for key, value in proportions.items(): - lim = int(value * disk_alloc_dict[size_name]['max_skaled_leveldb_storage_bytes']) # noqa - self.values[size_name][key] = lim + for allocation_type, limits in disk_alloc_dict[size_name].items(): + self.values[size_name][allocation_type] = {} + for key, value in proportions.items(): + lim = int(value * limits['max_skaled_leveldb_storage_bytes']) # noqa + self.values[size_name][allocation_type][key] = lim def calculate_free_disk_space(disk_size: int) -> int: @@ -192,7 +196,7 @@ def main(): skale_node_path = os.environ['SKALE_NODE_PATH'] allocation = generate_schain_allocation(skale_node_path) print('Generated allocation') - allocation_filepath = os.path.join(skale_node_path, 'schain_allocation_new.yml') + allocation_filepath = os.path.join(skale_node_path, 'schain_allocation.yml') save_allocation(allocation, allocation_filepath) print(f'Results saved to {allocation_filepath}') From b60bdc3985690639d08a3dbe53a4c1841af56617 Mon Sep 17 00:00:00 2001 From: badrogger Date: Fri, 16 Aug 2024 17:45:20 +0000 Subject: [PATCH 040/103] Update resource_allocation.json for tests --- .../node_data/resource_allocation.json | 345 ++++++++++++++---- tools/schain_allocation.py | 1 - 2 files changed, 275 insertions(+), 71 deletions(-) diff --git a/tests/skale-data/node_data/resource_allocation.json b/tests/skale-data/node_data/resource_allocation.json index b0733174c..f62f0dab2 100644 --- 
a/tests/skale-data/node_data/resource_allocation.json +++ b/tests/skale-data/node_data/resource_allocation.json @@ -1,110 +1,315 @@ { "schain": { "cpu_shares": { - "test4": 22, - "test": 22, - "small": 5, - "medium": 22, - "large": 716, - "sync_node": 716 + "test4": 102, + "test": 102, + "small": 6, + "medium": 102, + "large": 819 }, "mem": { - "test4": 300647710, - "test": 300647710, - "small": 75161927, - "medium": 300647710, - "large": 9620726743, - "sync_node": 9620726743 + "test4": 1325679575, + "test": 1325679575, + "small": 82854973, + "medium": 1325679575, + "large": 10605436600 }, "disk": { - "sync_node": 75999936512, - "large": 75999936512, - "medium": 2374998016, - "small": 593749504, - "test": 2374998016, - "test4": 2374998016 + "large": 1687199940608, + "medium": 210899992576, + "small": 13181249536, + "test": 210899992576, + "test4": 210899992576 }, "volume_limits": { - "sync_node": { - "max_consensus_storage_bytes": 22799980953, - "max_file_storage_bytes": 22799980953, - "max_reserved_storage_bytes": 7599993651, - "max_skaled_leveldb_storage_bytes": 22799980953 - }, "large": { - "max_consensus_storage_bytes": 22799980953, - "max_file_storage_bytes": 22799980953, - "max_reserved_storage_bytes": 7599993651, - "max_skaled_leveldb_storage_bytes": 22799980953 + "default": { + "max_consensus_storage_bytes": 506159982182, + "max_file_storage_bytes": 506159982182, + "max_reserved_storage_bytes": 168719994060, + "max_skaled_leveldb_storage_bytes": 506159982182 + }, + "max_consensus_db": { + "max_consensus_storage_bytes": 1214783957237, + "max_file_storage_bytes": 0, + "max_reserved_storage_bytes": 168719994060, + "max_skaled_leveldb_storage_bytes": 303695989309 + }, + "max_contract_storage": { + "max_consensus_storage_bytes": 227771991982, + "max_file_storage_bytes": 0, + "max_reserved_storage_bytes": 168719994060, + "max_skaled_leveldb_storage_bytes": 1290707954565 + }, + "max_filestorage": { + "max_consensus_storage_bytes": 227771991982, + "max_file_storage_bytes": 1062935962583, + "max_reserved_storage_bytes": 168719994060, + "max_skaled_leveldb_storage_bytes": 227771991982 + }, + "no_filestorage": { + "max_consensus_storage_bytes": 759239973273, + "max_file_storage_bytes": 0, + "max_reserved_storage_bytes": 168719994060, + "max_skaled_leveldb_storage_bytes": 759239973273 + } }, "medium": { - "max_consensus_storage_bytes": 712499404, - "max_file_storage_bytes": 712499404, - "max_reserved_storage_bytes": 237499801, - "max_skaled_leveldb_storage_bytes": 712499404 + "default": { + "max_consensus_storage_bytes": 63269997772, + "max_file_storage_bytes": 63269997772, + "max_reserved_storage_bytes": 21089999257, + "max_skaled_leveldb_storage_bytes": 63269997772 + }, + "max_consensus_db": { + "max_consensus_storage_bytes": 151847994654, + "max_file_storage_bytes": 0, + "max_reserved_storage_bytes": 21089999257, + "max_skaled_leveldb_storage_bytes": 37961998663 + }, + "max_contract_storage": { + "max_consensus_storage_bytes": 28471498997, + "max_file_storage_bytes": 0, + "max_reserved_storage_bytes": 21089999257, + "max_skaled_leveldb_storage_bytes": 161338494320 + }, + "max_filestorage": { + "max_consensus_storage_bytes": 28471498997, + "max_file_storage_bytes": 132866995322, + "max_reserved_storage_bytes": 21089999257, + "max_skaled_leveldb_storage_bytes": 28471498997 + }, + "no_filestorage": { + "max_consensus_storage_bytes": 94904996659, + "max_file_storage_bytes": 0, + "max_reserved_storage_bytes": 21089999257, + "max_skaled_leveldb_storage_bytes": 94904996659 + } }, "small": { - 
"max_consensus_storage_bytes": 178124851, - "max_file_storage_bytes": 178124851, - "max_reserved_storage_bytes": 59374950, - "max_skaled_leveldb_storage_bytes": 178124851 + "default": { + "max_consensus_storage_bytes": 3954374860, + "max_file_storage_bytes": 3954374860, + "max_reserved_storage_bytes": 1318124953, + "max_skaled_leveldb_storage_bytes": 3954374860 + }, + "max_consensus_db": { + "max_consensus_storage_bytes": 9490499665, + "max_file_storage_bytes": 0, + "max_reserved_storage_bytes": 1318124953, + "max_skaled_leveldb_storage_bytes": 2372624916 + }, + "max_contract_storage": { + "max_consensus_storage_bytes": 1779468687, + "max_file_storage_bytes": 0, + "max_reserved_storage_bytes": 1318124953, + "max_skaled_leveldb_storage_bytes": 10083655895 + }, + "max_filestorage": { + "max_consensus_storage_bytes": 1779468687, + "max_file_storage_bytes": 8304187207, + "max_reserved_storage_bytes": 1318124953, + "max_skaled_leveldb_storage_bytes": 1779468687 + }, + "no_filestorage": { + "max_consensus_storage_bytes": 5931562291, + "max_file_storage_bytes": 0, + "max_reserved_storage_bytes": 1318124953, + "max_skaled_leveldb_storage_bytes": 5931562291 + } }, "test": { - "max_consensus_storage_bytes": 712499404, - "max_file_storage_bytes": 712499404, - "max_reserved_storage_bytes": 237499801, - "max_skaled_leveldb_storage_bytes": 712499404 + "default": { + "max_consensus_storage_bytes": 63269997772, + "max_file_storage_bytes": 63269997772, + "max_reserved_storage_bytes": 21089999257, + "max_skaled_leveldb_storage_bytes": 63269997772 + }, + "max_consensus_db": { + "max_consensus_storage_bytes": 151847994654, + "max_file_storage_bytes": 0, + "max_reserved_storage_bytes": 21089999257, + "max_skaled_leveldb_storage_bytes": 37961998663 + }, + "max_contract_storage": { + "max_consensus_storage_bytes": 28471498997, + "max_file_storage_bytes": 0, + "max_reserved_storage_bytes": 21089999257, + "max_skaled_leveldb_storage_bytes": 161338494320 + }, + "max_filestorage": { + "max_consensus_storage_bytes": 28471498997, + "max_file_storage_bytes": 132866995322, + "max_reserved_storage_bytes": 21089999257, + "max_skaled_leveldb_storage_bytes": 28471498997 + }, + "no_filestorage": { + "max_consensus_storage_bytes": 94904996659, + "max_file_storage_bytes": 0, + "max_reserved_storage_bytes": 21089999257, + "max_skaled_leveldb_storage_bytes": 94904996659 + } }, "test4": { - "max_consensus_storage_bytes": 712499404, - "max_file_storage_bytes": 712499404, - "max_reserved_storage_bytes": 237499801, - "max_skaled_leveldb_storage_bytes": 712499404 + "default": { + "max_consensus_storage_bytes": 63269997772, + "max_file_storage_bytes": 63269997772, + "max_reserved_storage_bytes": 21089999257, + "max_skaled_leveldb_storage_bytes": 63269997772 + }, + "max_consensus_db": { + "max_consensus_storage_bytes": 151847994654, + "max_file_storage_bytes": 0, + "max_reserved_storage_bytes": 21089999257, + "max_skaled_leveldb_storage_bytes": 37961998663 + }, + "max_contract_storage": { + "max_consensus_storage_bytes": 28471498997, + "max_file_storage_bytes": 0, + "max_reserved_storage_bytes": 21089999257, + "max_skaled_leveldb_storage_bytes": 161338494320 + }, + "max_filestorage": { + "max_consensus_storage_bytes": 28471498997, + "max_file_storage_bytes": 132866995322, + "max_reserved_storage_bytes": 21089999257, + "max_skaled_leveldb_storage_bytes": 28471498997 + }, + "no_filestorage": { + "max_consensus_storage_bytes": 94904996659, + "max_file_storage_bytes": 0, + "max_reserved_storage_bytes": 21089999257, + 
"max_skaled_leveldb_storage_bytes": 94904996659 + } } }, "leveldb_limits": { - "sync_node": { - "contract_storage": 13679988571, - "db_storage": 9119992381 - }, "large": { - "contract_storage": 13679988571, - "db_storage": 9119992381 + "default": { + "contract_storage": 303695989309, + "db_storage": 101231996436 + }, + "max_consensus_db": { + "contract_storage": 182217593585, + "db_storage": 60739197861 + }, + "max_contract_storage": { + "contract_storage": 774424772739, + "db_storage": 258141590913 + }, + "max_filestorage": { + "contract_storage": 136663195189, + "db_storage": 45554398396 + }, + "no_filestorage": { + "contract_storage": 455543983963, + "db_storage": 151847994654 + } }, "medium": { - "contract_storage": 427499642, - "db_storage": 284999761 + "default": { + "contract_storage": 37961998663, + "db_storage": 12653999554 + }, + "max_consensus_db": { + "contract_storage": 22777199197, + "db_storage": 7592399732 + }, + "max_contract_storage": { + "contract_storage": 96803096592, + "db_storage": 32267698864 + }, + "max_filestorage": { + "contract_storage": 17082899398, + "db_storage": 5694299799 + }, + "no_filestorage": { + "contract_storage": 56942997995, + "db_storage": 18980999331 + } }, "small": { - "contract_storage": 106874910, - "db_storage": 71249940 + "default": { + "contract_storage": 2372624916, + "db_storage": 790874972 + }, + "max_consensus_db": { + "contract_storage": 1423574949, + "db_storage": 474524983 + }, + "max_contract_storage": { + "contract_storage": 6050193537, + "db_storage": 2016731179 + }, + "max_filestorage": { + "contract_storage": 1067681212, + "db_storage": 355893737 + }, + "no_filestorage": { + "contract_storage": 3558937374, + "db_storage": 1186312458 + } }, "test": { - "contract_storage": 427499642, - "db_storage": 284999761 + "default": { + "contract_storage": 37961998663, + "db_storage": 12653999554 + }, + "max_consensus_db": { + "contract_storage": 22777199197, + "db_storage": 7592399732 + }, + "max_contract_storage": { + "contract_storage": 96803096592, + "db_storage": 32267698864 + }, + "max_filestorage": { + "contract_storage": 17082899398, + "db_storage": 5694299799 + }, + "no_filestorage": { + "contract_storage": 56942997995, + "db_storage": 18980999331 + } }, "test4": { - "contract_storage": 427499642, - "db_storage": 284999761 + "default": { + "contract_storage": 37961998663, + "db_storage": 12653999554 + }, + "max_consensus_db": { + "contract_storage": 22777199197, + "db_storage": 7592399732 + }, + "max_contract_storage": { + "contract_storage": 96803096592, + "db_storage": 32267698864 + }, + "max_filestorage": { + "contract_storage": 17082899398, + "db_storage": 5694299799 + }, + "no_filestorage": { + "contract_storage": 56942997995, + "db_storage": 18980999331 + } } } }, "ima": { "cpu_shares": { - "test4": 9, - "test": 9, - "small": 2, - "medium": 9, - "large": 307, - "sync_node": 307 + "test4": 25, + "test": 25, + "small": 1, + "medium": 25, + "large": 204 }, "mem": { - "test4": 128849018, - "test": 128849018, - "small": 32212254, - "medium": 128849018, - "large": 4123168604, - "sync_node": 4123168604 + "test4": 331419893, + "test": 331419893, + "small": 20713743, + "medium": 331419893, + "large": 2651359150 } } } \ No newline at end of file diff --git a/tools/schain_allocation.py b/tools/schain_allocation.py index 932e6472e..caae6aacc 100644 --- a/tools/schain_allocation.py +++ b/tools/schain_allocation.py @@ -166,7 +166,6 @@ def generate_shared_space_value( def generate_schain_allocation(skale_node_path: str) -> dict: 
configs_filepath = os.path.join(skale_node_path, 'static_params.yaml') - schain_allocation_filepath = os.path.join(skale_node_path, 'schain_allocation.yml') configs = safe_load_yaml(configs_filepath) schain_allocation = {} From e0f2c328a3710b0509a2c35c48d2ef9bd530d254 Mon Sep 17 00:00:00 2001 From: badrogger Date: Fri, 16 Aug 2024 17:46:43 +0000 Subject: [PATCH 041/103] Handle allocation type in schain config generation --- core/schains/config/generator.py | 1 + core/schains/config/predeployed.py | 6 ++- core/schains/config/schain_info.py | 8 ++-- core/schains/limits.py | 13 ++++-- tests/schains/config/generator_test.py | 56 ++++++++++++++++++++++++ tests/schains/config/predeployed_test.py | 4 ++ tests/test_generate_config_sync.py | 2 + 7 files changed, 83 insertions(+), 7 deletions(-) diff --git a/core/schains/config/generator.py b/core/schains/config/generator.py index fe4eef631..9608ea7de 100644 --- a/core/schains/config/generator.py +++ b/core/schains/config/generator.py @@ -189,6 +189,7 @@ def generate_schain_config( logger.info('Static accounts not found, generating regular accounts section') predeployed_accounts = generate_predeployed_accounts( schain_name=schain['name'], + allocation_type=schain['allocationType'], schain_type=schain_type, schain_nodes=schain_nodes_with_schains, on_chain_owner=on_chain_owner, diff --git a/core/schains/config/predeployed.py b/core/schains/config/predeployed.py index 43e85997b..eddddb343 100644 --- a/core/schains/config/predeployed.py +++ b/core/schains/config/predeployed.py @@ -19,6 +19,7 @@ import logging +from skale.dataclasses.schain_options import AllocationType from skale.wallets.web3_wallet import public_key_to_address from etherbase_predeployed import ( @@ -60,6 +61,7 @@ def generate_predeployed_accounts( schain_name: str, schain_type: SchainType, + allocation_type: AllocationType, schain_nodes: list, on_chain_owner: str, mainnet_owner: str, @@ -80,6 +82,7 @@ def generate_predeployed_accounts( if generation >= Gen.ONE: v1_predeployed_contracts = generate_v1_predeployed_contracts( schain_type=schain_type, + allocation_type=allocation_type, on_chain_owner=on_chain_owner, mainnet_owner=mainnet_owner, originator_address=originator_address, @@ -94,6 +97,7 @@ def generate_predeployed_accounts( def generate_v1_predeployed_contracts( schain_type: SchainType, + allocation_type: AllocationType, on_chain_owner: str, mainnet_owner: str, originator_address: str, @@ -127,7 +131,7 @@ def generate_v1_predeployed_contracts( ima=message_proxy_for_schain_address, ) - allocated_storage = get_fs_allocated_storage(schain_type) + allocated_storage = get_fs_allocated_storage(schain_type, allocation_type) filestorage_generator = UpgradeableFileStorageGenerator() filestorage_predeployed = filestorage_generator.generate_allocation( contract_address=FILESTORAGE_ADDRESS, diff --git a/core/schains/config/schain_info.py b/core/schains/config/schain_info.py index d6a5bab83..73cacc111 100644 --- a/core/schains/config/schain_info.py +++ b/core/schains/config/schain_info.py @@ -19,7 +19,7 @@ from dataclasses import dataclass -from core.schains.limits import get_schain_limit, get_schain_type +from core.schains.limits import get_allocation_type, get_schain_limit, get_schain_type from core.schains.types import MetricType from tools.configs.schains import MAX_CONSENSUS_STORAGE_INF_VALUE @@ -75,11 +75,13 @@ def generate_schain_info( sync_node: bool, archive: bool ) -> SChainInfo: + print('HERE', schain) schain_type = get_schain_type(schain['partOfNode']) - volume_limits = 
get_schain_limit(schain_type, MetricType.volume_limits) + allocation_type = schain['allocationType'].name.lower() + volume_limits = get_schain_limit(schain_type, MetricType.volume_limits)[allocation_type] if sync_node and archive: volume_limits['max_consensus_storage_bytes'] = MAX_CONSENSUS_STORAGE_INF_VALUE - leveldb_limits = get_schain_limit(schain_type, MetricType.leveldb_limits) + leveldb_limits = get_schain_limit(schain_type, MetricType.leveldb_limits)[allocation_type] contract_storage_limit = leveldb_limits['contract_storage'] db_storage_limit = leveldb_limits['db_storage'] diff --git a/core/schains/limits.py b/core/schains/limits.py index e7b555d50..0e2021d20 100644 --- a/core/schains/limits.py +++ b/core/schains/limits.py @@ -17,6 +17,8 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +from skale.dataclasses.schain_options import AllocationType, SchainOptions + from core.schains.types import SchainType, ContainerType, MetricType from tools.helper import read_json from tools.configs.resource_allocation import ( @@ -33,6 +35,10 @@ def get_schain_type(schain_part_of_node: int) -> SchainType: return SchainType(schain_part_of_node) +def get_allocation_type(schain_options: SchainOptions) -> str: + return schain_options.allocation_type.name.lower() + + def get_limit(metric_type: MetricType, schain_type: SchainType, container_type: ContainerType, resource_allocation: dict) -> int: """ @@ -53,7 +59,7 @@ def get_limit(metric_type: MetricType, schain_type: SchainType, container_type: return resource_allocation[container_type.name][metric_type.name][schain_type.name] -def get_schain_limit(schain_type: SchainType, metric_type: MetricType) -> int: +def get_schain_limit(schain_type: SchainType, metric_type: MetricType) -> dict: alloc = _get_resource_allocation_info() return get_limit(metric_type, schain_type, ContainerType.schain, alloc) @@ -63,8 +69,9 @@ def get_ima_limit(schain_type: SchainType, metric_type: MetricType) -> int: return get_limit(metric_type, schain_type, ContainerType.ima, alloc) -def get_fs_allocated_storage(schain_type: SchainType) -> str: - volume_limits = get_schain_limit(schain_type, MetricType.volume_limits) +def get_fs_allocated_storage(schain_type: SchainType, allocation_type: AllocationType) -> str: + allocation_type_name = allocation_type.name.lower() + volume_limits = get_schain_limit(schain_type, MetricType.volume_limits)[allocation_type_name] return volume_limits[FILESTORAGE_LIMIT_OPTION_NAME] diff --git a/tests/schains/config/generator_test.py b/tests/schains/config/generator_test.py index 570f766c4..875a9b49b 100644 --- a/tests/schains/config/generator_test.py +++ b/tests/schains/config/generator_test.py @@ -3,6 +3,7 @@ from pathlib import Path import pytest +from skale.dataclasses.schain_options import AllocationType from etherbase_predeployed import ETHERBASE_ADDRESS, ETHERBASE_IMPLEMENTATION_ADDRESS from marionette_predeployed import MARIONETTE_ADDRESS, MARIONETTE_IMPLEMENTATION_ADDRESS from filestorage_predeployed import FILESTORAGE_ADDRESS, FILESTORAGE_IMPLEMENTATION_ADDRESS @@ -393,6 +394,61 @@ def test_generate_schain_config_gen0_schain_id( assert config['skaleConfig']['sChain']['schainID'] == 1 +def test_generate_schain_config_allocation_type(schain_secret_key_file_default_chain, skale_manager_opts): + node_id, generation, rotation_id = 1, 1, 0 + ecdsa_key_name = 'test' + node_groups = {} + + schain = { + 'name': 'test_schain', + 'partOfNode': 0, + 'generation': 1, + 'mainnetOwner': 
TEST_MAINNET_OWNER_ADDRESS, + 'originator': '0x0000000000000000000000000000000000000000', + 'multitransactionMode': True, + 'allocationType': AllocationType.NO_FILESTORAGE + } + schain_config = generate_schain_config( + schain=schain, + node=TEST_NODE, + node_id=node_id, + ecdsa_key_name=ecdsa_key_name, + rotation_id=rotation_id, + schain_nodes_with_schains=get_schain_node_with_schains('test_schain'), + node_groups=node_groups, + generation=generation, + is_owner_contract=True, + skale_manager_opts=skale_manager_opts, + common_bls_public_keys=COMMON_BLS_PUBLIC_KEY, + schain_base_port=10000 + ) + config = schain_config.to_dict() + assert config['skaleConfig']['sChain']['maxConsensusStorageBytes'] == 94904996659 + assert config['skaleConfig']['sChain']['maxSkaledLeveldbStorageBytes'] == 94904996659 + assert config['skaleConfig']['sChain']['maxFileStorageBytes'] == 0 + + schain['allocationType'] = AllocationType.MAX_CONSENSUS_DB + + schain_config = generate_schain_config( + schain=schain, + node=TEST_NODE, + node_id=node_id, + ecdsa_key_name=ecdsa_key_name, + rotation_id=rotation_id, + schain_nodes_with_schains=get_schain_node_with_schains('test_schain'), + node_groups=node_groups, + generation=generation, + is_owner_contract=True, + skale_manager_opts=skale_manager_opts, + common_bls_public_keys=COMMON_BLS_PUBLIC_KEY, + schain_base_port=10000 + ) + config = schain_config.to_dict() + assert config['skaleConfig']['sChain']['maxConsensusStorageBytes'] == 151847994654 + assert config['skaleConfig']['sChain']['maxSkaledLeveldbStorageBytes'] == 37961998663 + assert config['skaleConfig']['sChain']['maxFileStorageBytes'] == 0 + + def test_generate_schain_config_with_skale_gen2( skale, schain_on_contracts, diff --git a/tests/schains/config/predeployed_test.py b/tests/schains/config/predeployed_test.py index 06bf53c51..aefdc24fe 100644 --- a/tests/schains/config/predeployed_test.py +++ b/tests/schains/config/predeployed_test.py @@ -1,6 +1,7 @@ from marionette_predeployed import MARIONETTE_ADDRESS from etherbase_predeployed import ETHERBASE_ADDRESS from context_predeployed import CONTEXT_ADDRESS +from skale.dataclasses.schain_options import AllocationType from core.schains.types import SchainType from core.schains.config.predeployed import ( @@ -17,6 +18,7 @@ def test_generate_predeployed_accounts(): predeployed_section = generate_predeployed_accounts( schain_name='abc', schain_type=SchainType.medium, + allocation_type=AllocationType.DEFAULT, schain_nodes={}, on_chain_owner='0xD1000000000000000000000000000000000000D1', mainnet_owner='0xD4000000000000000000000000000000000000D4', @@ -28,6 +30,7 @@ def test_generate_predeployed_accounts(): predeployed_section = generate_predeployed_accounts( schain_name='abc', schain_type=SchainType.medium, + allocation_type=AllocationType.DEFAULT, schain_nodes={}, on_chain_owner='0xD1000000000000000000000000000000000000D1', mainnet_owner='0xD4000000000000000000000000000000000000D4', @@ -40,6 +43,7 @@ def test_generate_predeployed_accounts(): def test_generate_v1_predeployed_contracts(): v1_precompiled_contracts = generate_v1_predeployed_contracts( schain_type=SchainType.medium, + allocation_type=AllocationType.DEFAULT, on_chain_owner=MARIONETTE_ADDRESS, mainnet_owner='0x0123456789Ab', message_proxy_for_schain_address='0x987654321fC', diff --git a/tests/test_generate_config_sync.py b/tests/test_generate_config_sync.py index 00b95ba3f..e02dce9cb 100644 --- a/tests/test_generate_config_sync.py +++ b/tests/test_generate_config_sync.py @@ -1,5 +1,6 @@ import json import 
pytest +from skale.dataclasses.schain_options import AllocationType from skale.schain_config.rotation_history import get_previous_schain_groups from core.schains.config.predeployed import generate_predeployed_accounts @@ -48,6 +49,7 @@ def test_generate_config(skale): predeployed_accounts = generate_predeployed_accounts( schain_name=schain['name'], + allocation_type=AllocationType.DEFAULT, schain_type=schain_type, schain_nodes=schain_nodes_with_schains, on_chain_owner=on_chain_owner, From ffb6021beb7e9e98c10ec60590c75bf60de1f00e Mon Sep 17 00:00:00 2001 From: badrogger Date: Mon, 19 Aug 2024 15:17:58 +0000 Subject: [PATCH 042/103] Update skale.py to 6.3b0 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 27e560567..506ee512e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,7 +8,7 @@ Jinja2==3.1.2 docker==6.1.3 python-iptables==1.0.1 -skale.py==6.2b0 +skale.py==6.3b0 requests==2.31 ima-predeployed==2.1.0b0 From 44b691b592b8babfd0a74564cc0b7ac6118fdbf6 Mon Sep 17 00:00:00 2001 From: badrogger Date: Mon, 19 Aug 2024 15:21:54 +0000 Subject: [PATCH 043/103] Fix linter --- core/schains/config/schain_info.py | 3 +- tests/schains/config/generator_test.py | 232 ++++++++++++++----------- 2 files changed, 128 insertions(+), 107 deletions(-) diff --git a/core/schains/config/schain_info.py b/core/schains/config/schain_info.py index 73cacc111..61565c68b 100644 --- a/core/schains/config/schain_info.py +++ b/core/schains/config/schain_info.py @@ -19,7 +19,7 @@ from dataclasses import dataclass -from core.schains.limits import get_allocation_type, get_schain_limit, get_schain_type +from core.schains.limits import get_schain_limit, get_schain_type from core.schains.types import MetricType from tools.configs.schains import MAX_CONSENSUS_STORAGE_INF_VALUE @@ -75,7 +75,6 @@ def generate_schain_info( sync_node: bool, archive: bool ) -> SChainInfo: - print('HERE', schain) schain_type = get_schain_type(schain['partOfNode']) allocation_type = schain['allocationType'].name.lower() volume_limits = get_schain_limit(schain_type, MetricType.volume_limits)[allocation_type] diff --git a/tests/schains/config/generator_test.py b/tests/schains/config/generator_test.py index 875a9b49b..2a04a0b44 100644 --- a/tests/schains/config/generator_test.py +++ b/tests/schains/config/generator_test.py @@ -9,13 +9,15 @@ from filestorage_predeployed import FILESTORAGE_ADDRESS, FILESTORAGE_IMPLEMENTATION_ADDRESS from config_controller_predeployed import ( CONFIG_CONTROLLER_ADDRESS, - CONFIG_CONTROLLER_IMPLEMENTATION_ADDRESS + CONFIG_CONTROLLER_IMPLEMENTATION_ADDRESS, ) from multisigwallet_predeployed import MULTISIGWALLET_ADDRESS from ima_predeployed.generator import MESSAGE_PROXY_FOR_SCHAIN_ADDRESS from core.schains.config.generator import ( - generate_schain_config_with_skale, generate_schain_config, get_schain_originator + generate_schain_config_with_skale, + generate_schain_config, + get_schain_originator, ) from core.schains.config.helper import get_schain_id from core.schains.config.predeployed import PROXY_ADMIN_PREDEPLOYED_ADDRESS @@ -25,22 +27,17 @@ NODE_ID = 1 ECDSA_KEY_NAME = 'TEST:KEY:NAME' -COMMON_BLS_PUBLIC_KEY = [123, 456, 789, 123], +COMMON_BLS_PUBLIC_KEY = ([123, 456, 789, 123],) SECRET_KEY = { - "key_share_name": "BLS_KEY:SCHAIN_ID:1:NODE_ID:0:DKG_ID:0", - "t": 3, - "n": 4, - "common_public_key": COMMON_BLS_PUBLIC_KEY, - "public_key": [ - "123", - "456", - "789", - "123" - ], - "bls_public_keys": [ - 
"347043388985314611088523723672849261459066865147342514766975146031592968981:16865625797537152485129819826310148884042040710059790347821575891945447848787:12298029821069512162285775240688220379514183764628345956323231135392667898379:8", # noqa - "347043388985314611088523723672849261459066865147342514766975146031592968982:16865625797537152485129819826310148884042040710059790347821575891945447848788:12298029821069512162285775240688220379514183764628345956323231135392667898380:9" # noqa + 'key_share_name': 'BLS_KEY:SCHAIN_ID:1:NODE_ID:0:DKG_ID:0', + 't': 3, + 'n': 4, + 'common_public_key': COMMON_BLS_PUBLIC_KEY, + 'public_key': ['123', '456', '789', '123'], + 'bls_public_keys': [ + '347043388985314611088523723672849261459066865147342514766975146031592968981:16865625797537152485129819826310148884042040710059790347821575891945447848787:12298029821069512162285775240688220379514183764628345956323231135392667898379:8', # noqa + '347043388985314611088523723672849261459066865147342514766975146031592968982:16865625797537152485129819826310148884042040710059790347821575891945447848788:12298029821069512162285775240688220379514183764628345956323231135392667898380:9', # noqa ], } @@ -56,7 +53,7 @@ 'generation': 1, 'mainnetOwner': TEST_MAINNET_OWNER_ADDRESS, 'originator': '0x0000000000000000000000000000000000000000', - 'multitransactionMode': True + 'multitransactionMode': True, } SCHAIN_WITH_ORIGINATOR = { @@ -65,7 +62,7 @@ 'generation': 1, 'mainnetOwner': TEST_MAINNET_OWNER_ADDRESS, 'originator': TEST_ORIGINATOR_ADDRESS, - 'multitransactionMode': True + 'multitransactionMode': True, } SCHAIN_WITH_STATIC_ACCOUNTS = { @@ -74,20 +71,22 @@ 'generation': 1, 'mainnetOwner': TEST_MAINNET_OWNER_ADDRESS, 'originator': TEST_ORIGINATOR_ADDRESS, - 'multitransactionMode': True + 'multitransactionMode': True, } def get_schain_node_with_schains(schain_name: str) -> list: - return [{ - 'name': 'test', - 'ip': b'\x01\x02\x03\x04', - 'publicIP': b'\x01\x02\x03\x04', - 'publicKey': '0x0B5e3eBB74eE281A24DDa3B1A4e70692c15EAC34', - 'port': 10000, - 'id': 1, - 'schains': [{'name': schain_name}] - }] + return [ + { + 'name': 'test', + 'ip': b'\x01\x02\x03\x04', + 'publicIP': b'\x01\x02\x03\x04', + 'publicKey': '0x0B5e3eBB74eE281A24DDa3B1A4e70692c15EAC34', + 'port': 10000, + 'id': 1, + 'schains': [{'name': schain_name}], + } + ] @pytest.fixture @@ -142,13 +141,32 @@ def check_node_bls_keys(info, index): def check_node_info(node_id, info): keys = [ - 'nodeID', 'nodeName', 'basePort', 'httpRpcPort', 'httpsRpcPort', - 'wsRpcPort', 'wssRpcPort', 'bindIP', 'logLevel', 'logLevelConfig', - 'imaMessageProxySChain', 'imaMessageProxyMainNet', - 'ecdsaKeyName', 'wallets', 'minCacheSize', - 'maxCacheSize', 'collectionQueueSize', 'collectionDuration', - 'transactionQueueSize', 'maxOpenLeveldbFiles', 'info-acceptors', 'imaMonitoringPort', - 'skale-manager', 'syncNode', 'pg-threads', 'pg-threads-limit' + 'nodeID', + 'nodeName', + 'basePort', + 'httpRpcPort', + 'httpsRpcPort', + 'wsRpcPort', + 'wssRpcPort', + 'bindIP', + 'logLevel', + 'logLevelConfig', + 'imaMessageProxySChain', + 'imaMessageProxyMainNet', + 'ecdsaKeyName', + 'wallets', + 'minCacheSize', + 'maxCacheSize', + 'collectionQueueSize', + 'collectionDuration', + 'transactionQueueSize', + 'maxOpenLeveldbFiles', + 'info-acceptors', + 'imaMonitoringPort', + 'skale-manager', + 'syncNode', + 'pg-threads', + 'pg-threads-limit', ] check_keys(info, keys) @@ -159,11 +177,27 @@ def check_node_info(node_id, info): def check_schain_node_info(node_id, schain_node_info, index): - 
check_keys(schain_node_info, - ['nodeID', 'nodeName', 'basePort', 'httpRpcPort', - 'httpsRpcPort', 'wsRpcPort', 'wssRpcPort', 'publicKey', - 'blsPublicKey0', 'blsPublicKey1', 'blsPublicKey2', - 'blsPublicKey3', 'owner', 'schainIndex', 'ip', 'publicIP']) + check_keys( + schain_node_info, + [ + 'nodeID', + 'nodeName', + 'basePort', + 'httpRpcPort', + 'httpsRpcPort', + 'wsRpcPort', + 'wssRpcPort', + 'publicKey', + 'blsPublicKey0', + 'blsPublicKey1', + 'blsPublicKey2', + 'blsPublicKey3', + 'owner', + 'schainIndex', + 'ip', + 'publicIP', + ], + ) assert schain_node_info['nodeID'] == node_id check_node_ports(schain_node_info) check_node_bls_keys(schain_node_info, index) @@ -172,34 +206,36 @@ def check_schain_node_info(node_id, schain_node_info, index): def check_schain_info(node_ids, schain_info): check_keys( schain_info, - ['schainID', 'schainName', 'blockAuthor', 'contractStorageLimit', - 'dbStorageLimit', 'snapshotIntervalSec', 'emptyBlockIntervalMs', - 'maxConsensusStorageBytes', 'maxSkaledLeveldbStorageBytes', - 'maxFileStorageBytes', 'maxReservedStorageBytes', - 'nodes', 'revertableFSPatchTimestamp', 'contractStoragePatchTimestamp'] + [ + 'schainID', + 'schainName', + 'blockAuthor', + 'contractStorageLimit', + 'dbStorageLimit', + 'snapshotIntervalSec', + 'emptyBlockIntervalMs', + 'maxConsensusStorageBytes', + 'maxSkaledLeveldbStorageBytes', + 'maxFileStorageBytes', + 'maxReservedStorageBytes', + 'nodes', + 'revertableFSPatchTimestamp', + 'contractStoragePatchTimestamp', + ], ) - for index, (nid, schain_node_info) in enumerate(zip( - node_ids, - schain_info['nodes'] - )): + for index, (nid, schain_node_info) in enumerate(zip(node_ids, schain_info['nodes'])): check_schain_node_info(nid, schain_node_info, index) def check_config(node_id, all_node_ids, config): - check_keys( - config, - ['sealEngine', 'params', 'unddos', 'genesis', 'accounts', 'skaleConfig'] - ) + check_keys(config, ['sealEngine', 'params', 'unddos', 'genesis', 'accounts', 'skaleConfig']) assert config['params']['skaleDisableChainIdCheck'] is True check_node_info(node_id, config['skaleConfig']['nodeInfo']) check_schain_info(all_node_ids, config['skaleConfig']['sChain']) def test_generate_schain_config_with_skale( - skale, - node_config, - schain_on_contracts, - schain_secret_key_file + skale, node_config, schain_on_contracts, schain_secret_key_file ): schain_name = schain_on_contracts node_ids = skale.schains_internal.get_node_ids_for_schain(schain_name) @@ -212,7 +248,7 @@ def test_generate_schain_config_with_skale( rotation_data={'rotation_id': 0, 'leaving_node': 1}, ecdsa_key_name=ECDSA_KEY_NAME, generation=0, - node_options=NodeOptions() + node_options=NodeOptions(), ) check_config(current_node_id, node_ids, schain_config.to_dict()) @@ -224,7 +260,7 @@ def test_generate_schain_config_gen0(schain_secret_key_file_default_chain, skale 'generation': 0, 'mainnetOwner': '0x30E1C96277735B03E59B3098204fd04FD0e78a46', 'originator': TEST_ORIGINATOR_ADDRESS, - 'multitransactionMode': True + 'multitransactionMode': True, } node_id, generation, rotation_id = 1, 0, 0 @@ -243,7 +279,7 @@ def test_generate_schain_config_gen0(schain_secret_key_file_default_chain, skale is_owner_contract=False, skale_manager_opts=skale_manager_opts, common_bls_public_keys=COMMON_BLS_PUBLIC_KEY, - schain_base_port=10000 + schain_base_port=10000, ) config = schain_config.to_dict() @@ -268,7 +304,7 @@ def test_generate_schain_config_gen1(schain_secret_key_file_default_chain, skale is_owner_contract=True, skale_manager_opts=skale_manager_opts, 
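        # COMMON_BLS_PUBLIC_KEY matches the 'common_public_key' entry of the
        # SECRET_KEY fixture above, so the generated config's BLS keys stay
        # consistent with the key share file used by these tests.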
common_bls_public_keys=COMMON_BLS_PUBLIC_KEY, - schain_base_port=10000 + schain_base_port=10000, ) config = schain_config.to_dict() @@ -295,8 +331,7 @@ def test_generate_schain_config_gen1(schain_secret_key_file_default_chain, skale def test_generate_schain_config_gen1_pk_owner( - schain_secret_key_file_default_chain, - skale_manager_opts + schain_secret_key_file_default_chain, skale_manager_opts ): node_id, generation, rotation_id = 1, 1, 0 ecdsa_key_name = 'test' @@ -314,7 +349,7 @@ def test_generate_schain_config_gen1_pk_owner( is_owner_contract=False, skale_manager_opts=skale_manager_opts, common_bls_public_keys=COMMON_BLS_PUBLIC_KEY, - schain_base_port=10000 + schain_base_port=10000, ) config = schain_config.to_dict() @@ -323,8 +358,7 @@ def test_generate_schain_config_gen1_pk_owner( def test_generate_schain_config_gen2_schain_id( - schain_secret_key_file_default_chain, - skale_manager_opts + schain_secret_key_file_default_chain, skale_manager_opts ): node_id, generation, rotation_id = 1, 2, 0 ecdsa_key_name = 'test' @@ -342,15 +376,14 @@ def test_generate_schain_config_gen2_schain_id( is_owner_contract=False, skale_manager_opts=skale_manager_opts, common_bls_public_keys=COMMON_BLS_PUBLIC_KEY, - schain_base_port=10000 + schain_base_port=10000, ) config = schain_config.to_dict() assert config['skaleConfig']['sChain']['schainID'] == 2755779573749746 def test_generate_schain_config_gen1_schain_id( - schain_secret_key_file_default_chain, - skale_manager_opts + schain_secret_key_file_default_chain, skale_manager_opts ): node_id, generation, rotation_id = 1, 1, 0 schain_config = generate_schain_config( @@ -365,15 +398,14 @@ def test_generate_schain_config_gen1_schain_id( is_owner_contract=False, skale_manager_opts=skale_manager_opts, common_bls_public_keys=COMMON_BLS_PUBLIC_KEY, - schain_base_port=10000 + schain_base_port=10000, ) config = schain_config.to_dict() assert config['skaleConfig']['sChain']['schainID'] == 1 def test_generate_schain_config_gen0_schain_id( - schain_secret_key_file_default_chain, - skale_manager_opts + schain_secret_key_file_default_chain, skale_manager_opts ): node_id, generation, rotation_id = 1, 0, 0 schain_config = generate_schain_config( @@ -388,13 +420,15 @@ def test_generate_schain_config_gen0_schain_id( is_owner_contract=False, skale_manager_opts=skale_manager_opts, common_bls_public_keys=COMMON_BLS_PUBLIC_KEY, - schain_base_port=10000 + schain_base_port=10000, ) config = schain_config.to_dict() assert config['skaleConfig']['sChain']['schainID'] == 1 -def test_generate_schain_config_allocation_type(schain_secret_key_file_default_chain, skale_manager_opts): +def test_generate_schain_config_allocation_type( + schain_secret_key_file_default_chain, skale_manager_opts +): node_id, generation, rotation_id = 1, 1, 0 ecdsa_key_name = 'test' node_groups = {} @@ -406,7 +440,7 @@ def test_generate_schain_config_allocation_type(schain_secret_key_file_default_c 'mainnetOwner': TEST_MAINNET_OWNER_ADDRESS, 'originator': '0x0000000000000000000000000000000000000000', 'multitransactionMode': True, - 'allocationType': AllocationType.NO_FILESTORAGE + 'allocationType': AllocationType.NO_FILESTORAGE, } schain_config = generate_schain_config( schain=schain, @@ -420,7 +454,7 @@ def test_generate_schain_config_allocation_type(schain_secret_key_file_default_c is_owner_contract=True, skale_manager_opts=skale_manager_opts, common_bls_public_keys=COMMON_BLS_PUBLIC_KEY, - schain_base_port=10000 + schain_base_port=10000, ) config = schain_config.to_dict() assert 
config['skaleConfig']['sChain']['maxConsensusStorageBytes'] == 94904996659 @@ -441,7 +475,7 @@ def test_generate_schain_config_allocation_type(schain_secret_key_file_default_c is_owner_contract=True, skale_manager_opts=skale_manager_opts, common_bls_public_keys=COMMON_BLS_PUBLIC_KEY, - schain_base_port=10000 + schain_base_port=10000, ) config = schain_config.to_dict() assert config['skaleConfig']['sChain']['maxConsensusStorageBytes'] == 151847994654 @@ -450,10 +484,7 @@ def test_generate_schain_config_allocation_type(schain_secret_key_file_default_c def test_generate_schain_config_with_skale_gen2( - skale, - schain_on_contracts, - schain_secret_key_file, - node_config + skale, schain_on_contracts, schain_secret_key_file, node_config ): schain_name = schain_on_contracts node_ids = skale.schains_internal.get_node_ids_for_schain(schain_name) @@ -465,7 +496,7 @@ def test_generate_schain_config_with_skale_gen2( node_config=node_config, rotation_data={'rotation_id': 0, 'leaving_node': 1}, ecdsa_key_name=ECDSA_KEY_NAME, - generation=2 + generation=2, ) schain_config_dict = schain_config.to_dict() check_config(current_node_id, node_ids, schain_config_dict) @@ -480,10 +511,7 @@ def test_get_schain_originator(predeployed_ima): assert originator == TEST_ORIGINATOR_ADDRESS -def test_generate_sync_node_config( - schain_secret_key_file_default_chain, - skale_manager_opts -): +def test_generate_sync_node_config(schain_secret_key_file_default_chain, skale_manager_opts): node_id, generation, rotation_id = 1, 1, 0 ecdsa_key_name = 'test' node_groups = {} @@ -501,7 +529,7 @@ def test_generate_sync_node_config( skale_manager_opts=skale_manager_opts, common_bls_public_keys=COMMON_BLS_PUBLIC_KEY, schain_base_port=10000, - sync_node=True + sync_node=True, ) config = schain_config.to_dict() @@ -510,8 +538,7 @@ def test_generate_sync_node_config( def test_generate_sync_node_config_archive_catchup( - schain_secret_key_file_default_chain, - skale_manager_opts + schain_secret_key_file_default_chain, skale_manager_opts ): node_id, generation, rotation_id = 1, 1, 0 ecdsa_key_name = 'test' @@ -530,14 +557,13 @@ def test_generate_sync_node_config_archive_catchup( skale_manager_opts=skale_manager_opts, common_bls_public_keys=COMMON_BLS_PUBLIC_KEY, schain_base_port=10000, - sync_node=True + sync_node=True, ) config = schain_config.to_dict() assert not config['skaleConfig']['nodeInfo'].get('syncFromCatchup') assert not config['skaleConfig']['nodeInfo'].get('archiveMode') - assert config['skaleConfig']['sChain'].get('maxConsensusStorageBytes') < \ - 1000000000000000000 + assert config['skaleConfig']['sChain'].get('maxConsensusStorageBytes') < 1000000000000000000 schain_config = generate_schain_config( schain=SCHAIN_WITHOUT_ORIGINATOR, @@ -554,14 +580,13 @@ def test_generate_sync_node_config_archive_catchup( schain_base_port=10000, sync_node=True, archive=False, - catchup=True + catchup=True, ) config = schain_config.to_dict() assert config['skaleConfig']['nodeInfo'].get('syncFromCatchup') assert config['skaleConfig']['nodeInfo'].get('archiveMode') is False - assert config['skaleConfig']['sChain'].get('maxConsensusStorageBytes') < \ - 1000000000000000000 + assert config['skaleConfig']['sChain'].get('maxConsensusStorageBytes') < 1000000000000000000 schain_config = generate_schain_config( schain=SCHAIN_WITHOUT_ORIGINATOR, @@ -578,14 +603,13 @@ def test_generate_sync_node_config_archive_catchup( schain_base_port=10000, sync_node=False, archive=False, - catchup=True + catchup=True, ) config = schain_config.to_dict() assert 
config['skaleConfig']['nodeInfo'].get('syncFromCatchup') is None assert config['skaleConfig']['nodeInfo'].get('archiveMode') is None - assert config['skaleConfig']['sChain'].get('maxConsensusStorageBytes') < \ - 1000000000000000000 + assert config['skaleConfig']['sChain'].get('maxConsensusStorageBytes') < 1000000000000000000 schain_config = generate_schain_config( schain=SCHAIN_WITHOUT_ORIGINATOR, @@ -602,19 +626,17 @@ def test_generate_sync_node_config_archive_catchup( schain_base_port=10000, sync_node=True, archive=True, - catchup=True + catchup=True, ) config = schain_config.to_dict() assert config['skaleConfig']['nodeInfo'].get('syncFromCatchup') assert config['skaleConfig']['nodeInfo'].get('archiveMode') - assert config['skaleConfig']['sChain'].get('maxConsensusStorageBytes') == \ - 1000000000000000000 + assert config['skaleConfig']['sChain'].get('maxConsensusStorageBytes') == 1000000000000000000 def test_generate_sync_node_config_static_accounts( - schain_secret_key_file_default_chain, - skale_manager_opts + schain_secret_key_file_default_chain, skale_manager_opts ): node_id, generation, rotation_id = 1, 1, 0 ecdsa_key_name = 'test' @@ -633,7 +655,7 @@ def test_generate_sync_node_config_static_accounts( skale_manager_opts=skale_manager_opts, common_bls_public_keys=COMMON_BLS_PUBLIC_KEY, schain_base_port=10000, - sync_node=True + sync_node=True, ) config = schain_config.to_dict() assert config['accounts'].get('0x1111111') @@ -652,7 +674,7 @@ def test_generate_sync_node_config_static_accounts( skale_manager_opts=skale_manager_opts, common_bls_public_keys=COMMON_BLS_PUBLIC_KEY, schain_base_port=10000, - sync_node=True + sync_node=True, ) config = schain_config.to_dict() assert not config['accounts'].get('0x1111111') From 372fc3c9277d6f978eceb8796cfa1db0d44fe5f3 Mon Sep 17 00:00:00 2001 From: badrogger Date: Mon, 19 Aug 2024 18:11:02 +0000 Subject: [PATCH 044/103] Fix config generator tests --- tests/schains/config/generator_test.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/schains/config/generator_test.py b/tests/schains/config/generator_test.py index 2a04a0b44..3006a0a4a 100644 --- a/tests/schains/config/generator_test.py +++ b/tests/schains/config/generator_test.py @@ -54,6 +54,7 @@ 'mainnetOwner': TEST_MAINNET_OWNER_ADDRESS, 'originator': '0x0000000000000000000000000000000000000000', 'multitransactionMode': True, + 'allocationType': AllocationType.DEFAULT } SCHAIN_WITH_ORIGINATOR = { @@ -63,6 +64,7 @@ 'mainnetOwner': TEST_MAINNET_OWNER_ADDRESS, 'originator': TEST_ORIGINATOR_ADDRESS, 'multitransactionMode': True, + 'allocationType': AllocationType.DEFAULT } SCHAIN_WITH_STATIC_ACCOUNTS = { @@ -72,6 +74,7 @@ 'mainnetOwner': TEST_MAINNET_OWNER_ADDRESS, 'originator': TEST_ORIGINATOR_ADDRESS, 'multitransactionMode': True, + 'allocationType': AllocationType.DEFAULT } @@ -261,6 +264,7 @@ def test_generate_schain_config_gen0(schain_secret_key_file_default_chain, skale 'mainnetOwner': '0x30E1C96277735B03E59B3098204fd04FD0e78a46', 'originator': TEST_ORIGINATOR_ADDRESS, 'multitransactionMode': True, + 'allocationType': AllocationType.DEFAULT } node_id, generation, rotation_id = 1, 0, 0 @@ -534,7 +538,7 @@ def test_generate_sync_node_config(schain_secret_key_file_default_chain, skale_m config = schain_config.to_dict() assert config['skaleConfig']['nodeInfo']['syncNode'] - assert config['skaleConfig']['sChain']['dbStorageLimit'] == 284999761 + assert config['skaleConfig']['sChain']['dbStorageLimit'] == 12653999554 def 
test_generate_sync_node_config_archive_catchup( From c501d3bb40f5a7817b5bb093bf8b1c5dc1847852 Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 20 Aug 2024 12:46:43 +0000 Subject: [PATCH 045/103] Fix static params test --- tests/schains/config/static_params_test.py | 62 +++++++++++++--------- tests/skale-data/config/static_params.yaml | 4 +- 2 files changed, 39 insertions(+), 27 deletions(-) diff --git a/tests/schains/config/static_params_test.py b/tests/schains/config/static_params_test.py index 0fb7e4709..e49548fa7 100644 --- a/tests/schains/config/static_params_test.py +++ b/tests/schains/config/static_params_test.py @@ -5,14 +5,14 @@ get_schain_static_param, get_static_schain_cmd, get_static_schain_info, - get_static_node_info + get_static_node_info, ) from tools.configs import ENV_TYPE TEST_SCHAIN_NAME = 'test-schain' -LEGACY_TS_NAME = 'revertableFSPatchTimestamp' -NEW_TS_NAME = 'contractStorageZeroValuePatchTimestamp' +TS_FOR_ALL_NAME = 'revertableFSPatchTimestamp' +TS_BY_CHAIN_NAME = 'flexibleDeploymentPatchTimestamp' def test_get_static_schain_cmd(): @@ -23,13 +23,24 @@ def test_get_static_schain_cmd(): def test_get_static_schain_info(): schain_info = get_static_schain_info(TEST_SCHAIN_NAME) assert schain_info == { - "contractStorageZeroValuePatchTimestamp": 1500000, - "revertableFSPatchTimestamp": 1000000, - "contractStoragePatchTimestamp": 1000000, - "snapshotIntervalSec": 0, - "emptyBlockIntervalMs": 10000, - "snapshotDownloadTimeout": 18000, - "snapshotDownloadInactiveTimeout": 120 + 'contractStorageZeroValuePatchTimestamp': 1000000, + 'revertableFSPatchTimestamp': 1000000, + 'contractStoragePatchTimestamp': 1000000, + 'verifyDaSigsPatchTimestamp': 1000000, + 'storageDestructionPatchTimestamp': 1000000, + 'powCheckPatchTimestamp': 1000000, + 'skipInvalidTransactionsPatchTimestamp': 1000000, + 'pushZeroPatchTimestamp': 1712142000, + 'precompiledConfigPatchTimestamp': 1712314800, + 'correctForkInPowPatchTimestamp': 1711969200, + 'EIP1559TransactionsPatchTimestamp': 0, + 'fastConsensusPatchTimestamp': 0, + 'flexibleDeploymentPatchTimestamp': 0, + 'verifyBlsSyncPatchTimestamp': 0, + 'snapshotIntervalSec': 3600, + 'emptyBlockIntervalMs': 10000, + 'snapshotDownloadTimeout': 18000, + 'snapshotDownloadInactiveTimeout': 120, } @@ -37,35 +48,34 @@ def test_get_static_schain_info_custom_chain_ts(): custom_schain_info = get_static_schain_info(TEST_SCHAIN_NAME) default_schain_info = get_static_schain_info('test') - assert custom_schain_info[LEGACY_TS_NAME] == default_schain_info[LEGACY_TS_NAME] - assert custom_schain_info[NEW_TS_NAME] != default_schain_info[NEW_TS_NAME] + assert custom_schain_info[TS_FOR_ALL_NAME] == default_schain_info[TS_FOR_ALL_NAME] + assert custom_schain_info[TS_BY_CHAIN_NAME] != default_schain_info[TS_BY_CHAIN_NAME] - assert custom_schain_info[NEW_TS_NAME] == 1500000 - assert default_schain_info[NEW_TS_NAME] == 800000 + assert custom_schain_info[TS_BY_CHAIN_NAME] == 1500000 + assert default_schain_info[TS_BY_CHAIN_NAME] == 800000 def test_get_schain_static_param(): static_params = get_static_params(ENV_TYPE) legacy_ts_info = get_schain_static_param( - static_params['schain'][LEGACY_TS_NAME], - TEST_SCHAIN_NAME + static_params['schain'][TS_FOR_ALL_NAME], TEST_SCHAIN_NAME ) - assert legacy_ts_info == static_params['schain'].get(LEGACY_TS_NAME) + assert legacy_ts_info == static_params['schain'].get(TS_FOR_ALL_NAME) + print(static_params['schain']) new_ts_info_custom_chain = get_schain_static_param( - static_params['schain'][NEW_TS_NAME], - TEST_SCHAIN_NAME + 
static_params['schain'][TS_BY_CHAIN_NAME], TEST_SCHAIN_NAME ) - assert new_ts_info_custom_chain != static_params['schain'][NEW_TS_NAME] - assert new_ts_info_custom_chain == static_params['schain'][NEW_TS_NAME][TEST_SCHAIN_NAME] + + assert new_ts_info_custom_chain != static_params['schain'][TS_BY_CHAIN_NAME] + assert new_ts_info_custom_chain == static_params['schain'][TS_BY_CHAIN_NAME][TEST_SCHAIN_NAME] new_ts_info_default_chain = get_schain_static_param( - static_params['schain'][NEW_TS_NAME], - 'test' + static_params['schain'][TS_BY_CHAIN_NAME], 'test' ) - assert new_ts_info_default_chain != static_params['schain'][NEW_TS_NAME] - assert new_ts_info_default_chain != static_params['schain'][NEW_TS_NAME].get('test') - assert new_ts_info_default_chain == static_params['schain'][NEW_TS_NAME].get('default') + assert new_ts_info_default_chain != static_params['schain'][TS_BY_CHAIN_NAME] + assert new_ts_info_default_chain != static_params['schain'][TS_BY_CHAIN_NAME].get('test') + assert new_ts_info_default_chain == static_params['schain'][TS_BY_CHAIN_NAME].get('default') def test_get_static_node_info(): diff --git a/tests/skale-data/config/static_params.yaml b/tests/skale-data/config/static_params.yaml index 4e0846096..7cfed05fe 100644 --- a/tests/skale-data/config/static_params.yaml +++ b/tests/skale-data/config/static_params.yaml @@ -380,7 +380,9 @@ envs: correctForkInPowPatchTimestamp: 1711969200 EIP1559TransactionsPatchTimestamp: 0 fastConsensusPatchTimestamp: 0 - flexibleDeploymentPatchTimestamp: 0 + flexibleDeploymentPatchTimestamp: + default: 0 + test-schain: 1723460400 verifyBlsSyncPatchTimestamp: 0 snapshotIntervalSec: 3600 emptyBlockIntervalMs: 10000 From 563b1279b5a627d221a1aa84118704b1885b6c0a Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 20 Aug 2024 16:32:41 +0000 Subject: [PATCH 046/103] Fix static params test --- tests/schains/config/static_params_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/schains/config/static_params_test.py b/tests/schains/config/static_params_test.py index e49548fa7..f6c18cd7f 100644 --- a/tests/schains/config/static_params_test.py +++ b/tests/schains/config/static_params_test.py @@ -35,7 +35,7 @@ def test_get_static_schain_info(): 'correctForkInPowPatchTimestamp': 1711969200, 'EIP1559TransactionsPatchTimestamp': 0, 'fastConsensusPatchTimestamp': 0, - 'flexibleDeploymentPatchTimestamp': 0, + 'flexibleDeploymentPatchTimestamp': 1723460400, 'verifyBlsSyncPatchTimestamp': 0, 'snapshotIntervalSec': 3600, 'emptyBlockIntervalMs': 10000, @@ -51,8 +51,8 @@ def test_get_static_schain_info_custom_chain_ts(): assert custom_schain_info[TS_FOR_ALL_NAME] == default_schain_info[TS_FOR_ALL_NAME] assert custom_schain_info[TS_BY_CHAIN_NAME] != default_schain_info[TS_BY_CHAIN_NAME] - assert custom_schain_info[TS_BY_CHAIN_NAME] == 1500000 - assert default_schain_info[TS_BY_CHAIN_NAME] == 800000 + assert custom_schain_info[TS_BY_CHAIN_NAME] == 1723460400 + assert default_schain_info[TS_BY_CHAIN_NAME] == 0 def test_get_schain_static_param(): From ce9bc0af1154c5c60063600a27c84f07b38364fd Mon Sep 17 00:00:00 2001 From: badrogger Date: Thu, 22 Aug 2024 12:00:29 +0000 Subject: [PATCH 047/103] Unify allocation type to str conversion --- core/schains/config/generator.py | 4 ++-- core/schains/config/predeployed.py | 5 ++--- core/schains/config/schain_info.py | 4 ++-- core/schains/config/skale_section.py | 2 +- core/schains/limits.py | 11 ++++------- tests/schains/config/predeployed_test.py | 4 ++-- tests/test_generate_config_sync.py | 3 +-- 7 
files changed, 14 insertions(+), 19 deletions(-)

diff --git a/core/schains/config/generator.py b/core/schains/config/generator.py
index 9608ea7de..c85ec6b1f 100644
--- a/core/schains/config/generator.py
+++ b/core/schains/config/generator.py
@@ -37,7 +37,7 @@
 from core.schains.config.static_accounts import is_static_accounts, static_accounts
 from core.schains.config.helper import get_chain_id, get_schain_id
 from core.schains.dkg.utils import get_common_bls_public_key
-from core.schains.limits import get_schain_type
+from core.schains.limits import get_allocation_type, get_schain_type
 from tools.helper import read_json
 from tools.configs.schains import BASE_SCHAIN_CONFIG_FILEPATH
@@ -189,7 +189,7 @@
         logger.info('Static accounts not found, generating regular accounts section')
         predeployed_accounts = generate_predeployed_accounts(
             schain_name=schain['name'],
-            allocation_type=schain['allocationType'],
+            allocation_type=get_allocation_type(schain),
             schain_type=schain_type,
             schain_nodes=schain_nodes_with_schains,
             on_chain_owner=on_chain_owner,
diff --git a/core/schains/config/predeployed.py b/core/schains/config/predeployed.py
index eddddb343..09b779114 100644
--- a/core/schains/config/predeployed.py
+++ b/core/schains/config/predeployed.py
@@ -19,7 +19,6 @@
 import logging
-from skale.dataclasses.schain_options import AllocationType
 from skale.wallets.web3_wallet import public_key_to_address
 from etherbase_predeployed import (
@@ -61,7 +60,7 @@
 def generate_predeployed_accounts(
     schain_name: str,
     schain_type: SchainType,
-    allocation_type: AllocationType,
+    allocation_type: str,
     schain_nodes: list,
     on_chain_owner: str,
     mainnet_owner: str,
@@ -97,7 +96,7 @@
 def generate_v1_predeployed_contracts(
     schain_type: SchainType,
-    allocation_type: AllocationType,
+    allocation_type: str,
     on_chain_owner: str,
     mainnet_owner: str,
     originator_address: str,
diff --git a/core/schains/config/schain_info.py b/core/schains/config/schain_info.py
index 61565c68b..8397108f5 100644
--- a/core/schains/config/schain_info.py
+++ b/core/schains/config/schain_info.py
@@ -19,7 +19,7 @@
 from dataclasses import dataclass
-from core.schains.limits import get_schain_limit, get_schain_type
+from core.schains.limits import get_allocation_type, get_schain_limit, get_schain_type
 from core.schains.types import MetricType
 from tools.configs.schains import MAX_CONSENSUS_STORAGE_INF_VALUE
@@ -76,7 +76,7 @@
 ) -> SChainInfo:
     schain_type = get_schain_type(schain['partOfNode'])
-    allocation_type = schain['allocationType'].name.lower()
+    allocation_type = get_allocation_type(schain)
     volume_limits = get_schain_limit(schain_type, MetricType.volume_limits)[allocation_type]
     if sync_node and archive:
         volume_limits['max_consensus_storage_bytes'] = MAX_CONSENSUS_STORAGE_INF_VALUE
diff --git a/core/schains/config/skale_section.py b/core/schains/config/skale_section.py
index 340644be1..5a1c413a4 100644
--- a/core/schains/config/skale_section.py
+++ b/core/schains/config/skale_section.py
@@ -100,5 +100,5 @@
     return SkaleConfig(
         contract_settings=contract_settings,
         node_info=node_info,
-        schain_info=schain_info,
+        schain_info=schain_info
     )
diff --git a/core/schains/limits.py b/core/schains/limits.py
index e7b555d50..24bc39547 100644
--- a/core/schains/limits.py
+++ b/core/schains/limits.py
@@ -17,8 +17,6 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
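# Illustrative aside: this patch funnels every enum-to-string conversion
# through get_allocation_type(), and the resulting lowercase name is the key
# into the per-allocation limits maps. A minimal, self-contained sketch of
# that conversion and lookup follows; the enum values and byte figures are
# stand-ins for skale.py's AllocationType and the real allocation data, not
# the actual values.
from enum import Enum


class AllocationType(Enum):
    # stand-in for skale.dataclasses.schain_options.AllocationType
    DEFAULT = 0
    NO_FILESTORAGE = 1
    MAX_CONSENSUS_DB = 2


def allocation_type_name(allocation_type: AllocationType) -> str:
    # same conversion as get_allocation_type(): enum member -> dict key
    return allocation_type.name.lower()


# shape mirrors the volume_limits section of resource_allocation.json;
# the numbers here are placeholders
VOLUME_LIMITS = {
    'medium': {
        'default': {'max_filestorage_bytes': 37961998663},
        'no_filestorage': {'max_filestorage_bytes': 0},
    },
}

key = allocation_type_name(AllocationType.NO_FILESTORAGE)
assert VOLUME_LIMITS['medium'][key]['max_filestorage_bytes'] == 0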
-from skale.dataclasses.schain_options import AllocationType, SchainOptions - from core.schains.types import SchainType, ContainerType, MetricType from tools.helper import read_json from tools.configs.resource_allocation import ( @@ -35,8 +33,8 @@ def get_schain_type(schain_part_of_node: int) -> SchainType: return SchainType(schain_part_of_node) -def get_allocation_type(schain_options: SchainOptions) -> str: - return schain_options.allocation_type.name.lower() +def get_allocation_type(schain: dict) -> str: + return schain['allocationType'].name.lower() def get_limit(metric_type: MetricType, schain_type: SchainType, container_type: ContainerType, @@ -69,9 +67,8 @@ def get_ima_limit(schain_type: SchainType, metric_type: MetricType) -> int: return get_limit(metric_type, schain_type, ContainerType.ima, alloc) -def get_fs_allocated_storage(schain_type: SchainType, allocation_type: AllocationType) -> str: - allocation_type_name = allocation_type.name.lower() - volume_limits = get_schain_limit(schain_type, MetricType.volume_limits)[allocation_type_name] +def get_fs_allocated_storage(schain_type: SchainType, allocation_type: str) -> str: + volume_limits = get_schain_limit(schain_type, MetricType.volume_limits)[allocation_type] return volume_limits[FILESTORAGE_LIMIT_OPTION_NAME] diff --git a/tests/schains/config/predeployed_test.py b/tests/schains/config/predeployed_test.py index aefdc24fe..a0b35dfd9 100644 --- a/tests/schains/config/predeployed_test.py +++ b/tests/schains/config/predeployed_test.py @@ -18,7 +18,7 @@ def test_generate_predeployed_accounts(): predeployed_section = generate_predeployed_accounts( schain_name='abc', schain_type=SchainType.medium, - allocation_type=AllocationType.DEFAULT, + allocation_type='default', schain_nodes={}, on_chain_owner='0xD1000000000000000000000000000000000000D1', mainnet_owner='0xD4000000000000000000000000000000000000D4', @@ -30,7 +30,7 @@ def test_generate_predeployed_accounts(): predeployed_section = generate_predeployed_accounts( schain_name='abc', schain_type=SchainType.medium, - allocation_type=AllocationType.DEFAULT, + allocation_type='default', schain_nodes={}, on_chain_owner='0xD1000000000000000000000000000000000000D1', mainnet_owner='0xD4000000000000000000000000000000000000D4', diff --git a/tests/test_generate_config_sync.py b/tests/test_generate_config_sync.py index e02dce9cb..70a5f9fab 100644 --- a/tests/test_generate_config_sync.py +++ b/tests/test_generate_config_sync.py @@ -1,6 +1,5 @@ import json import pytest -from skale.dataclasses.schain_options import AllocationType from skale.schain_config.rotation_history import get_previous_schain_groups from core.schains.config.predeployed import generate_predeployed_accounts @@ -49,7 +48,7 @@ def test_generate_config(skale): predeployed_accounts = generate_predeployed_accounts( schain_name=schain['name'], - allocation_type=AllocationType.DEFAULT, + allocation_type='default', schain_type=schain_type, schain_nodes=schain_nodes_with_schains, on_chain_owner=on_chain_owner, From ef0c444349d36fe791c944ac430b54e5c1e76339 Mon Sep 17 00:00:00 2001 From: badrogger Date: Thu, 22 Aug 2024 14:33:45 +0000 Subject: [PATCH 048/103] Fix resource allocation IMA cpu shares --- tests/skale-data/node_data/resource_allocation.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/skale-data/node_data/resource_allocation.json b/tests/skale-data/node_data/resource_allocation.json index f62f0dab2..49d8764aa 100644 --- a/tests/skale-data/node_data/resource_allocation.json +++ 
b/tests/skale-data/node_data/resource_allocation.json @@ -300,7 +300,7 @@ "cpu_shares": { "test4": 25, "test": 25, - "small": 1, + "small": 10, "medium": 25, "large": 204 }, @@ -312,4 +312,4 @@ "large": 2651359150 } } -} \ No newline at end of file +} From 958d075a690fd56ced3cee880b6a6fcf6b34980c Mon Sep 17 00:00:00 2001 From: badrogger Date: Thu, 22 Aug 2024 16:08:20 +0000 Subject: [PATCH 049/103] Fix predeployed test --- tests/schains/config/predeployed_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/schains/config/predeployed_test.py b/tests/schains/config/predeployed_test.py index a0b35dfd9..7d85b3293 100644 --- a/tests/schains/config/predeployed_test.py +++ b/tests/schains/config/predeployed_test.py @@ -43,7 +43,7 @@ def test_generate_predeployed_accounts(): def test_generate_v1_predeployed_contracts(): v1_precompiled_contracts = generate_v1_predeployed_contracts( schain_type=SchainType.medium, - allocation_type=AllocationType.DEFAULT, + allocation_type='default', on_chain_owner=MARIONETTE_ADDRESS, mainnet_owner='0x0123456789Ab', message_proxy_for_schain_address='0x987654321fC', From 2108e8f75118bbafed447caf78f1df405f70e3bb Mon Sep 17 00:00:00 2001 From: badrogger Date: Thu, 22 Aug 2024 18:22:28 +0000 Subject: [PATCH 050/103] Fix unused import --- tests/schains/config/predeployed_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/schains/config/predeployed_test.py b/tests/schains/config/predeployed_test.py index 7d85b3293..74b3a2a29 100644 --- a/tests/schains/config/predeployed_test.py +++ b/tests/schains/config/predeployed_test.py @@ -1,7 +1,6 @@ from marionette_predeployed import MARIONETTE_ADDRESS from etherbase_predeployed import ETHERBASE_ADDRESS from context_predeployed import CONTEXT_ADDRESS -from skale.dataclasses.schain_options import AllocationType from core.schains.types import SchainType from core.schains.config.predeployed import ( From d9a3ccd7ec376597dcd0b0f2aaa62e2af55cc4f6 Mon Sep 17 00:00:00 2001 From: badrogger Date: Wed, 28 Aug 2024 10:49:41 +0000 Subject: [PATCH 051/103] Remove unused Runner class --- core/schains/monitor/pipeline.py | 40 -------------------------------- 1 file changed, 40 deletions(-) diff --git a/core/schains/monitor/pipeline.py b/core/schains/monitor/pipeline.py index cf9f73ae7..eaf5c2ccf 100644 --- a/core/schains/monitor/pipeline.py +++ b/core/schains/monitor/pipeline.py @@ -24,46 +24,6 @@ class Pipeline(NamedTuple): job: Callable -# class Runner: -# def __init__( -# self, -# pipelines: list[Pipeline], -# reporting_queue: queue.Queue, -# stuck_timeout: int = STUCK_TIMEOUT, -# shutdown_interval: int = SHUTDOWN_INTERVAL -# ) -> None: -# self.pipelines = pipelines -# self.reporting_queue = reporting_queue -# self.stuck_timeout = stuck_timeout -# self.shutdown_interval = shutdown_interval -# self.number = len(pipelines) -# -# def make_threads(self) -> None: -# init_ts = time.time() -# heartbeat_queues = [queue.Queue() for _ in range(self.number)] -# terminating_events = [threading.Event() for _ in range(self.number)] -# heartbeat_ts = [init_ts for _ in range(self.number)] -# -# return [ -# threading.Thread( -# name=pipeline.name, -# target=keep_pipeline, -# args=[heartbeat_queue, terminating_event, pipeline.job], -# ) -# for heartbeat_queue, terminating_event, pipeline in zip( -# heartbeat_queues, terminating_events, self.pipelines -# ) -# ] -# -# def run_threads(self, threads: list[threading.Thread]) -> None: -# for th in threads: -# th.start() -# -# def run(self, once: bool = False) -> None: -# threads 
= self.make_threads() -# self.run_threads(threads) - - def run_pipelines( pipelines: list[Pipeline], process_report: ProcessReport, From 63a80d96a64fcd6512e552a8af397c3b0b32de72 Mon Sep 17 00:00:00 2001 From: badrogger Date: Thu, 29 Aug 2024 18:10:05 +0000 Subject: [PATCH 052/103] Fix new chain --- core/schains/process.py | 1 + 1 file changed, 1 insertion(+) diff --git a/core/schains/process.py b/core/schains/process.py index 734bd7d8a..a66290d03 100644 --- a/core/schains/process.py +++ b/core/schains/process.py @@ -75,6 +75,7 @@ class ProcessReport: def __init__(self, name: str) -> None: self.path = pathlib.Path(SCHAINS_DIR_PATH).joinpath(name, self.REPORT_FILENAME) + self.path.parent.mkdir(parents=True, exist_ok=True) def is_exist(self) -> bool: return os.path.isfile(self.path) From 2d38c9ff4b53a59405573ff3b8f0142a06dda18a Mon Sep 17 00:00:00 2001 From: badrogger Date: Wed, 4 Sep 2024 11:39:27 +0000 Subject: [PATCH 053/103] Switch to SchainStructure --- core/node.py | 4 +- core/schains/config/generator.py | 43 ++++---- core/schains/config/node_info.py | 4 +- core/schains/config/schain_info.py | 6 +- core/schains/config/skale_section.py | 6 +- core/schains/info.py | 4 +- core/schains/limits.py | 6 +- core/schains/monitor/action.py | 22 ++-- core/schains/monitor/containers.py | 46 ++++---- core/schains/monitor/main.py | 9 +- core/schains/monitor/rpc.py | 25 ++--- core/schains/runner.py | 20 ++-- core/schains/volume.py | 16 +-- tests/docker_utils_test.py | 10 +- tests/schains/checks_test.py | 4 +- tests/schains/cleaner_test.py | 8 +- tests/schains/config/generator_test.py | 102 ++++++------------ tests/schains/config/node_info_test.py | 7 +- .../monitor/action/skaled_action_test.py | 4 +- tests/schains/monitor/containers_test.py | 11 +- tests/schains/monitor/rpc_test.py | 13 +-- tests/schains/monitor/skaled_monitor_test.py | 4 +- tests/test_generate_config_sync.py | 12 +-- tests/utils.py | 35 +++--- 24 files changed, 201 insertions(+), 220 deletions(-) diff --git a/core/node.py b/core/node.py index 72df5bf00..86c451e48 100644 --- a/core/node.py +++ b/core/node.py @@ -194,7 +194,7 @@ def get_exit_status(self): self.config.id) schain_statuses = [ { - 'name': schain['name'], + 'name': schain.name, 'status': SchainExitStatus.ACTIVE.name } for schain in active_schains @@ -207,7 +207,7 @@ def get_exit_status(self): status = SchainExitStatus.LEFT else: status = SchainExitStatus.LEAVING - schain_name = self.skale.schains.get(schain['schain_id'])['name'] + schain_name = self.skale.schains.get(schain['schain_id']).name if not schain_name: schain_name = '[REMOVED]' schain_statuses.append( diff --git a/core/schains/config/generator.py b/core/schains/config/generator.py index c85ec6b1f..7c649a37c 100644 --- a/core/schains/config/generator.py +++ b/core/schains/config/generator.py @@ -21,6 +21,7 @@ from dataclasses import dataclass from skale import Skale +from skale.contracts.manager.schains import SchainStructure from skale.schain_config.generator import get_schain_nodes_with_schains from skale.schain_config.ports_allocation import get_schain_base_port_on_node from skale.schain_config.rotation_history import get_previous_schain_groups @@ -88,26 +89,26 @@ def to_dict(self): } -def get_on_chain_owner(schain: dict, generation: int, is_owner_contract: bool) -> str: +def get_on_chain_owner(schain: SchainStructure, generation: int, is_owner_contract: bool) -> str: """ Returns on-chain owner depending on sChain generation. 
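    If the mainnet owner is not a contract it is used directly; otherwise
    chains of generation ONE and later use the predeployed Marionette
    address, while generation ZERO chains keep the mainnet owner.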
""" if not is_owner_contract: - return schain['mainnetOwner'] + return schain.mainnet_owner if generation >= Gen.ONE: return MARIONETTE_ADDRESS if generation == Gen.ZERO: - return schain['mainnetOwner'] + return schain.mainnet_owner -def get_on_chain_etherbase(schain: dict, generation: int) -> str: +def get_on_chain_etherbase(schain: SchainStructure, generation: int) -> str: """ Returns on-chain owner depending on sChain generation. """ if generation >= Gen.ONE: return ETHERBASE_ADDRESS if generation == Gen.ZERO: - return schain['mainnetOwner'] + return schain.mainnet_owner def get_schain_id_for_chain(schain_name: str, generation: int) -> int: @@ -120,17 +121,17 @@ def get_schain_id_for_chain(schain_name: str, generation: int) -> int: return 1 -def get_schain_originator(schain: dict): +def get_schain_originator(schain: SchainStructure) -> str: """ Returns address that will be used as an sChain originator """ - if is_zero_address(schain['originator']): - return schain['mainnetOwner'] - return schain['originator'] + if is_zero_address(schain.originator): + return schain.mainnet_owner + return schain.originator def generate_schain_config( - schain: dict, node_id: int, node: dict, ecdsa_key_name: str, + schain: SchainStructure, node_id: int, node: dict, ecdsa_key_name: str, rotation_id: int, schain_nodes_with_schains: list, node_groups: list, generation: int, is_owner_contract: bool, skale_manager_opts: SkaleManagerOpts, schain_base_port: int, common_bls_public_keys: list[str], @@ -139,7 +140,7 @@ def generate_schain_config( ) -> SChainConfig: """Main function that is used to generate sChain config""" logger.info( - f'Going to generate sChain config for {schain["name"]}, ' + f'Going to generate sChain config for {schain.name}, ' f'node_name: {node["name"]}, node_id: {node_id}, rotation_id: {rotation_id}' ) if sync_node: @@ -149,15 +150,15 @@ def generate_schain_config( on_chain_etherbase = get_on_chain_etherbase(schain, generation) on_chain_owner = get_on_chain_owner(schain, generation, is_owner_contract) - mainnet_owner = schain['mainnetOwner'] - schain_type = get_schain_type(schain['partOfNode']) + mainnet_owner = schain.mainnet_owner + schain_type = get_schain_type(schain.part_of_node) - schain_id = get_schain_id_for_chain(schain['name'], generation) + schain_id = get_schain_id_for_chain(schain.name, generation) base_config = SChainBaseConfig(BASE_SCHAIN_CONFIG_FILEPATH) dynamic_params = { - 'chainID': get_chain_id(schain['name']) + 'chainID': get_chain_id(schain.name) } originator_address = get_schain_originator(schain) @@ -182,13 +183,13 @@ def generate_schain_config( ) accounts = {} - if is_static_accounts(schain['name']): - logger.info(f'Found static account for {schain["name"]}, going to use in config') - accounts = static_accounts(schain['name'])['accounts'] + if is_static_accounts(schain.name): + logger.info(f'Found static account for {schain.name}, going to use in config') + accounts = static_accounts(schain.name)['accounts'] else: logger.info('Static accounts not found, generating regular accounts section') predeployed_accounts = generate_predeployed_accounts( - schain_name=schain['name'], + schain_name=schain.name, allocation_type=get_allocation_type(schain), schain_type=schain_type, schain_nodes=schain_nodes_with_schains, @@ -236,7 +237,7 @@ def generate_schain_config_with_skale( node = skale.nodes.get(node_config.id) node_groups = get_previous_schain_groups(skale, schain_name) - is_owner_contract = is_address_contract(skale.web3, schain['mainnetOwner']) + is_owner_contract = 
is_address_contract(skale.web3, schain.mainnet_owner) skale_manager_opts = init_skale_manager_opts(skale) group_index = skale.schains.name_to_id(schain_name) @@ -247,7 +248,7 @@ def generate_schain_config_with_skale( else: schain_base_port = get_schain_base_port_on_node( schains_on_node, - schain['name'], + schain.name, node['port'] ) diff --git a/core/schains/config/node_info.py b/core/schains/config/node_info.py index e4bb3386b..c535ff6af 100644 --- a/core/schains/config/node_info.py +++ b/core/schains/config/node_info.py @@ -49,8 +49,8 @@ class CurrentNodeInfo(NodeInfo): archive: bool catchup: bool + """Returns camel-case representation of the CurrentNodeInfo object""" def to_dict(self): - """Returns camel-case representation of the CurrentNodeInfo object""" node_info = { **super().to_dict(), **{ @@ -81,7 +81,7 @@ def generate_current_node_info( sync_node: bool = False, archive: bool = False, catchup: bool = False ) -> CurrentNodeInfo: wallets = generate_wallets_config( - schain['name'], + schain.name, rotation_id, sync_node, nodes_in_schain, diff --git a/core/schains/config/schain_info.py b/core/schains/config/schain_info.py index 8397108f5..497a218c9 100644 --- a/core/schains/config/schain_info.py +++ b/core/schains/config/schain_info.py @@ -75,7 +75,7 @@ def generate_schain_info( sync_node: bool, archive: bool ) -> SChainInfo: - schain_type = get_schain_type(schain['partOfNode']) + schain_type = get_schain_type(schain.part_of_node) allocation_type = get_allocation_type(schain) volume_limits = get_schain_limit(schain_type, MetricType.volume_limits)[allocation_type] if sync_node and archive: @@ -86,13 +86,13 @@ def generate_schain_info( return SChainInfo( schain_id=schain_id, - name=schain['name'], + name=schain.name, block_author=on_chain_etherbase, contract_storage_limit=contract_storage_limit, db_storage_limit=db_storage_limit, node_groups=node_groups, nodes=nodes, - multitransaction_mode=schain['multitransactionMode'], + multitransaction_mode=schain.options.multitransaction_mode, static_schain_info=static_schain_info, **volume_limits ) diff --git a/core/schains/config/skale_section.py b/core/schains/config/skale_section.py index 5a1c413a4..452a08007 100644 --- a/core/schains/config/skale_section.py +++ b/core/schains/config/skale_section.py @@ -58,9 +58,9 @@ def generate_skale_section( schain_nodes=schain_nodes_with_schains ) - schain_type = get_schain_type(schain['partOfNode']) + schain_type = get_schain_type(schain.part_of_node) static_node_info = get_static_node_info(schain_type) - static_schain_info = get_static_schain_info(schain['name']) + static_schain_info = get_static_schain_info(schain.name) nodes_in_schain = len(schain_nodes_with_schains) node_info = generate_current_node_info( @@ -81,7 +81,7 @@ def generate_skale_section( schain_nodes = generate_schain_nodes( schain_nodes_with_schains=schain_nodes_with_schains, - schain_name=schain['name'], + schain_name=schain.name, rotation_id=rotation_id, sync_node=sync_node ) diff --git a/core/schains/info.py b/core/schains/info.py index ee0ec0ae3..0046b3980 100644 --- a/core/schains/info.py +++ b/core/schains/info.py @@ -47,8 +47,8 @@ def get_schain_info_by_name(skale: Skale, schain_name: str) -> SchainData: return SchainData( schain_name, sid, - contracts_info['mainnetOwner'], - contracts_info['partOfNode'], + contracts_info.mainnet_owner, + contracts_info.part_of_node, record.dkg_status, record.is_deleted, record.first_run, diff --git a/core/schains/limits.py b/core/schains/limits.py index 24bc39547..4a4b03339 100644 --- 
a/core/schains/limits.py
+++ b/core/schains/limits.py
@@ -17,6 +17,8 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
+from skale.contracts.manager.schains import SchainStructure
+
 from core.schains.types import SchainType, ContainerType, MetricType
 from tools.helper import read_json
 from tools.configs.resource_allocation import (
@@ -33,8 +35,8 @@ def get_schain_type(schain_part_of_node: int) -> SchainType:
     return SchainType(schain_part_of_node)
-def get_allocation_type(schain: dict) -> str:
-    return schain['allocationType'].name.lower()
+def get_allocation_type(schain: SchainStructure) -> str:
+    return schain.options.allocation_type.name.lower()
 def get_limit(metric_type: MetricType, schain_type: SchainType, container_type: ContainerType,
diff --git a/core/schains/monitor/action.py b/core/schains/monitor/action.py
index fb09d98c5..1002bf8a9 100644
--- a/core/schains/monitor/action.py
+++ b/core/schains/monitor/action.py
@@ -153,7 +153,7 @@ def __init__(
     ):
         self.skale = skale
         self.schain = schain
-        self.generation = schain['generation']
+        self.generation = schain.generation
         self.node_config = node_config
         self.checks = checks
         self.stream_version = stream_version
@@ -162,13 +162,13 @@ def __init__(
         self.rotation_data = rotation_data
         self.rotation_id = rotation_data['rotation_id']
         self.estate = estate
-        self.econfig = econfig or ExternalConfig(name=schain['name'])
+        self.econfig = econfig or ExternalConfig(name=schain.name)
         self.node_options = node_options or NodeOptions()
         self.cfm: ConfigFileManager = ConfigFileManager(
-            schain_name=self.schain['name']
+            schain_name=self.schain.name
         )
         self.statsd_client = get_statsd_client()
-        super().__init__(name=schain['name'])
+        super().__init__(name=schain.name)
     @BaseActionManager.monitor_block
     def config_dir(self) -> bool:
@@ -304,25 +304,25 @@ def __init__(
         node_options: NodeOptions = None
     ):
         self.schain = schain
-        self.generation = schain['generation']
+        self.generation = schain.generation
         self.checks = checks
         self.node_config = node_config
         self.rc = rule_controller
-        self.skaled_status = init_skaled_status(self.schain['name'])
-        self.schain_type = get_schain_type(schain['partOfNode'])
-        self.econfig = econfig or ExternalConfig(schain['name'])
+        self.skaled_status = init_skaled_status(self.schain.name)
+        self.schain_type = get_schain_type(schain.part_of_node)
+        self.econfig = econfig or ExternalConfig(schain.name)
         self.cfm: ConfigFileManager = ConfigFileManager(
-            schain_name=self.schain['name']
+            schain_name=self.schain.name
         )
-        self.esfm = ExitScheduleFileManager(schain['name'])
+        self.esfm = ExitScheduleFileManager(schain.name)
         self.dutils = dutils or DockerUtils()
         self.statsd_client = get_statsd_client()
         self.node_options = node_options or NodeOptions()
-        super().__init__(name=schain['name'])
+        super().__init__(name=schain.name)
     @BaseActionManager.monitor_block
     def volume(self) -> bool:
diff --git a/core/schains/monitor/containers.py b/core/schains/monitor/containers.py
index 001121395..311525766 100644
--- a/core/schains/monitor/containers.py
+++ b/core/schains/monitor/containers.py
@@ -20,6 +20,7 @@
 import logging
 import time
 from typing import Optional
+from skale.contracts.manager.schains import SchainStructure
 from core.schains.volume import is_volume_exists
 from core.schains.runner import (
@@ -50,7 +51,7 @@
 def monitor_schain_container(
-    schain,
+    schain: SchainStructure,
     schain_record,
     skaled_status,
     download_snapshot=False,
@@ -61,22 +62,22 @@ def 
monitor_schain_container( historic_state: bool = False ) -> None: dutils = dutils or DockerUtils() - schain_name = schain['name'] - logger.info(f'Monitoring container for sChain {schain_name}') + schain.name = schain.name + logger.info(f'Monitoring container for sChain {schain.name}') - if not is_volume_exists(schain_name, sync_node=sync_node, dutils=dutils): - logger.error(f'Data volume for sChain {schain_name} does not exist') + if not is_volume_exists(schain.name, sync_node=sync_node, dutils=dutils): + logger.error(f'Data volume for sChain {schain.name} does not exist') return if skaled_status.exit_time_reached and abort_on_exit: logger.info( - f'{schain_name} - Skipping container monitor: exit time reached') + f'{schain.name} - Skipping container monitor: exit time reached') skaled_status.log() schain_record.reset_failed_counters() return - if not is_container_exists(schain_name, dutils=dutils): - logger.info(f'SChain {schain_name}: container doesn\'t exits') + if not is_container_exists(schain.name, dutils=dutils): + logger.info(f'SChain {schain.name}: container doesn\'t exits') run_schain_container( schain=schain, download_snapshot=download_snapshot, @@ -92,14 +93,14 @@ def monitor_schain_container( if skaled_status.clear_data_dir and skaled_status.start_from_snapshot: logger.info( - f'{schain_name} - Skipping container monitor: sChain should be repaired') + f'{schain.name} - Skipping container monitor: sChain should be repaired') skaled_status.log() schain_record.reset_failed_counters() return - if is_schain_container_failed(schain_name, dutils=dutils): + if is_schain_container_failed(schain.name, dutils=dutils): if schain_record.restart_count < MAX_SCHAIN_RESTART_COUNT: - logger.info('sChain %s: restarting container', schain_name) + logger.info('sChain %s: restarting container', schain.name) restart_container(SCHAIN_CONTAINER, schain, dutils=dutils) update_ssl_change_date(schain_record) schain_record.set_restart_count(schain_record.restart_count + 1) @@ -107,7 +108,7 @@ def monitor_schain_container( else: logger.warning( 'SChain %s: max restart count exceeded - %d', - schain_name, + schain.name, MAX_SCHAIN_RESTART_COUNT ) else: @@ -120,42 +121,41 @@ def monitor_ima_container( migration_ts: int = 0, dutils: DockerUtils = None ) -> None: - schain_name = schain["name"] if SYNC_NODE: return if not ima_data.linked: - logger.info(f'{schain_name} - not registered in IMA, skipping') + logger.info(f'{schain.name} - not registered in IMA, skipping') return - copy_schain_ima_abi(schain_name) + copy_schain_ima_abi(schain.name) container_exists = is_container_exists( - schain_name, container_type=IMA_CONTAINER, dutils=dutils) + schain.name, container_type=IMA_CONTAINER, dutils=dutils) if time.time() > migration_ts: logger.debug('IMA migration time passed') image = get_image_name(image_type=IMA_CONTAINER, new=True) - time_frame = get_ima_time_frame(schain_name, after=True) + time_frame = get_ima_time_frame(schain.name, after=True) if container_exists: - container_image = get_container_image(schain_name, IMA_CONTAINER, dutils) - container_time_frame = get_ima_container_time_frame(schain_name, dutils) + container_image = get_container_image(schain.name, IMA_CONTAINER, dutils) + container_time_frame = get_ima_container_time_frame(schain.name, dutils) if image != container_image or time_frame != container_time_frame: logger.info('Removing old container as part of IMA migration') - remove_container(schain_name, IMA_CONTAINER, dutils) + remove_container(schain.name, IMA_CONTAINER, dutils) 
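                # the stale container was removed above; marking it absent lets
                # the branch below start a fresh one with the post-migration
                # image and time frame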
                container_exists = False
     else:
-        time_frame = get_ima_time_frame(schain_name, after=False)
+        time_frame = get_ima_time_frame(schain.name, after=False)
         image = get_image_name(image_type=IMA_CONTAINER, new=False)
     logger.debug('IMA time frame %d', time_frame)
 
     if not container_exists:
         logger.info(
             '%s No IMA container, creating, image %s, time frame %d',
-            schain_name, image, time_frame
+            schain.name, image, time_frame
         )
         run_ima_container(
             schain,
@@ -166,4 +166,4 @@ def monitor_ima_container(
         )
     else:
         logger.debug(
-            'sChain %s: IMA container exists, but not running, skipping', schain_name)
+            'sChain %s: IMA container exists, but not running, skipping', schain.name)
diff --git a/core/schains/monitor/main.py b/core/schains/monitor/main.py
index 58010d347..88170a327 100644
--- a/core/schains/monitor/main.py
+++ b/core/schains/monitor/main.py
@@ -27,6 +27,7 @@
 from typing import List, Optional
 
 from skale import Skale, SkaleIma
+from skale.contracts.manager.schains import SchainStructure
 from web3._utils import request as web3_request
 
 from core.node import get_skale_node_version
@@ -105,9 +106,7 @@ def run_config_pipeline(
 
     if SYNC_NODE:
         logger.info(
-            'Sync node last_dkg_successful %s, rotation_data %s',
-            last_dkg_successful,
-            rotation_data
+            'Sync node last_dkg_successful %s, rotation_data %s', last_dkg_successful, rotation_data
         )
         mon = SyncConfigMonitor(config_am, config_checks)
     else:
@@ -124,7 +123,7 @@
 
 def run_skaled_pipeline(
-    skale: Skale, schain: Dict, node_config: NodeConfig, dutils: DockerUtils
+    skale: Skale, schain: SchainStructure, node_config: NodeConfig, dutils: DockerUtils
 ) -> None:
     name = schain['name']
     schain_record = SChainRecord.get_by_name(name)
@@ -134,7 +133,7 @@
 
     rc = get_default_rule_controller(name=name)
     skaled_checks = SkaledChecks(
-        schain_name=schain['name'],
+        schain_name=schain.name,
         schain_record=schain_record,
         rule_controller=rc,
         dutils=dutils,
diff --git a/core/schains/monitor/rpc.py b/core/schains/monitor/rpc.py
index 4632ce918..5d9564221 100644
--- a/core/schains/monitor/rpc.py
+++ b/core/schains/monitor/rpc.py
@@ -19,6 +19,8 @@
 
 import logging
 
+from skale.contracts.manager.schains import SchainStructure
+
 from core.schains.runner import restart_container
 from core.schains.runner import is_container_exists, is_container_running
 from tools.docker_utils import DockerUtils
@@ -33,31 +35,30 @@
 
 
 def handle_failed_schain_rpc(
-    schain,
+    schain: SchainStructure,
     schain_record,
     skaled_status,
     dutils=None
 ):
     dutils = dutils or DockerUtils()
-    schain_name = schain['name']
-    logger.info(f'Monitoring RPC for sChain {schain_name}')
+    logger.info(f'Monitoring RPC for sChain {schain.name}')
 
-    if not is_container_exists(schain_name, dutils=dutils):
-        logger.warning(f'{schain_name} RPC monitor failed: container doesn\'t exit')
+    if not is_container_exists(schain.name, dutils=dutils):
+        logger.warning(f'{schain.name} RPC monitor failed: container doesn\'t exist')
         return
 
-    if not is_container_running(schain_name, dutils=dutils):
-        logger.warning(f'{schain_name} RPC monitor failed: container is not running')
+    if not is_container_running(schain.name, dutils=dutils):
+        logger.warning(f'{schain.name} RPC monitor failed: container is not running')
         return
 
     if skaled_status.exit_time_reached:
-        logger.info(f'{schain_name} - Skipping RPC monitor: exit time reached')
+        logger.info(f'{schain.name} - Skipping RPC monitor: exit time reached')
         skaled_status.log()
         schain_record.set_failed_rpc_count(0)
         return
 
     if 
skaled_status.downloading_snapshot: - logger.info(f'{schain_name} - Skipping RPC monitor: downloading snapshot') + logger.info(f'{schain.name} - Skipping RPC monitor: downloading snapshot') skaled_status.log() schain_record.set_failed_rpc_count(0) return @@ -65,18 +66,18 @@ def handle_failed_schain_rpc( rpc_stuck = schain_record.failed_rpc_count > MAX_SCHAIN_FAILED_RPC_COUNT logger.info( 'SChain %s, rpc stuck: %s, failed_rpc_count: %d, restart_count: %d', - schain_name, + schain.name, rpc_stuck, schain_record.failed_rpc_count, schain_record.restart_count ) if rpc_stuck: if schain_record.restart_count < MAX_SCHAIN_RESTART_COUNT: - logger.info(f'SChain {schain_name}: restarting container') + logger.info(f'SChain {schain.name}: restarting container') restart_container(SCHAIN_CONTAINER, schain, dutils=dutils) schain_record.set_restart_count(schain_record.restart_count + 1) else: - logger.warning(f'SChain {schain_name}: max restart count exceeded') + logger.warning(f'SChain {schain.name}: max restart count exceeded') schain_record.set_failed_rpc_count(0) else: schain_record.set_failed_rpc_count(schain_record.failed_rpc_count + 1) diff --git a/core/schains/runner.py b/core/schains/runner.py index e65aa6394..14d0304e2 100644 --- a/core/schains/runner.py +++ b/core/schains/runner.py @@ -21,6 +21,7 @@ import logging from docker.types import LogConfig, Ulimit +from skale.contracts.manager.schains import SchainStructure from core.schains.volume import get_schain_volume_config from core.schains.limits import get_schain_limit, get_ima_limit, get_schain_type @@ -161,13 +162,12 @@ def run_container( def restart_container( type, - schain, + schain: SchainStructure, timeout=SCHAIN_STOP_TIMEOUT, dutils=None ): dutils = dutils or DockerUtils() - schain_name = schain['name'] - container_name = get_container_name(type, schain_name) + container_name = get_container_name(type, schain.name) logger.info(arguments_list_string({'Container name': container_name}, 'Restarting container...')) @@ -176,7 +176,7 @@ def restart_container( def run_schain_container( - schain, + schain: SchainStructure, download_snapshot=False, start_ts=None, dutils=None, @@ -187,8 +187,8 @@ def run_schain_container( sync_node=False, historic_state=False ): - schain_name = schain['name'] - schain_type = get_schain_type(schain['partOfNode']) + schain_name = schain.name + schain_type = get_schain_type(schain.part_of_node) cpu_limit = None if sync_node else get_schain_limit(schain_type, MetricType.cpu_shares) mem_limit = None if sync_node else get_schain_limit(schain_type, MetricType.mem) @@ -224,22 +224,22 @@ def run_schain_container( def run_ima_container( - schain: dict, + schain: SchainStructure, mainnet_chain_id: int, time_frame: int, image: str, dutils: DockerUtils = None ) -> None: dutils = dutils or DockerUtils() - env = get_ima_env(schain['name'], mainnet_chain_id, time_frame) + env = get_ima_env(schain.name, mainnet_chain_id, time_frame) - schain_type = get_schain_type(schain['partOfNode']) + schain_type = get_schain_type(schain.part_of_node) cpu_limit = get_ima_limit(schain_type, MetricType.cpu_shares) mem_limit = get_ima_limit(schain_type, MetricType.mem) run_container( image_type=IMA_CONTAINER, - schain_name=schain['name'], + schain_name=schain.name, env=env.to_dict(), cpu_shares_limit=cpu_limit, mem_limit=mem_limit, diff --git a/core/schains/volume.py b/core/schains/volume.py index dbba93cd4..422a49bb0 100644 --- a/core/schains/volume.py +++ b/core/schains/volume.py @@ -21,6 +21,7 @@ import os import shutil +from 
skale.contracts.manager.schains import SchainStructure from core.schains.limits import get_schain_limit, get_schain_type from core.schains.types import MetricType from tools.configs.schains import SCHAIN_STATE_PATH, SCHAIN_STATIC_PATH @@ -45,24 +46,23 @@ def is_volume_exists(schain_name, sync_node=False, dutils=None): def init_data_volume( - schain: dict, + schain: SchainStructure, sync_node: bool = False, dutils: DockerUtils = None ): dutils = dutils or DockerUtils() - schain_name = schain['name'] - if is_volume_exists(schain_name, sync_node=sync_node, dutils=dutils): - logger.debug(f'Volume already exists: {schain_name}') + if is_volume_exists(schain.name, sync_node=sync_node, dutils=dutils): + logger.debug(f'Volume already exists: {schain.name}') return - logger.info(f'Creating volume for schain: {schain_name}') + logger.info(f'Creating volume for schain: {schain.name}') if sync_node: - ensure_data_dir_path(schain['name']) + ensure_data_dir_path(schain.name) else: - schain_type = get_schain_type(schain['partOfNode']) + schain_type = get_schain_type(schain.part_of_node) disk_limit = get_schain_limit(schain_type, MetricType.disk) - dutils.create_data_volume(schain_name, disk_limit) + dutils.create_data_volume(schain.name, disk_limit) def remove_data_dir(schain_name): diff --git a/tests/docker_utils_test.py b/tests/docker_utils_test.py index 731d7baa0..48320d796 100644 --- a/tests/docker_utils_test.py +++ b/tests/docker_utils_test.py @@ -14,7 +14,7 @@ get_container_info ) from tests.utils import ( - get_schain_contracts_data, + get_schain_struct, run_simple_schain_container, run_simple_schain_container_in_sync_mode ) @@ -104,7 +104,7 @@ def test_run_schain_container( skaled_mock_image ): schain_name = schain_config['skaleConfig']['sChain']['schainName'] - schain_data = get_schain_contracts_data(schain_name) + schain_data = get_schain_struct(schain_name) # Run schain container run_simple_schain_container(schain_data, dutils) @@ -123,7 +123,7 @@ def test_run_schain_container_sync( cert_key_pair ): schain_name = schain_config['skaleConfig']['sChain']['schainName'] - schain_data = get_schain_contracts_data(schain_name) + schain_data = get_schain_struct(schain_name) run_schain_container( schain_data, @@ -165,7 +165,7 @@ def test_run_schain_container_in_sync_mode( skaled_mock_image ): schain_name = schain_config['skaleConfig']['sChain']['schainName'] - schain_data = get_schain_contracts_data(schain_name) + schain_data = get_schain_struct(schain_name) # Run schain container run_simple_schain_container_in_sync_mode(schain_data, dutils) @@ -320,7 +320,7 @@ def test_get_container_image_name( skaled_mock_image ): schain_name = schain_config['skaleConfig']['sChain']['schainName'] - schain_data = get_schain_contracts_data(schain_name) + schain_data = get_schain_struct(schain_name) # Run schain container run_simple_schain_container(schain_data, dutils) diff --git a/tests/schains/checks_test.py b/tests/schains/checks_test.py index 2e86f4dca..4baa89dce 100644 --- a/tests/schains/checks_test.py +++ b/tests/schains/checks_test.py @@ -32,7 +32,7 @@ from tests.utils import ( CONFIG_STREAM, generate_schain_config, - get_schain_contracts_data, + get_schain_struct, response_mock, request_mock ) @@ -207,7 +207,7 @@ def test_ima_container_check(schain_checks, cleanup_ima_containers, dutils): ts = int(time.time()) mts = ts + 3600 name = schain_checks.name - schain = get_schain_contracts_data(name) + schain = get_schain_struct(name) image = get_image_name(image_type=IMA_CONTAINER) # new_image = 
get_image_name(type=IMA_CONTAINER, new=True) diff --git a/tests/schains/cleaner_test.py b/tests/schains/cleaner_test.py index c9df7b224..d16b41fd3 100644 --- a/tests/schains/cleaner_test.py +++ b/tests/schains/cleaner_test.py @@ -28,7 +28,7 @@ SChainRecord, mark_schain_deleted, upsert_schain_record) -from tests.utils import (get_schain_contracts_data, +from tests.utils import (get_schain_struct, run_simple_schain_container, run_simple_ima_container) @@ -124,7 +124,7 @@ def test_remove_schain_volume(dutils, schain_config): def schain_container(schain_config, ssl_folder, dutils): """ Creates and removes schain container """ schain_name = schain_config['skaleConfig']['sChain']['schainName'] - schain_data = get_schain_contracts_data(schain_name) + schain_data = get_schain_struct(schain_name) try: run_simple_schain_container(schain_data, dutils) yield schain_name @@ -147,7 +147,7 @@ def test_remove_schain_container( cert_key_pair ): schain_name = schain_config['skaleConfig']['sChain']['schainName'] - schain_data = get_schain_contracts_data(schain_name) + schain_data = get_schain_struct(schain_name) run_simple_schain_container(schain_data, dutils) container_name = SCHAIN_CONTAINER_NAME_TEMPLATE.format(schain_name) assert is_container_running(dutils, container_name) @@ -158,7 +158,7 @@ def test_remove_schain_container( @pytest.mark.skip('Docker API GA issues need to be resolved') def test_remove_ima_container(dutils, schain_container): schain_name = schain_container - schain_data = get_schain_contracts_data(schain_name) + schain_data = get_schain_struct(schain_name) with mock.patch('core.schains.runner.get_ima_env', return_value=ImaEnv( schain_dir='/' )): diff --git a/tests/schains/config/generator_test.py b/tests/schains/config/generator_test.py index 3006a0a4a..6eff085cc 100644 --- a/tests/schains/config/generator_test.py +++ b/tests/schains/config/generator_test.py @@ -3,6 +3,7 @@ from pathlib import Path import pytest +from skale.contracts.manager.schains import SchainStructure from skale.dataclasses.schain_options import AllocationType from etherbase_predeployed import ETHERBASE_ADDRESS, ETHERBASE_IMPLEMENTATION_ADDRESS from marionette_predeployed import MARIONETTE_ADDRESS, MARIONETTE_IMPLEMENTATION_ADDRESS @@ -24,6 +25,8 @@ from tools.configs.schains import SCHAINS_DIR_PATH from tools.node_options import NodeOptions +from tests.utils import get_schain_struct, TEST_ORIGINATOR_ADDRESS, TEST_MAINNET_OWNER_ADDRESS + NODE_ID = 1 ECDSA_KEY_NAME = 'TEST:KEY:NAME' @@ -41,44 +44,22 @@ ], } -TEST_ORIGINATOR_ADDRESS = '0x0B5e3eBB74eE281A24DDa3B1A4e70692c15EAC34' -TEST_MAINNET_OWNER_ADDRESS = '0x30E1C96277735B03E59B3098204fd04FD0e78a46' - TEST_NODE = {'id': 1, 'name': 'test', 'publicKey': '0x5556', 'port': 10000} -SCHAIN_WITHOUT_ORIGINATOR = { - 'name': 'test_schain', - 'partOfNode': 0, - 'generation': 1, - 'mainnetOwner': TEST_MAINNET_OWNER_ADDRESS, - 'originator': '0x0000000000000000000000000000000000000000', - 'multitransactionMode': True, - 'allocationType': AllocationType.DEFAULT -} +def get_schain_struct_no_originator() -> SchainStructure: + schain = get_schain_struct(schain_name='test_schain') + schain.originator = '0x0000000000000000000000000000000000000000' + return schain -SCHAIN_WITH_ORIGINATOR = { - 'name': 'test_schain', - 'partOfNode': 0, - 'generation': 1, - 'mainnetOwner': TEST_MAINNET_OWNER_ADDRESS, - 'originator': TEST_ORIGINATOR_ADDRESS, - 'multitransactionMode': True, - 'allocationType': AllocationType.DEFAULT -} -SCHAIN_WITH_STATIC_ACCOUNTS = { - 'name': 'static_chain', - 
'partOfNode': 0, - 'generation': 1, - 'mainnetOwner': TEST_MAINNET_OWNER_ADDRESS, - 'originator': TEST_ORIGINATOR_ADDRESS, - 'multitransactionMode': True, - 'allocationType': AllocationType.DEFAULT -} +def get_schain_struct_static_account() -> SchainStructure: + schain = get_schain_struct(schain_name='static_chain') + return schain def get_schain_node_with_schains(schain_name: str) -> list: + schain = get_schain_struct(schain_name=schain_name) return [ { 'name': 'test', @@ -87,7 +68,7 @@ def get_schain_node_with_schains(schain_name: str) -> list: 'publicKey': '0x0B5e3eBB74eE281A24DDa3B1A4e70692c15EAC34', 'port': 10000, 'id': 1, - 'schains': [{'name': schain_name}], + 'schains': [schain], } ] @@ -257,22 +238,12 @@ def test_generate_schain_config_with_skale( def test_generate_schain_config_gen0(schain_secret_key_file_default_chain, skale_manager_opts): - schain = { - 'name': 'test_schain', - 'partOfNode': 0, - 'generation': 0, - 'mainnetOwner': '0x30E1C96277735B03E59B3098204fd04FD0e78a46', - 'originator': TEST_ORIGINATOR_ADDRESS, - 'multitransactionMode': True, - 'allocationType': AllocationType.DEFAULT - } - node_id, generation, rotation_id = 1, 0, 0 ecdsa_key_name = 'test' node_groups = {} schain_config = generate_schain_config( - schain=schain, + schain=get_schain_struct(schain_name='test_schain'), node=TEST_NODE, node_id=node_id, ecdsa_key_name=ecdsa_key_name, @@ -297,7 +268,7 @@ def test_generate_schain_config_gen1(schain_secret_key_file_default_chain, skale node_groups = {} schain_config = generate_schain_config( - schain=SCHAIN_WITH_ORIGINATOR, + schain=get_schain_struct(), node=TEST_NODE, node_id=node_id, ecdsa_key_name=ecdsa_key_name, @@ -342,7 +313,7 @@ def test_generate_schain_config_gen1_pk_owner( node_groups = {} schain_config = generate_schain_config( - schain=SCHAIN_WITHOUT_ORIGINATOR, + schain=get_schain_struct_no_originator(), node=TEST_NODE, node_id=node_id, ecdsa_key_name=ecdsa_key_name, @@ -369,7 +340,7 @@ def test_generate_schain_config_gen2_schain_id( node_groups = {} schain_config = generate_schain_config( - schain=SCHAIN_WITHOUT_ORIGINATOR, + schain=get_schain_struct_no_originator(), node=TEST_NODE, node_id=node_id, ecdsa_key_name=ecdsa_key_name, @@ -391,7 +362,7 @@ def test_generate_schain_config_gen1_schain_id( ): node_id, generation, rotation_id = 1, 1, 0 schain_config = generate_schain_config( - schain=SCHAIN_WITHOUT_ORIGINATOR, + schain=get_schain_struct_no_originator(), node=TEST_NODE, node_id=node_id, ecdsa_key_name='test', @@ -413,7 +384,7 @@ def test_generate_schain_config_gen0_schain_id( ): node_id, generation, rotation_id = 1, 0, 0 schain_config = generate_schain_config( - schain=SCHAIN_WITHOUT_ORIGINATOR, + schain=get_schain_struct_no_originator(), node=TEST_NODE, node_id=node_id, ecdsa_key_name='test', @@ -437,15 +408,9 @@ def test_generate_schain_config_allocation_type( ecdsa_key_name = 'test' node_groups = {} - schain = { - 'name': 'test_schain', - 'partOfNode': 0, - 'generation': 1, - 'mainnetOwner': TEST_MAINNET_OWNER_ADDRESS, - 'originator': '0x0000000000000000000000000000000000000000', - 'multitransactionMode': True, - 'allocationType': AllocationType.NO_FILESTORAGE, - } + schain = get_schain_struct(schain_name='test_schain') + schain.options.allocation_type = AllocationType.NO_FILESTORAGE + schain_config = generate_schain_config( schain=schain, node=TEST_NODE, @@ -465,7 +430,8 @@ def test_generate_schain_config_allocation_type( assert config['skaleConfig']['sChain']['maxSkaledLeveldbStorageBytes'] == 94904996659 assert 
config['skaleConfig']['sChain']['maxFileStorageBytes'] == 0 - schain['allocationType'] = AllocationType.MAX_CONSENSUS_DB + schain = get_schain_struct(schain_name='test_schain') + schain.options.allocation_type = AllocationType.MAX_CONSENSUS_DB schain_config = generate_schain_config( schain=schain, @@ -508,10 +474,10 @@ def test_generate_schain_config_with_skale_gen2( def test_get_schain_originator(predeployed_ima): - originator = get_schain_originator(SCHAIN_WITHOUT_ORIGINATOR) + originator = get_schain_originator(get_schain_struct_no_originator()) assert originator == TEST_MAINNET_OWNER_ADDRESS - originator = get_schain_originator(SCHAIN_WITH_ORIGINATOR) + originator = get_schain_originator(get_schain_struct()) assert originator == TEST_ORIGINATOR_ADDRESS @@ -521,7 +487,7 @@ def test_generate_sync_node_config(schain_secret_key_file_default_chain, skale_m node_groups = {} schain_config = generate_schain_config( - schain=SCHAIN_WITHOUT_ORIGINATOR, + schain=get_schain_struct_no_originator(), node=TEST_NODE, node_id=node_id, ecdsa_key_name=ecdsa_key_name, @@ -549,7 +515,7 @@ def test_generate_sync_node_config_archive_catchup( node_groups = {} schain_config = generate_schain_config( - schain=SCHAIN_WITHOUT_ORIGINATOR, + schain=get_schain_struct_no_originator(), node=TEST_NODE, node_id=node_id, ecdsa_key_name=ecdsa_key_name, @@ -570,7 +536,7 @@ def test_generate_sync_node_config_archive_catchup( assert config['skaleConfig']['sChain'].get('maxConsensusStorageBytes') < 1000000000000000000 schain_config = generate_schain_config( - schain=SCHAIN_WITHOUT_ORIGINATOR, + schain=get_schain_struct_no_originator(), node=TEST_NODE, node_id=node_id, ecdsa_key_name=ecdsa_key_name, @@ -593,7 +559,7 @@ def test_generate_sync_node_config_archive_catchup( assert config['skaleConfig']['sChain'].get('maxConsensusStorageBytes') < 1000000000000000000 schain_config = generate_schain_config( - schain=SCHAIN_WITHOUT_ORIGINATOR, + schain=get_schain_struct_no_originator(), node=TEST_NODE, node_id=node_id, ecdsa_key_name=ecdsa_key_name, @@ -616,7 +582,7 @@ def test_generate_sync_node_config_archive_catchup( assert config['skaleConfig']['sChain'].get('maxConsensusStorageBytes') < 1000000000000000000 schain_config = generate_schain_config( - schain=SCHAIN_WITHOUT_ORIGINATOR, + schain=get_schain_struct_no_originator(), node=TEST_NODE, node_id=node_id, ecdsa_key_name=ecdsa_key_name, @@ -647,7 +613,7 @@ def test_generate_sync_node_config_static_accounts( node_groups = {} schain_config = generate_schain_config( - schain=SCHAIN_WITH_STATIC_ACCOUNTS, + schain=get_schain_struct_static_account(), node=TEST_NODE, node_id=node_id, ecdsa_key_name=ecdsa_key_name, @@ -665,13 +631,15 @@ def test_generate_sync_node_config_static_accounts( assert config['accounts'].get('0x1111111') assert config['accounts']['0x1111111']['balance'] == '1000000000000000000000000000000' + schain = get_schain_struct() + schain_config = generate_schain_config( - schain=SCHAIN_WITH_ORIGINATOR, + schain=schain, node=TEST_NODE, node_id=node_id, ecdsa_key_name=ecdsa_key_name, rotation_id=rotation_id, - schain_nodes_with_schains=get_schain_node_with_schains('test_schain'), + schain_nodes_with_schains=get_schain_node_with_schains(schain.name), node_groups=node_groups, generation=generation, is_owner_contract=False, diff --git a/tests/schains/config/node_info_test.py b/tests/schains/config/node_info_test.py index 9b3278d41..d25c905cf 100644 --- a/tests/schains/config/node_info_test.py +++ b/tests/schains/config/node_info_test.py @@ -4,6 +4,7 @@ from 
core.schains.config.node_info import generate_wallets_config, generate_current_node_info from core.schains.types import SchainType from tools.configs import SGX_SSL_KEY_FILEPATH, SGX_SSL_CERT_FILEPATH +from tests.utils import get_schain_struct COMMON_PUBLIC_KEY = [1, 2, 3, 4] @@ -77,7 +78,7 @@ def test_generate_current_node_info( node_id=1, ecdsa_key_name='123', static_node_info=static_node_info, - schain={'name': _schain_name, 'partOfNode': 0}, + schain=get_schain_struct(schain_name=_schain_name), rotation_id=0, skale_manager_opts=skale_manager_opts, nodes_in_schain=4, @@ -103,7 +104,7 @@ def test_generate_current_node_info( node_id=1, ecdsa_key_name='123', static_node_info=static_node_info, - schain={'name': _schain_name, 'partOfNode': 0}, + schain=get_schain_struct(schain_name=_schain_name), rotation_id=0, skale_manager_opts=skale_manager_opts, nodes_in_schain=4, @@ -131,7 +132,7 @@ def test_skale_manager_opts( node_id=1, ecdsa_key_name='123', static_node_info=static_node_info, - schain={'name': _schain_name, 'partOfNode': 0}, + schain=get_schain_struct(schain_name=_schain_name), rotation_id=0, skale_manager_opts=skale_manager_opts, nodes_in_schain=4, diff --git a/tests/schains/monitor/action/skaled_action_test.py b/tests/schains/monitor/action/skaled_action_test.py index 3281ec6bd..d02b18037 100644 --- a/tests/schains/monitor/action/skaled_action_test.py +++ b/tests/schains/monitor/action/skaled_action_test.py @@ -31,7 +31,7 @@ def run_ima_container_mock( dutils=None ): image_name, container_name, _, _ = get_container_info( - IMA_CONTAINER, schain['name']) + IMA_CONTAINER, schain.name) image = image or image_name dutils.safe_rm(container_name) dutils.run_container( @@ -53,7 +53,7 @@ def monitor_schain_container_mock( historic_state=False ): image_name, container_name, _, _ = get_container_info( - SCHAIN_CONTAINER, schain['name']) + SCHAIN_CONTAINER, schain.name) dutils.safe_rm(container_name) if not skaled_status.exit_time_reached or not abort_on_exit: dutils.run_container( diff --git a/tests/schains/monitor/containers_test.py b/tests/schains/monitor/containers_test.py index b8e806f06..fb1b2cdee 100644 --- a/tests/schains/monitor/containers_test.py +++ b/tests/schains/monitor/containers_test.py @@ -4,7 +4,7 @@ from core.schains.runner import is_container_exists from web.models.schain import upsert_schain_record -from tests.utils import run_custom_schain_container +from tests.utils import get_schain_struct, run_custom_schain_container def test_monitor_schain_container( @@ -16,7 +16,7 @@ def test_monitor_schain_container( cleanup_schain_containers ): schain_record = upsert_schain_record(schain_db) - schain = {'name': schain_db, 'partOfNode': 0, 'generation': 0} + schain = get_schain_struct(schain_name=schain_db) monitor_schain_container(schain, schain_record, skaled_status, dutils=dutils) assert not is_container_exists(schain_db, dutils=dutils) @@ -35,7 +35,7 @@ def test_monitor_schain_container_exit_time_reached( cleanup_schain_containers ): schain_record = upsert_schain_record(schain_db) - schain = {'name': schain_db, 'partOfNode': 0, 'generation': 0} + schain = get_schain_struct(schain_name=schain_db) with mock.patch('core.schains.monitor.containers.is_volume_exists', return_value=True): schain_record.set_failed_rpc_count(100) @@ -71,10 +71,9 @@ def test_monitor_schain_container_ec( cleanup_schain_containers ): schain_record = upsert_schain_record(schain_db) - schain = {'name': schain_db, 'partOfNode': 0, 'generation': 0} - schain_name = schain_db + schain = 
get_schain_struct(schain_name=schain_db) - run_custom_schain_container(dutils, schain_name, entrypoint=['sh', 'exit', '1']) + run_custom_schain_container(dutils, schain.name, entrypoint=['sh', 'exit', '1']) with mock.patch('core.schains.monitor.containers.is_volume_exists', return_value=True): schain_record.set_failed_rpc_count(100) schain_record.set_restart_count(0) diff --git a/tests/schains/monitor/rpc_test.py b/tests/schains/monitor/rpc_test.py index 5445ef887..f87783c77 100644 --- a/tests/schains/monitor/rpc_test.py +++ b/tests/schains/monitor/rpc_test.py @@ -11,6 +11,7 @@ from core.schains.rpc import check_endpoint_blocks from tools.configs.containers import SCHAIN_CONTAINER from web.models.schain import SChainRecord +from tests.utils import get_schain_struct CURRENT_TIMESTAMP = 1594903080 CURRENT_DATETIME = datetime.datetime.utcfromtimestamp(CURRENT_TIMESTAMP) @@ -21,7 +22,7 @@ def test_handle_failed_schain_rpc_no_container(schain_db, dutils, skaled_status) image_name, container_name, _, _ = get_container_info(SCHAIN_CONTAINER, schain_db) assert not handle_failed_schain_rpc( - schain={'name': schain_db}, + schain=get_schain_struct(schain_name=schain_db), schain_record=schain_record, skaled_status=skaled_status, dutils=dutils, @@ -44,7 +45,7 @@ def test_handle_failed_schain_rpc_exit_time_reached( finished_at = container_info['stats']['State']['FinishedAt'] assert not handle_failed_schain_rpc( - schain={'name': schain_db}, + schain=get_schain_struct(schain_name=schain_db), schain_record=schain_record, skaled_status=skaled_status_exit_time_reached, dutils=dutils, @@ -72,7 +73,7 @@ def test_monitor_schain_downloading_snapshot( finished_at = container_info['stats']['State']['FinishedAt'] handle_failed_schain_rpc( - schain={'name': schain_db}, + schain=get_schain_struct(schain_name=schain_db), schain_record=schain_record, skaled_status=skaled_status_downloading_snapshot, dutils=dutils, @@ -97,7 +98,7 @@ def test_handle_failed_schain_rpc_stuck_max_retries( finished_at = container_info['stats']['State']['FinishedAt'] handle_failed_schain_rpc( - schain={'name': schain_db}, + schain=get_schain_struct(schain_name=schain_db), schain_record=schain_record, skaled_status=skaled_status, dutils=dutils, @@ -121,7 +122,7 @@ def test_monitor_container_exited(schain_db, dutils, cleanup_schain_containers, assert schain_record.restart_count == 0 handle_failed_schain_rpc( - schain={'name': schain_db}, + schain=get_schain_struct(schain_name=schain_db), schain_record=schain_record, skaled_status=skaled_status, dutils=dutils, @@ -148,7 +149,7 @@ def test_handle_failed_schain_rpc_stuck( assert schain_record.restart_count == 0 handle_failed_schain_rpc( - schain={'name': schain_db}, + schain=get_schain_struct(schain_name=schain_db), schain_record=schain_record, skaled_status=skaled_status, dutils=dutils, diff --git a/tests/schains/monitor/skaled_monitor_test.py b/tests/schains/monitor/skaled_monitor_test.py index c63cda779..62d17d813 100644 --- a/tests/schains/monitor/skaled_monitor_test.py +++ b/tests/schains/monitor/skaled_monitor_test.py @@ -34,7 +34,7 @@ def run_ima_container_mock(schain: dict, mainnet_chain_id: int, dutils=None): image_name, container_name, _, _ = get_container_info( - IMA_CONTAINER, schain['name']) + IMA_CONTAINER, schain.name) dutils.safe_rm(container_name) dutils.run_container( image_name=image_name, @@ -54,7 +54,7 @@ def monitor_schain_container_mock( historic_state=False ): image_name, container_name, _, _ = get_container_info( - SCHAIN_CONTAINER, schain['name']) + SCHAIN_CONTAINER, 
schain.name) dutils.safe_rm(container_name) dutils.run_container( image_name=image_name, diff --git a/tests/test_generate_config_sync.py b/tests/test_generate_config_sync.py index 70a5f9fab..adb069cce 100644 --- a/tests/test_generate_config_sync.py +++ b/tests/test_generate_config_sync.py @@ -21,7 +21,7 @@ def test_generate_config(skale): for schain_name in CHAINS: schain = skale.schains.get_by_name(schain_name) - schain_type = get_schain_type(schain['partOfNode']) + schain_type = get_schain_type(schain.part_of_node) node_groups = get_previous_schain_groups(skale, schain_name) original_group = node_groups[0]['nodes'] @@ -33,10 +33,10 @@ def test_generate_config(skale): 'publicKey': value[2] }) - is_owner_contract = is_address_contract(skale.web3, schain['mainnetOwner']) - on_chain_owner = get_on_chain_owner(schain, schain['generation'], is_owner_contract) + is_owner_contract = is_address_contract(skale.web3, schain.mainnet_owner) + on_chain_owner = get_on_chain_owner(schain, schain.generation, is_owner_contract) - mainnet_owner = schain['mainnetOwner'] + mainnet_owner = schain.mainnet_owner originator_address = get_schain_originator(schain) @@ -47,14 +47,14 @@ def test_generate_config(skale): base_config = SChainBaseConfig(BASE_SCHAIN_CONFIG_FILEPATH) predeployed_accounts = generate_predeployed_accounts( - schain_name=schain['name'], + schain_name=schain.name, allocation_type='default', schain_type=schain_type, schain_nodes=schain_nodes_with_schains, on_chain_owner=on_chain_owner, mainnet_owner=mainnet_owner, originator_address=originator_address, - generation=schain['generation'] + generation=schain.generation ) accounts = { diff --git a/tests/utils.py b/tests/utils.py index 29f37e745..5b429907c 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -12,6 +12,8 @@ from skale import Skale, SkaleIma from skale.utils.web3_utils import init_web3 +from skale.contracts.manager.schains import SchainStructure +from skale.dataclasses.schain_options import AllocationType, SchainOptions from skale.wallets import Web3Wallet from web3 import Web3 @@ -56,6 +58,9 @@ IMA_MIGRATION_TS = 1688388551 +TEST_ORIGINATOR_ADDRESS = '0x0B5e3eBB74eE281A24DDa3B1A4e70692c15EAC34' +TEST_MAINNET_OWNER_ADDRESS = '0x30E1C96277735B03E59B3098204fd04FD0e78a46' + class FailedAPICall(Exception): pass @@ -113,19 +118,23 @@ def post_bp_data(bp, request, params=None, full_response=False, **kwargs): return json.loads(data.decode('utf-8')) -def get_schain_contracts_data(schain_name): - """ Schain data mock in case if schain on contracts is not required """ - return { - 'name': schain_name, - 'mainnetOwner': '0x1213123091a230923123213123', - 'indexInOwnerList': 0, - 'partOfNode': 0, - 'lifetime': 3600, - 'startDate': 1575448438, - 'deposit': 1000000000000000000, - 'index': 0, - 'active': True - } +def get_schain_struct(schain_name: str = 'test_chain') -> SchainStructure: + return SchainStructure( + name=schain_name, + part_of_node=0, + generation=1, + mainnet_owner=TEST_MAINNET_OWNER_ADDRESS, + originator=TEST_ORIGINATOR_ADDRESS, + options=SchainOptions(True, True, AllocationType.DEFAULT), + index_in_owner_list=0, + lifetime=3600, + start_date=100000000, + start_block=1000, + deposit=0, + index=1, + chain_id=1, + active=True, + ) def run_simple_schain_container(schain_data: dict, dutils: DockerUtils): From 853a922821be8eb81cd446dcaeb5d6d47ed0c0db Mon Sep 17 00:00:00 2001 From: badrogger Date: Wed, 4 Sep 2024 17:45:27 +0000 Subject: [PATCH 054/103] Move schain_allocation into tests --- {tools => tests}/schain_allocation.py | 0 
tests/schain_allocation_test.py | 2 +- tests/schains/config/generator_test.py | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename {tools => tests}/schain_allocation.py (100%) diff --git a/tools/schain_allocation.py b/tests/schain_allocation.py similarity index 100% rename from tools/schain_allocation.py rename to tests/schain_allocation.py diff --git a/tests/schain_allocation_test.py b/tests/schain_allocation_test.py index 7dba03849..57c1cb816 100644 --- a/tests/schain_allocation_test.py +++ b/tests/schain_allocation_test.py @@ -1,6 +1,6 @@ import pytest from tools.configs import CONFIG_FOLDER -from tools.schain_allocation import generate_schain_allocation +from tests.schain_allocation import generate_schain_allocation EXPECTED_SCHAIN_ALLOCATION = [ ( diff --git a/tests/schains/config/generator_test.py b/tests/schains/config/generator_test.py index 6eff085cc..d1ad4a298 100644 --- a/tests/schains/config/generator_test.py +++ b/tests/schains/config/generator_test.py @@ -268,7 +268,7 @@ def test_generate_schain_config_gen1(schain_secret_key_file_default_chain, skale node_groups = {} schain_config = generate_schain_config( - schain=get_schain_struct(), + schain=get_schain_struct(schain_name='test_schain'), node=TEST_NODE, node_id=node_id, ecdsa_key_name=ecdsa_key_name, From b11aee32f44c900a12fc9d497bbcb879635d18a0 Mon Sep 17 00:00:00 2001 From: badrogger Date: Wed, 4 Sep 2024 17:46:18 +0000 Subject: [PATCH 055/103] Switch to skale.py==6.4b0 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 506ee512e..b8ff328cb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,7 +8,7 @@ Jinja2==3.1.2 docker==6.1.3 python-iptables==1.0.1 -skale.py==6.3b0 +skale.py==6.4b0 requests==2.31 ima-predeployed==2.1.0b0 From 95ba7584d4d497a047dc2dabc0d231a35d836162 Mon Sep 17 00:00:00 2001 From: badrogger Date: Wed, 4 Sep 2024 18:34:30 +0000 Subject: [PATCH 056/103] Fix process_manager.py and cleaner --- core/schains/cleaner.py | 2 +- core/schains/process_manager.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/schains/cleaner.py b/core/schains/cleaner.py index 33556f153..6b99f3ec0 100644 --- a/core/schains/cleaner.py +++ b/core/schains/cleaner.py @@ -120,7 +120,7 @@ def monitor(skale, node_config, dutils=None): def get_schain_names_from_contract(skale, node_id): schains_on_contract = skale.schains.get_schains_for_node(node_id) - return list(map(lambda schain: schain['name'], schains_on_contract)) + return list(map(lambda schain: schain.name, schains_on_contract)) def get_schains_with_containers(dutils=None): diff --git a/core/schains/process_manager.py b/core/schains/process_manager.py index fddaa6a4d..53ad0a2b9 100644 --- a/core/schains/process_manager.py +++ b/core/schains/process_manager.py @@ -100,7 +100,7 @@ def fetch_schains_to_monitor(skale: Skale, node_id: int) -> list: schains = skale.schains.get_schains_for_node(node_id) leaving_schains = get_leaving_schains_for_node(skale, node_id) schains.extend(leaving_schains) - active_schains = list(filter(lambda schain: schain['active'], schains)) + active_schains = list(filter(lambda schain: schain.active, schains)) schains_holes = len(schains) - len(active_schains) logger.info( arguments_list_string({'Node ID': node_id, 'sChains on node': active_schains, From f3ed585886d7184cf14fe1caced28fafdcfe7c7d Mon Sep 17 00:00:00 2001 From: badrogger Date: Thu, 5 Sep 2024 10:05:39 +0000 Subject: [PATCH 057/103] Fix SchainStructure usage --- 
core/schains/monitor/main.py | 8 ++++---- core/schains/process_manager.py | 10 +++++----- core/schains/process_manager_helper.py | 4 ++-- web/routes/health.py | 14 +++++++------- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/core/schains/monitor/main.py b/core/schains/monitor/main.py index 88170a327..4d44b74bc 100644 --- a/core/schains/monitor/main.py +++ b/core/schains/monitor/main.py @@ -64,7 +64,7 @@ def run_config_pipeline( skale: Skale, skale_ima: SkaleIma, schain: Dict, node_config: NodeConfig, stream_version: str ) -> None: - name = schain['name'] + name = schain.name schain_record = SChainRecord.get_by_name(name) rotation_data = skale.node_rotation.get_rotation(name) allowed_ranges = get_sync_agent_ranges(skale) @@ -125,7 +125,7 @@ def run_config_pipeline( def run_skaled_pipeline( skale: Skale, schain: SchainStructure, node_config: NodeConfig, dutils: DockerUtils ) -> None: - name = schain['name'] + name = schain.name schain_record = SChainRecord.get_by_name(name) logger.info('Record: %s', SChainRecord.to_dict(schain_record)) @@ -193,7 +193,7 @@ def create_and_execute_tasks( dutils, ): reload(web3_request) - name = schain['name'] + name = schain.name is_rotation_active = skale.node_rotation.is_rotation_active(name) @@ -264,7 +264,7 @@ def run_monitor_for_schain( with ThreadPoolExecutor(max_workers=tasks_number, thread_name_prefix='T') as executor: futures: List[Optional[Future]] = [None for i in range(tasks_number)] while True: - schain_record = SChainRecord.get_by_name(schain['name']) + schain_record = SChainRecord.get_by_name(schain.name) try: create_and_execute_tasks( skale, diff --git a/core/schains/process_manager.py b/core/schains/process_manager.py index 53ad0a2b9..01cf10e1a 100644 --- a/core/schains/process_manager.py +++ b/core/schains/process_manager.py @@ -67,8 +67,8 @@ def run_process_manager(skale, skale_ima, node_config): def run_pm_schain(skale, skale_ima, node_config, schain: Dict) -> None: - schain_record = upsert_schain_record(schain['name']) - log_prefix = f'sChain {schain["name"]} -' # todo - move to logger formatter + schain_record = upsert_schain_record(schain.name) + log_prefix = f'sChain {schain.name} -' # todo - move to logger formatter terminate_stuck_schain_process(skale, schain_record, schain) monitor_process_alive = is_monitor_process_alive(schain_record.monitor_id) @@ -76,7 +76,7 @@ def run_pm_schain(skale, skale_ima, node_config, schain: Dict) -> None: if not monitor_process_alive: logger.info(f'{log_prefix} PID {schain_record.monitor_id} is not running, spawning...') process = Process( - name=schain['name'], + name=schain.name, target=run_monitor_for_schain, args=( skale, @@ -115,8 +115,8 @@ def get_leaving_schains_for_node(skale: Skale, node_id: int) -> list: leaving_history = skale.node_rotation.get_leaving_history(node_id) for leaving_schain in leaving_history: schain = skale.schains.get(leaving_schain['schain_id']) - if skale.node_rotation.is_rotation_active(schain['name']) and schain['name']: - schain['active'] = True + if skale.node_rotation.is_rotation_active(schain.name) and schain.name: + schain.active = True leaving_schains.append(schain) logger.info(f'Got leaving sChains for the node: {leaving_schains}') return leaving_schains diff --git a/core/schains/process_manager_helper.py b/core/schains/process_manager_helper.py index 2128c7ba1..954e6be80 100644 --- a/core/schains/process_manager_helper.py +++ b/core/schains/process_manager_helper.py @@ -41,11 +41,11 @@ def terminate_stuck_schain_process(skale, schain_record, 
schain): """ allowed_last_seen_time = _calc_allowed_last_seen_time(skale) if not schain_record.monitor_last_seen: - logging.warning(f'schain: {schain["name"]}, monitor_last_seen is None, skipping...') + logging.warning(f'schain: {schain.name}, monitor_last_seen is None, skipping...') return schain_monitor_last_seen = schain_record.monitor_last_seen.timestamp() if allowed_last_seen_time > schain_monitor_last_seen: - logger.warning(f'schain: {schain["name"]}, pid {schain_record.monitor_id} last seen is \ + logger.warning(f'schain: {schain.name}, pid {schain_record.monitor_id} last seen is \ {schain_monitor_last_seen}, while max allowed last_seen is {allowed_last_seen_time}, pid \ {schain_record.monitor_id} will be terminated now!') terminate_schain_process(schain_record) diff --git a/web/routes/health.py b/web/routes/health.py index 2503d674b..bc4344ca7 100644 --- a/web/routes/health.py +++ b/web/routes/health.py @@ -95,17 +95,17 @@ def schains_checks(): checks = [] for schain in schains: if schain.get('name') != '': - rotation_data = g.skale.node_rotation.get_rotation(schain['name']) + rotation_data = g.skale.node_rotation.get_rotation(schain.name) rotation_id = rotation_data['rotation_id'] - if SChainRecord.added(schain['name']): + if SChainRecord.added(schain.name): rc = get_default_rule_controller( - name=schain['name'], + name=schain.name, sync_agent_ranges=sync_agent_ranges ) - current_nodes = get_current_nodes(g.skale, schain['name']) - schain_record = SChainRecord.get_by_name(schain['name']) + current_nodes = get_current_nodes(g.skale, schain.name) + schain_record = SChainRecord.get_by_name(schain.name) schain_checks = SChainChecks( - schain['name'], + schain.name, node_id, schain_record=schain_record, rule_controller=rc, @@ -117,7 +117,7 @@ def schains_checks(): sync_node=False ).get_all(needed=checks_filter) checks.append({ - 'name': schain['name'], + 'name': schain.name, 'healthchecks': schain_checks }) return construct_ok_response(checks) From 6494c83b5ae7dc5e0002c8c899bad4d6da805038 Mon Sep 17 00:00:00 2001 From: badrogger Date: Thu, 5 Sep 2024 12:51:52 +0000 Subject: [PATCH 058/103] Fix schain endpoint --- web/routes/health.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/routes/health.py b/web/routes/health.py index bc4344ca7..9ca41aa5d 100644 --- a/web/routes/health.py +++ b/web/routes/health.py @@ -94,7 +94,7 @@ def schains_checks(): ) checks = [] for schain in schains: - if schain.get('name') != '': + if schain.name != '': rotation_data = g.skale.node_rotation.get_rotation(schain.name) rotation_id = rotation_data['rotation_id'] if SChainRecord.added(schain.name): From e61bf4efea6e38d9bac1b00518bdb9e2745c9033 Mon Sep 17 00:00:00 2001 From: badrogger Date: Thu, 5 Sep 2024 15:45:05 +0000 Subject: [PATCH 059/103] Use AllocationType in predeployed.py --- core/schains/config/generator.py | 4 ++-- core/schains/config/predeployed.py | 5 +++-- core/schains/limits.py | 6 ++++-- tests/schains/config/predeployed_test.py | 8 +++++--- tests/schains/monitor/main_test.py | 6 ++++-- tests/test_generate_config_sync.py | 2 +- 6 files changed, 19 insertions(+), 12 deletions(-) diff --git a/core/schains/config/generator.py b/core/schains/config/generator.py index 7c649a37c..bed831d71 100644 --- a/core/schains/config/generator.py +++ b/core/schains/config/generator.py @@ -38,7 +38,7 @@ from core.schains.config.static_accounts import is_static_accounts, static_accounts from core.schains.config.helper import get_chain_id, get_schain_id from core.schains.dkg.utils import 
get_common_bls_public_key -from core.schains.limits import get_allocation_type, get_schain_type +from core.schains.limits import get_schain_type from tools.helper import read_json from tools.configs.schains import BASE_SCHAIN_CONFIG_FILEPATH @@ -190,7 +190,7 @@ def generate_schain_config( logger.info('Static accounts not found, generating regular accounts section') predeployed_accounts = generate_predeployed_accounts( schain_name=schain.name, - allocation_type=get_allocation_type(schain), + allocation_type=schain.options.allocation_type, schain_type=schain_type, schain_nodes=schain_nodes_with_schains, on_chain_owner=on_chain_owner, diff --git a/core/schains/config/predeployed.py b/core/schains/config/predeployed.py index 09b779114..eddddb343 100644 --- a/core/schains/config/predeployed.py +++ b/core/schains/config/predeployed.py @@ -19,6 +19,7 @@ import logging +from skale.dataclasses.schain_options import AllocationType from skale.wallets.web3_wallet import public_key_to_address from etherbase_predeployed import ( @@ -60,7 +61,7 @@ def generate_predeployed_accounts( schain_name: str, schain_type: SchainType, - allocation_type: str, + allocation_type: AllocationType, schain_nodes: list, on_chain_owner: str, mainnet_owner: str, @@ -96,7 +97,7 @@ def generate_predeployed_accounts( def generate_v1_predeployed_contracts( schain_type: SchainType, - allocation_type: str, + allocation_type: AllocationType, on_chain_owner: str, mainnet_owner: str, originator_address: str, diff --git a/core/schains/limits.py b/core/schains/limits.py index 4a4b03339..3f0395fb4 100644 --- a/core/schains/limits.py +++ b/core/schains/limits.py @@ -18,6 +18,7 @@ # along with this program. If not, see . from skale.contracts.manager.schains import SchainStructure +from skale.dataclasses.schain_options import AllocationType from core.schains.types import SchainType, ContainerType, MetricType from tools.helper import read_json @@ -69,8 +70,9 @@ def get_ima_limit(schain_type: SchainType, metric_type: MetricType) -> int: return get_limit(metric_type, schain_type, ContainerType.ima, alloc) -def get_fs_allocated_storage(schain_type: SchainType, allocation_type: str) -> str: - volume_limits = get_schain_limit(schain_type, MetricType.volume_limits)[allocation_type] +def get_fs_allocated_storage(schain_type: SchainType, allocation_type: AllocationType) -> str: + allocation_type_name = get_allocation_type(allocation_type) + volume_limits = get_schain_limit(schain_type, MetricType.volume_limits)[allocation_type_name] return volume_limits[FILESTORAGE_LIMIT_OPTION_NAME] diff --git a/tests/schains/config/predeployed_test.py b/tests/schains/config/predeployed_test.py index 74b3a2a29..5c5c640b4 100644 --- a/tests/schains/config/predeployed_test.py +++ b/tests/schains/config/predeployed_test.py @@ -1,3 +1,5 @@ +from skale.dataclasses.schain_options import AllocationType + from marionette_predeployed import MARIONETTE_ADDRESS from etherbase_predeployed import ETHERBASE_ADDRESS from context_predeployed import CONTEXT_ADDRESS @@ -17,7 +19,7 @@ def test_generate_predeployed_accounts(): predeployed_section = generate_predeployed_accounts( schain_name='abc', schain_type=SchainType.medium, - allocation_type='default', + allocation_type=AllocationType.DEFAULT, schain_nodes={}, on_chain_owner='0xD1000000000000000000000000000000000000D1', mainnet_owner='0xD4000000000000000000000000000000000000D4', @@ -29,7 +31,7 @@ def test_generate_predeployed_accounts(): predeployed_section = generate_predeployed_accounts( schain_name='abc', 
        schain_type=SchainType.medium,
-        allocation_type='default',
+        allocation_type=AllocationType.DEFAULT,
         schain_nodes={},
         on_chain_owner='0xD1000000000000000000000000000000000000D1',
         mainnet_owner='0xD4000000000000000000000000000000000000D4',
@@ -42,7 +44,7 @@
 def test_generate_v1_predeployed_contracts():
     v1_precompiled_contracts = generate_v1_predeployed_contracts(
         schain_type=SchainType.medium,
-        allocation_type='default',
+        allocation_type=AllocationType.DEFAULT,
         on_chain_owner=MARIONETTE_ADDRESS,
         mainnet_owner='0x0123456789Ab',
         message_proxy_for_schain_address='0x987654321fC',
diff --git a/tests/schains/monitor/main_test.py b/tests/schains/monitor/main_test.py
index 3c094ab4b..865fe081d 100644
--- a/tests/schains/monitor/main_test.py
+++ b/tests/schains/monitor/main_test.py
@@ -11,6 +11,8 @@
 from tools.helper import is_node_part_of_chain
 from web.models.schain import upsert_schain_record
 
+from tests.utils import get_schain_struct
+
 
 @pytest.fixture
 def sync_ranges(skale):
@@ -62,7 +64,7 @@ def test_run_monitor_for_schain(
         skale,
         skale_ima,
         node_config,
-        schain={'name': schain_db, 'partOfNode': 0, 'generation': 0},
+        schain=get_schain_struct(schain_name=schain_db),
         dutils=dutils,
         once=True
     )
@@ -86,7 +88,7 @@ def test_run_monitor_for_schain_left(
         skale,
         skale_ima,
         node_config,
-        schain={'name': schain_not_exists, 'partOfNode': 0, 'generation': 0},
+        schain=get_schain_struct(schain_name=schain_not_exists),
         dutils=dutils,
         once=True
     )
diff --git a/tests/test_generate_config_sync.py b/tests/test_generate_config_sync.py
index adb069cce..c5ff2b289 100644
--- a/tests/test_generate_config_sync.py
+++ b/tests/test_generate_config_sync.py
@@ -48,7 +48,7 @@
 
         predeployed_accounts = generate_predeployed_accounts(
             schain_name=schain.name,
-            allocation_type='default',
+            allocation_type=AllocationType.DEFAULT,
             schain_type=schain_type,
             schain_nodes=schain_nodes_with_schains,
             on_chain_owner=on_chain_owner,
From 4275bde6d5d10af815bfa632ec0f2bf9e1e505f4 Mon Sep 17 00:00:00 2001
From: badrogger
Date: Thu, 5 Sep 2024 18:23:54 +0000
Subject: [PATCH 060/103] Fix linter

---
 tests/test_generate_config_sync.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test_generate_config_sync.py b/tests/test_generate_config_sync.py
index c5ff2b289..989fccf99 100644
--- a/tests/test_generate_config_sync.py
+++ b/tests/test_generate_config_sync.py
@@ -1,6 +1,7 @@
 import json
 import pytest
 from skale.schain_config.rotation_history import get_previous_schain_groups
+from skale.dataclasses.schain_options import AllocationType
 
 from core.schains.config.predeployed import generate_predeployed_accounts
 from core.schains.config.precompiled import generate_precompiled_accounts
From 439803dc7c59ad8b56b9b34246b3bad8532c71f8 Mon Sep 17 00:00:00 2001
From: badrogger
Date: Fri, 6 Sep 2024 10:25:37 +0000
Subject: [PATCH 061/103] Fix config generation

---
 core/schains/config/schain_info.py | 8 ++++----
 core/schains/limits.py             | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/core/schains/config/schain_info.py b/core/schains/config/schain_info.py
index 497a218c9..22fc89d40 100644
--- a/core/schains/config/schain_info.py
+++ b/core/schains/config/schain_info.py
@@ -19,7 +19,7 @@
 
 from dataclasses import dataclass
 
-from core.schains.limits import get_allocation_type, get_schain_limit, get_schain_type
+from core.schains.limits import get_allocation_type_name, get_schain_limit, get_schain_type
 from core.schains.types import MetricType
 from tools.configs.schains import 
MAX_CONSENSUS_STORAGE_INF_VALUE @@ -76,11 +76,11 @@ def generate_schain_info( archive: bool ) -> SChainInfo: schain_type = get_schain_type(schain.part_of_node) - allocation_type = get_allocation_type(schain) - volume_limits = get_schain_limit(schain_type, MetricType.volume_limits)[allocation_type] + allocation_type_name = get_allocation_type_name(schain.options.allocation_type) + volume_limits = get_schain_limit(schain_type, MetricType.volume_limits)[allocation_type_name] if sync_node and archive: volume_limits['max_consensus_storage_bytes'] = MAX_CONSENSUS_STORAGE_INF_VALUE - leveldb_limits = get_schain_limit(schain_type, MetricType.leveldb_limits)[allocation_type] + leveldb_limits = get_schain_limit(schain_type, MetricType.leveldb_limits)[allocation_type_name] contract_storage_limit = leveldb_limits['contract_storage'] db_storage_limit = leveldb_limits['db_storage'] diff --git a/core/schains/limits.py b/core/schains/limits.py index 3f0395fb4..2cf0f7784 100644 --- a/core/schains/limits.py +++ b/core/schains/limits.py @@ -36,8 +36,8 @@ def get_schain_type(schain_part_of_node: int) -> SchainType: return SchainType(schain_part_of_node) -def get_allocation_type(schain: SchainStructure) -> str: - return schain.options.allocation_type.name.lower() +def get_allocation_type_name(allocation_type: AllocationType) -> str: + return allocation_type.name.lower() def get_limit(metric_type: MetricType, schain_type: SchainType, container_type: ContainerType, @@ -71,7 +71,7 @@ def get_ima_limit(schain_type: SchainType, metric_type: MetricType) -> int: def get_fs_allocated_storage(schain_type: SchainType, allocation_type: AllocationType) -> str: - allocation_type_name = get_allocation_type(allocation_type) + allocation_type_name = get_allocation_type_name(allocation_type) volume_limits = get_schain_limit(schain_type, MetricType.volume_limits)[allocation_type_name] return volume_limits[FILESTORAGE_LIMIT_OPTION_NAME] From 05186946278648e6ee014f1648678ec29b0a676a Mon Sep 17 00:00:00 2001 From: badrogger Date: Fri, 6 Sep 2024 12:29:45 +0000 Subject: [PATCH 062/103] Switch to the new GA runner --- .github/workflows/test.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 17f83a656..b46acbd04 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -17,6 +17,9 @@ jobs: ENDPOINT: http://127.0.0.1:8545 CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} SCHAIN_TYPE: ${{ secrets.SCHAIN_TYPE }} + defaults: + run: + working-directory: ~/admin steps: - uses: actions/checkout@v2 with: @@ -24,7 +27,7 @@ jobs: - name: Install python dependencies run: | - source ~/ga/bin/activate + source ~/admin/.venv/bin/activate python --version bash ./scripts/install_python_dependencies.sh From 545cb3c8442172fe6697f4be024b1996fefc2fb6 Mon Sep 17 00:00:00 2001 From: badrogger Date: Fri, 6 Sep 2024 14:42:57 +0000 Subject: [PATCH 063/103] Use python3.11 --- .github/workflows/test.yml | 7 ++++--- scripts/run_core_tests.sh | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b46acbd04..6c5eb067f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -27,9 +27,10 @@ jobs: - name: Install python dependencies run: | - source ~/admin/.venv/bin/activate + source .venv/bin/activate python --version - bash ./scripts/install_python_dependencies.sh + uv pip install -r requirements.txt --prerelease=allow + uv pip install -r requirements-dev.txt - name: Lint 
with flake8 run: flake8 . @@ -54,7 +55,7 @@ jobs: - name: Run core tests run: | - source ~/ga/bin/activate + source .venv/bin/activate python --version bash ./scripts/run_core_tests.sh diff --git a/scripts/run_core_tests.sh b/scripts/run_core_tests.sh index ae867c129..e63b3064d 100755 --- a/scripts/run_core_tests.sh +++ b/scripts/run_core_tests.sh @@ -13,5 +13,5 @@ export_test_env run_sgx_simulator $SGX_WALLET_TAG bash scripts/run_redis.sh -py.test --cov-config=.coveragerc --cov=. tests/ --ignore=tests/firewall $@ +python -m py.test --cov-config=.coveragerc --cov=. tests --ignore=tests/firewall $@ tests_cleanup From 465954d5fa42fe6bff864ee5b387c36e61a650c6 Mon Sep 17 00:00:00 2001 From: badrogger Date: Fri, 6 Sep 2024 15:10:13 +0000 Subject: [PATCH 064/103] Remove defaults section from test workflow --- .github/workflows/test.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6c5eb067f..dcd3ed9f9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -17,9 +17,6 @@ jobs: ENDPOINT: http://127.0.0.1:8545 CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} SCHAIN_TYPE: ${{ secrets.SCHAIN_TYPE }} - defaults: - run: - working-directory: ~/admin steps: - uses: actions/checkout@v2 with: From 81dcc8141d9dde8068632894839da52ad7307ed0 Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 10 Sep 2024 11:21:58 +0000 Subject: [PATCH 065/103] Fix GA test pipeline --- .flake8 | 2 +- .github/workflows/test.yml | 27 ++++++++++++--------------- core/schains/limits.py | 1 - scripts/run_redis.sh | 2 +- 4 files changed, 14 insertions(+), 18 deletions(-) diff --git a/.flake8 b/.flake8 index a7a1a9dd9..a43fb2bad 100644 --- a/.flake8 +++ b/.flake8 @@ -1,3 +1,3 @@ [flake8] max-line-length = 100 -exclude = .git,__pycache__,docs/source/conf.py,old,build,dist,venv,node_modules,helper-scripts +exclude = .git,__pycache__,docs/source/conf.py,old,build,dist,venv,.venv,node_modules,helper-scripts diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index dcd3ed9f9..1f63da043 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -24,13 +24,15 @@ jobs: - name: Install python dependencies run: | - source .venv/bin/activate + pwd + ls -altr ../.venv + source ../.venv/bin/activate python --version uv pip install -r requirements.txt --prerelease=allow uv pip install -r requirements-dev.txt - name: Lint with flake8 - run: flake8 . + run: uv run flake8 . 
- name: Launch anvil node run: | @@ -40,32 +42,27 @@ jobs: run: | bash ./helper-scripts/deploy_test_ima.sh - - name: Cleanup skale-manager image - run: | - docker rmi -f skalenetwork/skale-manager:${{ env.MANAGER_TAG }} - - name: Show stats before tests if: always() run: | - sudo lsblk -f - sudo free -h + lsblk -f + free -h - name: Run core tests run: | - source .venv/bin/activate + source ../.venv/bin/activate python --version bash ./scripts/run_core_tests.sh - name: Cleanup docker artifacts run: | docker rm -f $(docker ps -aq) - docker rmi -f $(docker images -q) - name: Show stats after core tests if: always() run: | - sudo lsblk -f - sudo free -h + lsblk -f + free -h - name: Run firewall tests run: | @@ -74,15 +71,15 @@ jobs: - name: Show stats after firewall tests if: always() run: | - sudo lsblk -f - sudo free -h + lsblk -f + free -h - name: Cleanup docker artifacts if: always() run: | docker rm -f $(docker ps -aq) - docker rmi -f $(docker images -q) - name: Run codecov run: | + source ../.venv/bin/activate codecov -t $CODECOV_TOKEN diff --git a/core/schains/limits.py b/core/schains/limits.py index 2cf0f7784..e4b9d4296 100644 --- a/core/schains/limits.py +++ b/core/schains/limits.py @@ -17,7 +17,6 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -from skale.contracts.manager.schains import SchainStructure from skale.dataclasses.schain_options import AllocationType from core.schains.types import SchainType, ContainerType, MetricType diff --git a/scripts/run_redis.sh b/scripts/run_redis.sh index ceb21edc2..b3591b976 100755 --- a/scripts/run_redis.sh +++ b/scripts/run_redis.sh @@ -2,4 +2,4 @@ set -e docker rm -f redis || true export DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -docker run -v $DIR/../tests/redis-conf:/config -p 6381:6381 --name=redis -d redis:6.0-alpine +docker run -v $DIR/../tests/redis-conf:/config --network=host --name=redis -d redis:6.0-alpine From 2a0efec5beec8e291652f2444aa80a1b123cc37f Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 10 Sep 2024 11:26:31 +0000 Subject: [PATCH 066/103] Fix tests --- tests/routes/health_test.py | 8 ++++---- tests/schains/monitor/containers_test.py | 3 +++ tests/schains/monitor/rpc_test.py | 12 ++++++++---- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/tests/routes/health_test.py b/tests/routes/health_test.py index 0fe254ba2..25a20d31b 100644 --- a/tests/routes/health_test.py +++ b/tests/routes/health_test.py @@ -14,7 +14,7 @@ from web.routes.health import health_bp from web.helper import get_api_url -from tests.utils import get_bp_data, run_custom_schain_container +from tests.utils import get_bp_data, get_schain_struct, run_custom_schain_container TEST_SGX_KEYNAME = 'test_keyname' @@ -104,9 +104,9 @@ def __init__(self, *args, **kwargs): def get_schains_for_node_mock(self, node_id): return [ - {'name': schain_name}, - {'name': 'test-schain'}, - {'name': ''} + get_schain_struct(schain_name=schain_name), + get_schain_struct(schain_name='test-schain'), + get_schain_struct(schain_name=''), ] with mock.patch('web.routes.health.SChainChecks', SChainChecksMock): diff --git a/tests/schains/monitor/containers_test.py b/tests/schains/monitor/containers_test.py index fb1b2cdee..e4a1ba32b 100644 --- a/tests/schains/monitor/containers_test.py +++ b/tests/schains/monitor/containers_test.py @@ -1,4 +1,5 @@ import mock +import time from core.schains.monitor.containers import monitor_schain_container from core.schains.runner import 
is_container_exists
@@ -74,6 +75,8 @@ def test_monitor_schain_container_ec(
     schain = get_schain_struct(schain_name=schain_db)
     run_custom_schain_container(dutils, schain.name, entrypoint=['sh', 'exit', '1'])
+    # To make sure the container is initialized
+    time.sleep(2)
     with mock.patch('core.schains.monitor.containers.is_volume_exists', return_value=True):
         schain_record.set_failed_rpc_count(100)
         schain_record.set_restart_count(0)
diff --git a/tests/schains/monitor/rpc_test.py b/tests/schains/monitor/rpc_test.py
index f87783c77..8d93158bf 100644
--- a/tests/schains/monitor/rpc_test.py
+++ b/tests/schains/monitor/rpc_test.py
@@ -1,7 +1,7 @@
 import datetime
 import json
-import mock
-from time import sleep
+import time
+from unittest import mock
 import freezegun
 import requests
@@ -38,7 +38,7 @@ def test_handle_failed_schain_rpc_exit_time_reached(
     image_name, container_name, _, _ = get_container_info(SCHAIN_CONTAINER, schain_db)
     dutils.run_container(image_name=image_name, name=container_name, entrypoint='bash -c "exit 0"')
-    sleep(7)
+    time.sleep(7)
     schain_record.set_failed_rpc_count(100)
     container_info = dutils.get_info(container_name)
@@ -66,7 +66,7 @@ def test_monitor_schain_downloading_snapshot(
     dutils.run_container(
         image_name=image_name, name=container_name, entrypoint='bash -c "sleep 100"'
     )
-    sleep(7)
+    time.sleep(7)
     schain_record.set_failed_rpc_count(100)
     container_info = dutils.get_info(container_name)
@@ -113,6 +113,8 @@ def test_monitor_container_exited(schain_db, dutils, cleanup_schain_containers,
     dutils.run_container(
         image_name=image_name, name=container_name, entrypoint='bash -c "exit 100;"'
     )
+    # Wait for container initialization
+    time.sleep(2)
     schain_record.set_failed_rpc_count(100)
     schain_record.set_restart_count(0)
@@ -127,6 +129,8 @@ def test_monitor_container_exited(schain_db, dutils, cleanup_schain_containers,
         skaled_status=skaled_status,
         dutils=dutils,
     )
+    # Wait for container initialization
+    time.sleep(2)
     assert schain_record.restart_count == 0
     container_info = dutils.get_info(container_name)
     assert container_info['stats']['State']['FinishedAt'] == finished_at

From 7d1bdc297aa7ca1f16c8e68759f038e9cd6cda5e Mon Sep 17 00:00:00 2001
From: badrogger
Date: Tue, 10 Sep 2024 16:07:20 +0000
Subject: [PATCH 067/103] Fix cleaner

---
 core/schains/cleaner.py         |  7 ++--
 core/schains/process.py         | 74 ++++++++++++++++++---------------
 core/schains/process_manager.py | 20 ++++-----
 3 files changed, 54 insertions(+), 47 deletions(-)

diff --git a/core/schains/cleaner.py b/core/schains/cleaner.py
index f4545bcbc..2248c8a14 100644
--- a/core/schains/cleaner.py
+++ b/core/schains/cleaner.py
@@ -37,7 +37,7 @@
     get_node_ips_from_config,
     get_own_ip_from_config,
 )
-from core.schains.process import terminate_schain_process
+from core.schains.process import ProcessReport, terminate_process
 from core.schains.runner import get_container_name, is_exited
 from core.schains.external_config import ExternalConfig
 from core.schains.types import ContainerType
@@ -185,9 +185,10 @@ def remove_schain(
     msg: str,
     dutils: Optional[DockerUtils] = None,
 ) -> None:
-    schain_record = upsert_schain_record(schain_name)
     logger.warning(msg)
-    terminate_schain_process(schain_record)
+    report = ProcessReport(name=schain_name)
+    if report.is_exist():
+        terminate_process(report)
     delete_bls_keys(skale, schain_name)
     sync_agent_ranges = get_sync_agent_ranges(skale)
diff --git a/core/schains/process.py b/core/schains/process.py
index a66290d03..b655bff3e 100644
--- a/core/schains/process.py
+++ b/core/schains/process.py
@@ -26,7 +26,6
@@
 import pathlib
 import psutil
-
 from tools.configs.schains import SCHAINS_DIR_PATH
 from tools.helper import check_pid
@@ -37,39 +36,6 @@
 P_KILL_WAIT_TIMEOUT = 60
-def terminate_process(pid, kill_timeout=P_KILL_WAIT_TIMEOUT, log_msg=''):
-    log_prefix = f'pid: {pid} - '
-    if log_msg != '':
-        log_prefix += f'{log_msg} - '
-    if pid == 0:
-        logger.warning(f'{log_prefix} - pid is 0, skipping')
-        return
-    try:
-        logger.warning(f'{log_prefix} - going to terminate')
-        p = psutil.Process(pid)
-        os.kill(p.pid, signal.SIGTERM)
-        p.wait(timeout=kill_timeout)
-        logger.info(f'{log_prefix} was terminated')
-    except psutil.NoSuchProcess:
-        logger.info(f'{log_prefix} - no such process')
-    except psutil.TimeoutExpired:
-        logger.warning(f'{log_prefix} - timout expired, going to kill')
-        p.kill()
-        logger.info(f'{log_prefix} - process was killed')
-    except Exception:
-        logging.exception(f'{log_prefix} - termination failed!')
-
-
-def terminate_schain_process(schain_record):
-    log_msg = f'schain: {schain_record.name}'
-    terminate_process(schain_record.monitor_id, log_msg=log_msg)
-
-
-def is_monitor_process_alive(monitor_id):
-    """Checks that provided monitor_id is inited and alive"""
-    return monitor_id != 0 and check_pid(monitor_id)
-
-
 class ProcessReport:
     REPORT_FILENAME = 'process.json'
@@ -127,3 +93,43 @@ def update(self, pid: int, ts: int) -> None:
         report = {'pid': pid, 'ts': ts}
         self._save_tmp(report=report)
         self._move()
+
+    def cleanup(self) -> None:
+        os.remove(self.path)
+
+
+def terminate_process(
+    process_report: ProcessReport,
+    kill_timeout: int = P_KILL_WAIT_TIMEOUT,
+    log_msg: str = ''
+) -> None:
+    pid = process_report.pid
+    log_prefix = f'pid: {pid} - '
+
+    if log_msg != '':
+        log_prefix += f'{log_msg} - '
+    if pid == 0:
+        logger.warning(f'{log_prefix} - pid is 0, skipping')
+        return
+    try:
+        logger.warning(f'{log_prefix} - going to terminate')
+        p = psutil.Process(pid)
+        os.kill(p.pid, signal.SIGTERM)
+        p.wait(timeout=kill_timeout)
+        logger.info(f'{log_prefix} was terminated')
+    except psutil.NoSuchProcess:
+        logger.info(f'{log_prefix} - no such process')
+    except psutil.TimeoutExpired:
+        logger.warning(f'{log_prefix} - timeout expired, going to kill')
+        p.kill()
+        logger.info(f'{log_prefix} - process was killed')
+    except Exception:
+        logger.exception(f'{log_prefix} - termination failed!')
+        return
+    logger.info(f'{log_prefix} - removing process report for {pid}')
+    process_report.cleanup()
+
+
+def is_monitor_process_alive(monitor_pid: int) -> bool:
+    """Checks that provided monitor_pid is inited and alive"""
+    return monitor_pid != 0 and check_pid(monitor_pid)
diff --git a/core/schains/process_manager.py b/core/schains/process_manager.py
index a7785af1b..b926db4b5 100644
--- a/core/schains/process_manager.py
+++ b/core/schains/process_manager.py
@@ -66,26 +66,26 @@ def run_pm_schain(
     dkg_timeout = skale.constants_holder.get_dkg_timeout()
     allowed_diff = timeout or int(dkg_timeout * DKG_TIMEOUT_COEFFICIENT)
-    process_report = ProcessReport(schain['name'])
+    report = ProcessReport(schain['name'])
     init_ts = int(time.time())
-    if process_report.is_exist():
-        if init_ts - process_report.ts > allowed_diff:
-            logger.info('%s Terminating process: PID = %d', log_prefix, process_report.pid)
-            terminate_process(process_report.pid)
+    if report.is_exist() and is_monitor_process_alive(report.pid):
+        if init_ts - report.ts > allowed_diff:
+            logger.info('%s Terminating process: PID = %d', log_prefix, report.pid)
+            terminate_process(report.pid)
         else:
-            pid = process_report.pid
+            pid = report.pid
logger.info('%s Process is running: PID = %d', log_prefix, pid)
-    if not process_report.is_exist() or not is_monitor_process_alive(process_report.pid):
-        process_report.ts = init_ts
+    if not report.is_exist() or not is_monitor_process_alive(report.pid):
+        report.ts = init_ts
         process = Process(
             name=schain['name'],
             target=start_monitor,
-            args=(skale, schain, node_config, skale_ima, process_report),
+            args=(skale, schain, node_config, skale_ima, report),
         )
         process.start()
         pid = process.ident
-        process_report.pid = pid
+        report.pid = pid
         logger.info('%s Process started: PID = %d', log_prefix, pid)

From ee9653c7c25a089a56fbb73858b7e2866ee1d5e4 Mon Sep 17 00:00:00 2001
From: badrogger
Date: Fri, 13 Sep 2024 12:29:16 +0000
Subject: [PATCH 068/103] Fix terminate_process usage

---
 core/schains/process_manager.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/schains/process_manager.py b/core/schains/process_manager.py
index b926db4b5..2f3cbbf1b 100644
--- a/core/schains/process_manager.py
+++ b/core/schains/process_manager.py
@@ -71,7 +71,7 @@ def run_pm_schain(
     if report.is_exist() and is_monitor_process_alive(report.pid):
         if init_ts - report.ts > allowed_diff:
             logger.info('%s Terminating process: PID = %d', log_prefix, report.pid)
-            terminate_process(report.pid)
+            terminate_process(report)
         else:
             pid = report.pid
             logger.info('%s Process is running: PID = %d', log_prefix, pid)

From 3edeb5ca7848f448ca2cb1641418a288d331d37a Mon Sep 17 00:00:00 2001
From: badrogger
Date: Fri, 13 Sep 2024 13:00:05 +0000
Subject: [PATCH 069/103] Switch to TypedDict in limits.py

---
 core/schains/config/node_info.py | 2 +-
 core/schains/limits.py           | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/core/schains/config/node_info.py b/core/schains/config/node_info.py
index c535ff6af..f285336bd 100644
--- a/core/schains/config/node_info.py
+++ b/core/schains/config/node_info.py
@@ -49,8 +49,8 @@ class CurrentNodeInfo(NodeInfo):
     archive: bool
     catchup: bool
-    """Returns camel-case representation of the CurrentNodeInfo object"""
     def to_dict(self):
+        """Returns camel-case representation of the CurrentNodeInfo object"""
         node_info = {
             **super().to_dict(),
             **{
diff --git a/core/schains/limits.py b/core/schains/limits.py
index e4b9d4296..a2db93293 100644
--- a/core/schains/limits.py
+++ b/core/schains/limits.py
@@ -17,6 +17,7 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
+from typing import TypedDict from skale.dataclasses.schain_options import AllocationType from core.schains.types import SchainType, ContainerType, MetricType @@ -40,7 +41,7 @@ def get_allocation_type_name(allocation_type: AllocationType) -> str: def get_limit(metric_type: MetricType, schain_type: SchainType, container_type: ContainerType, - resource_allocation: dict) -> int: + resource_allocation: TypedDict) -> TypedDict: """ Get allocation option from the resources allocation file @@ -59,7 +60,7 @@ def get_limit(metric_type: MetricType, schain_type: SchainType, container_type: return resource_allocation[container_type.name][metric_type.name][schain_type.name] -def get_schain_limit(schain_type: SchainType, metric_type: MetricType) -> dict: +def get_schain_limit(schain_type: SchainType, metric_type: MetricType) -> TypedDict: alloc = _get_resource_allocation_info() return get_limit(metric_type, schain_type, ContainerType.schain, alloc) From 069dd07f6778203f3d2a40ee128ae82ffbfaf57a Mon Sep 17 00:00:00 2001 From: badrogger Date: Fri, 13 Sep 2024 14:58:43 +0000 Subject: [PATCH 070/103] Improve testing constants naming --- .github/workflows/test.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1f63da043..18e6f44e3 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -24,10 +24,7 @@ jobs: - name: Install python dependencies run: | - pwd - ls -altr ../.venv source ../.venv/bin/activate - python --version uv pip install -r requirements.txt --prerelease=allow uv pip install -r requirements-dev.txt From a1846c9a982813916314a6f9ce90ecbb0c7de536 Mon Sep 17 00:00:00 2001 From: badrogger Date: Fri, 13 Sep 2024 15:15:52 +0000 Subject: [PATCH 071/103] Improve testing constants naming --- tests/schains/config/static_params_test.py | 32 ++++++++++++---------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/tests/schains/config/static_params_test.py b/tests/schains/config/static_params_test.py index f6c18cd7f..163e52c32 100644 --- a/tests/schains/config/static_params_test.py +++ b/tests/schains/config/static_params_test.py @@ -11,8 +11,8 @@ TEST_SCHAIN_NAME = 'test-schain' -TS_FOR_ALL_NAME = 'revertableFSPatchTimestamp' -TS_BY_CHAIN_NAME = 'flexibleDeploymentPatchTimestamp' +DEFAULT_TS_NAME = 'revertableFSPatchTimestamp' +CHAIN_SPECIFIC_TS_NAME = 'flexibleDeploymentPatchTimestamp' def test_get_static_schain_cmd(): @@ -48,34 +48,36 @@ def test_get_static_schain_info_custom_chain_ts(): custom_schain_info = get_static_schain_info(TEST_SCHAIN_NAME) default_schain_info = get_static_schain_info('test') - assert custom_schain_info[TS_FOR_ALL_NAME] == default_schain_info[TS_FOR_ALL_NAME] - assert custom_schain_info[TS_BY_CHAIN_NAME] != default_schain_info[TS_BY_CHAIN_NAME] + assert custom_schain_info[DEFAULT_TS_NAME] == default_schain_info[DEFAULT_TS_NAME] + assert custom_schain_info[CHAIN_SPECIFIC_TS_NAME] != default_schain_info[CHAIN_SPECIFIC_TS_NAME] - assert custom_schain_info[TS_BY_CHAIN_NAME] == 1723460400 - assert default_schain_info[TS_BY_CHAIN_NAME] == 0 + assert custom_schain_info[CHAIN_SPECIFIC_TS_NAME] == 1723460400 + assert default_schain_info[CHAIN_SPECIFIC_TS_NAME] == 0 def test_get_schain_static_param(): static_params = get_static_params(ENV_TYPE) legacy_ts_info = get_schain_static_param( - static_params['schain'][TS_FOR_ALL_NAME], TEST_SCHAIN_NAME + static_params['schain'][DEFAULT_TS_NAME], TEST_SCHAIN_NAME ) - assert legacy_ts_info == static_params['schain'].get(TS_FOR_ALL_NAME) + assert 
legacy_ts_info == static_params['schain'].get(DEFAULT_TS_NAME) print(static_params['schain']) new_ts_info_custom_chain = get_schain_static_param( - static_params['schain'][TS_BY_CHAIN_NAME], TEST_SCHAIN_NAME + static_params['schain'][CHAIN_SPECIFIC_TS_NAME], TEST_SCHAIN_NAME ) - assert new_ts_info_custom_chain != static_params['schain'][TS_BY_CHAIN_NAME] - assert new_ts_info_custom_chain == static_params['schain'][TS_BY_CHAIN_NAME][TEST_SCHAIN_NAME] + assert new_ts_info_custom_chain != static_params['schain'][CHAIN_SPECIFIC_TS_NAME] + assert new_ts_info_custom_chain == \ + static_params['schain'][CHAIN_SPECIFIC_TS_NAME][TEST_SCHAIN_NAME] new_ts_info_default_chain = get_schain_static_param( - static_params['schain'][TS_BY_CHAIN_NAME], 'test' + static_params['schain'][CHAIN_SPECIFIC_TS_NAME], 'test' ) - assert new_ts_info_default_chain != static_params['schain'][TS_BY_CHAIN_NAME] - assert new_ts_info_default_chain != static_params['schain'][TS_BY_CHAIN_NAME].get('test') - assert new_ts_info_default_chain == static_params['schain'][TS_BY_CHAIN_NAME].get('default') + assert new_ts_info_default_chain != static_params['schain'][CHAIN_SPECIFIC_TS_NAME] + assert new_ts_info_default_chain != static_params['schain'][CHAIN_SPECIFIC_TS_NAME].get('test') + assert new_ts_info_default_chain == \ + static_params['schain'][CHAIN_SPECIFIC_TS_NAME].get('default') def test_get_static_node_info(): From ba6e4c933239f3b68489c5032ee70ac9d9db4604 Mon Sep 17 00:00:00 2001 From: badrogger Date: Mon, 16 Sep 2024 17:13:58 +0000 Subject: [PATCH 072/103] Sync with develop 3.0.2 --- .github/workflows/test.yml | 30 ++++++++----------- core/monitoring.py | 5 ++-- scripts/run_core_tests.sh | 2 +- scripts/run_redis.sh | 2 +- tests/monitoring_test.py | 38 ++++++++++++++---------- tests/schains/monitor/containers_test.py | 6 +++- tests/schains/monitor/rpc_test.py | 12 +++++--- tools/configs/__init__.py | 2 ++ tools/docker_utils.py | 6 ++++ 9 files changed, 60 insertions(+), 43 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 17f83a656..f5cc084e9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -24,12 +24,12 @@ jobs: - name: Install python dependencies run: | - source ~/ga/bin/activate - python --version - bash ./scripts/install_python_dependencies.sh + source ../.venv/bin/activate + uv pip install -r requirements.txt --prerelease=allow + uv pip install -r requirements-dev.txt - name: Lint with flake8 - run: flake8 . + run: uv run flake8 . 
- name: Launch anvil node run: | @@ -39,32 +39,26 @@ jobs: run: | bash ./helper-scripts/deploy_test_ima.sh - - name: Cleanup skale-manager image - run: | - docker rmi -f skalenetwork/skale-manager:${{ env.MANAGER_TAG }} - - name: Show stats before tests if: always() run: | - sudo lsblk -f - sudo free -h + lsblk -f + free -h - name: Run core tests run: | - source ~/ga/bin/activate - python --version + source ../.venv/bin/activate bash ./scripts/run_core_tests.sh - name: Cleanup docker artifacts run: | docker rm -f $(docker ps -aq) - docker rmi -f $(docker images -q) - name: Show stats after core tests if: always() run: | - sudo lsblk -f - sudo free -h + lsblk -f + free -h - name: Run firewall tests run: | @@ -73,15 +67,15 @@ jobs: - name: Show stats after firewall tests if: always() run: | - sudo lsblk -f - sudo free -h + lsblk -f + free -h - name: Cleanup docker artifacts if: always() run: | docker rm -f $(docker ps -aq) - docker rmi -f $(docker images -q) - name: Run codecov run: | + source ../.venv/bin/activate codecov -t $CODECOV_TOKEN diff --git a/core/monitoring.py b/core/monitoring.py index 9aa87dcf0..0884ba55a 100644 --- a/core/monitoring.py +++ b/core/monitoring.py @@ -21,7 +21,7 @@ from typing import Optional from tools.helper import process_template -from tools.docker_utils import DockerUtils +from tools.docker_utils import DockerUtils, get_docker_group_id from tools.configs import SKALE_DIR_HOST from tools.configs.monitoring import ( @@ -68,11 +68,12 @@ def ensure_telegraf_running(dutils: Optional[DockerUtils] = None) -> None: if dutils.is_container_exists(TELEGRAF_CONTAINER_NAME): dutils.restart(TELEGRAF_CONTAINER_NAME) else: + group_id = get_docker_group_id() dutils.run_container( image_name=TELEGRAF_IMAGE, name=TELEGRAF_CONTAINER_NAME, network_mode='host', - user='telegraf:998', + user=f'telegraf:{group_id}', restart_policy={'name': 'on-failure'}, environment={'HOST_PROC': '/host/proc'}, volumes={ diff --git a/scripts/run_core_tests.sh b/scripts/run_core_tests.sh index ae867c129..e63b3064d 100755 --- a/scripts/run_core_tests.sh +++ b/scripts/run_core_tests.sh @@ -13,5 +13,5 @@ export_test_env run_sgx_simulator $SGX_WALLET_TAG bash scripts/run_redis.sh -py.test --cov-config=.coveragerc --cov=. tests/ --ignore=tests/firewall $@ +python -m py.test --cov-config=.coveragerc --cov=. 
tests --ignore=tests/firewall $@
 tests_cleanup
diff --git a/scripts/run_redis.sh b/scripts/run_redis.sh
index ceb21edc2..b3591b976 100755
--- a/scripts/run_redis.sh
+++ b/scripts/run_redis.sh
@@ -2,4 +2,4 @@
 set -e
 docker rm -f redis || true
 export DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-docker run -v $DIR/../tests/redis-conf:/config -p 6381:6381 --name=redis -d redis:6.0-alpine
+docker run -v $DIR/../tests/redis-conf:/config --network=host --name=redis -d redis:6.0-alpine
diff --git a/tests/monitoring_test.py b/tests/monitoring_test.py
index c5eb74f13..64d7e17dd 100644
--- a/tests/monitoring_test.py
+++ b/tests/monitoring_test.py
@@ -1,12 +1,11 @@
+import json
 import os
 import pytest
 from core.monitoring import TelegrafNotConfiguredError, update_telegraf_service
-from tools.configs.monitoring import (
-    TELEGRAF_TEMPLATE_PATH,
-    TELEGRAF_CONFIG_PATH
-)
+from tools.configs import DOCKER_NODE_CONFIG_FILEPATH
+from tools.configs.monitoring import TELEGRAF_TEMPLATE_PATH, TELEGRAF_CONFIG_PATH
 CONFIG_TEMPLATE = """
@@ -24,6 +23,8 @@
 """
+DOCKER_GROUP_ID = 1023
+
 @pytest.fixture
 def cleanup_container(dutils):
@@ -44,24 +45,29 @@ def telegraf_template():
         os.remove(TELEGRAF_CONFIG_PATH)
-def test_update_telegraf_service(telegraf_template, cleanup_container, dutils):
+@pytest.fixture
+def docker_node_config():
+    try:
+        with open(DOCKER_NODE_CONFIG_FILEPATH, 'w') as docker_config:
+            json.dump({'docker_group_id': DOCKER_GROUP_ID}, docker_config)
+        yield DOCKER_NODE_CONFIG_FILEPATH
+    finally:
+        os.remove(DOCKER_NODE_CONFIG_FILEPATH)
+
+
+def test_update_telegraf_service(docker_node_config, telegraf_template, cleanup_container, dutils):
     node_id = 1
     node_ip = '1.1.1.1'
     with pytest.raises(TelegrafNotConfiguredError):
         update_telegraf_service(
-            node_id=node_id,
-            node_ip='',
-            url='http://127.0.0.1:1231',
-            dutils=dutils
+            node_id=node_id, node_ip='', url='http://127.0.0.1:1231', dutils=dutils
         )
-    update_telegraf_service(
-        node_ip,
-        node_id,
-        url='http://127.0.0.1:1231',
-        dutils=dutils
-    )
+    update_telegraf_service(node_ip, node_id, url='http://127.0.0.1:1231', dutils=dutils)
     with open(TELEGRAF_CONFIG_PATH) as config:
         config = config.read()
-        assert config == '\n[agent]\n interval = "60s"\n hostname = "1.1.1.1"\n omit_hostname = false\n\n[global_tags]\n node_id = "1"\n\n[[outputs.db]]\n alias = "db"\n urls = ["http://127.0.0.1:1231"]\n'  # noqa
+        assert (
+            config == '\n[agent]\n interval = "60s"\n hostname = "1.1.1.1"\n omit_hostname = false\n\n[global_tags]\n node_id = "1"\n\n[[outputs.db]]\n alias = "db"\n urls = ["http://127.0.0.1:1231"]\n')  # noqa
     assert dutils.is_container_running('skale_telegraf')
+    user_info = dutils.get_info('skale_telegraf')['stats']['Config']['User']
+    assert user_info == f'telegraf:{DOCKER_GROUP_ID}'
diff --git a/tests/schains/monitor/containers_test.py b/tests/schains/monitor/containers_test.py
index b8e806f06..eb0922e14 100644
--- a/tests/schains/monitor/containers_test.py
+++ b/tests/schains/monitor/containers_test.py
@@ -1,4 +1,6 @@
-import mock
+import time
+
+from unittest import mock
 from core.schains.monitor.containers import monitor_schain_container
 from core.schains.runner import is_container_exists
@@ -75,6 +77,8 @@ def test_monitor_schain_container_ec(
     schain_name = schain_db
     run_custom_schain_container(dutils, schain_name, entrypoint=['sh', 'exit', '1'])
+    # To make sure the container is initialized
+    time.sleep(2)
     with mock.patch('core.schains.monitor.containers.is_volume_exists', return_value=True):
schain_record.set_failed_rpc_count(100) schain_record.set_restart_count(0) diff --git a/tests/schains/monitor/rpc_test.py b/tests/schains/monitor/rpc_test.py index 5445ef887..65c26ea0b 100644 --- a/tests/schains/monitor/rpc_test.py +++ b/tests/schains/monitor/rpc_test.py @@ -1,7 +1,7 @@ import datetime +import time import json -import mock -from time import sleep +from unittest import mock import freezegun import requests @@ -37,7 +37,7 @@ def test_handle_failed_schain_rpc_exit_time_reached( image_name, container_name, _, _ = get_container_info(SCHAIN_CONTAINER, schain_db) dutils.run_container(image_name=image_name, name=container_name, entrypoint='bash -c "exit 0"') - sleep(7) + time.sleep(7) schain_record.set_failed_rpc_count(100) container_info = dutils.get_info(container_name) @@ -65,7 +65,7 @@ def test_monitor_schain_downloading_snapshot( dutils.run_container( image_name=image_name, name=container_name, entrypoint='bash -c "sleep 100"' ) - sleep(7) + time.sleep(7) schain_record.set_failed_rpc_count(100) container_info = dutils.get_info(container_name) @@ -112,6 +112,8 @@ def test_monitor_container_exited(schain_db, dutils, cleanup_schain_containers, dutils.run_container( image_name=image_name, name=container_name, entrypoint='bash -c "exit 100;"' ) + # Wait for container initialization + time.sleep(2) schain_record.set_failed_rpc_count(100) schain_record.set_restart_count(0) @@ -126,6 +128,8 @@ def test_monitor_container_exited(schain_db, dutils, cleanup_schain_containers, skaled_status=skaled_status, dutils=dutils, ) + # Wait for container initialization + time.sleep(2) assert schain_record.restart_count == 0 container_info = dutils.get_info(container_name) assert container_info['stats']['State']['FinishedAt'] == finished_at diff --git a/tools/configs/__init__.py b/tools/configs/__init__.py index 0fa95de31..35cfbcb10 100644 --- a/tools/configs/__init__.py +++ b/tools/configs/__init__.py @@ -103,3 +103,5 @@ STATSD_HOST = '127.0.0.1' STATSD_PORT = 8125 SYNC_NODE = os.getenv('SYNC_NODE') == 'True' + +DOCKER_NODE_CONFIG_FILEPATH = os.path.join(NODE_DATA_PATH, 'docker.json') diff --git a/tools/docker_utils.py b/tools/docker_utils.py index 00378fdbf..009c807fb 100644 --- a/tools/docker_utils.py +++ b/tools/docker_utils.py @@ -34,6 +34,7 @@ from docker.models.containers import Container from docker.models.volumes import Volume +from tools.configs import DOCKER_NODE_CONFIG_FILEPATH from tools.configs.containers import ( CONTAINER_NOT_FOUND, CREATED_STATUS, @@ -46,6 +47,7 @@ CONTAINER_LOGS_SEPARATOR ) from tools.configs.logs import REMOVED_CONTAINERS_FOLDER_PATH +from tools.helper import read_json logger = logging.getLogger(__name__) @@ -81,6 +83,10 @@ def inner(*args, **kwargs) -> list: return inner +def get_docker_group_id() -> int: + return read_json(DOCKER_NODE_CONFIG_FILEPATH)['docker_group_id'] + + class DockerUtils: docker_lock = multiprocessing.Lock() From 003062966a1ecab9bab83e44febc9c1d297b33d8 Mon Sep 17 00:00:00 2001 From: badrogger Date: Thu, 19 Sep 2024 11:48:12 +0000 Subject: [PATCH 073/103] Add test_add_repair_date --- tests/migrations_test.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/migrations_test.py b/tests/migrations_test.py index c744e6250..4a084919f 100644 --- a/tests/migrations_test.py +++ b/tests/migrations_test.py @@ -16,7 +16,8 @@ add_monitor_id_field, add_config_version_field, add_restart_count_field, - add_ssl_change_date_field + add_ssl_change_date_field, + add_repair_date_field ) @@ -118,3 +119,9 @@ def 
test_add_ssl_change_date_field(upserted_db, migrator, model):
     add_ssl_change_date_field(upserted_db, migrator)
     for r in model.select().execute():
         r.ssl_change_date < datetime.now()
+
+
+def test_add_repair_date_field(upserted_db, migrator, model):
+    add_repair_date_field(upserted_db, migrator)
+    for r in model.select().execute():
+        assert r.repair_date < datetime.now()

From d614bcb0e8979f2d22c0f1b0d03d950cc3cb934b Mon Sep 17 00:00:00 2001
From: badrogger
Date: Thu, 19 Sep 2024 11:52:44 +0000
Subject: [PATCH 074/103] Add disable repair mode test

---
 tests/schains/monitor/action/skaled_action_test.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tests/schains/monitor/action/skaled_action_test.py b/tests/schains/monitor/action/skaled_action_test.py
index 8a1ee3678..970dc04cc 100644
--- a/tests/schains/monitor/action/skaled_action_test.py
+++ b/tests/schains/monitor/action/skaled_action_test.py
@@ -487,3 +487,12 @@ def test_firewall_rules_action(skaled_am, skaled_checks, rule_controller, econfi
         SChainRule(port=10009),
         SChainRule(port=10010, first_ip='127.0.0.2', last_ip='127.0.0.2')
     ]
+
+
+def test_disable_repair_mode(skaled_am):
+    skaled_am.schain_record.set_repair_mode(True)
+    assert skaled_am.schain_record.repair_mode
+    skaled_am.disable_repair_mode()
+    assert not skaled_am.schain_record.repair_mode
+    skaled_am.disable_repair_mode()
+    assert not skaled_am.schain_record.repair_mode

From d690b558926f87dfb033d2a876ac16fa64278c8a Mon Sep 17 00:00:00 2001
From: badrogger
Date: Thu, 19 Sep 2024 12:37:11 +0000
Subject: [PATCH 075/103] Improve node cli status test

---
 tests/schains/skaled_status_test.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tests/schains/skaled_status_test.py b/tests/schains/skaled_status_test.py
index 807da04e2..b11422fe1 100644
--- a/tests/schains/skaled_status_test.py
+++ b/tests/schains/skaled_status_test.py
@@ -1,4 +1,5 @@
 from core.schains.status import (
+    get_node_cli_status,
     node_cli_status_filepath,
     NodeCliStatus,
     SkaledStatus,
@@ -58,6 +59,9 @@ def test_log(skaled_status, _schain_name, caplog):
 def test_node_cli_status_empty(_schain_name):
+    cli_status = get_node_cli_status(_schain_name)
+    assert cli_status is None
+
     status_filepath = node_cli_status_filepath(_schain_name)
     cli_status = NodeCliStatus(filepath=status_filepath)
@@ -66,8 +70,7 @@ def test_node_cli_status_empty(_schain_name):
 def test_node_cli_status_repair(_schain_name, ncli_status):
-    status_filepath = node_cli_status_filepath(_schain_name)
-    cli_status = NodeCliStatus(filepath=status_filepath)
+    cli_status = get_node_cli_status(_schain_name)
     assert cli_status.repair_ts == CURRENT_TS
     assert cli_status.snapshot_from == '127.0.0.1'

From 87b0915279b741deaaf4026a98bcd8e56b198432 Mon Sep 17 00:00:00 2001
From: badrogger
Date: Thu, 19 Sep 2024 16:09:36 +0000
Subject: [PATCH 076/103] Fix process_manager

---
 core/schains/process_manager.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/schains/process_manager.py b/core/schains/process_manager.py
index b07959713..d9e1f1027 100644
--- a/core/schains/process_manager.py
+++ b/core/schains/process_manager.py
@@ -59,7 +59,7 @@ def run_pm_schain(
     schain: SchainStructure,
     timeout: Optional[int] = None,
 ) -> None:
-    log_prefix = f'sChain {schain["name"]} -'
+    log_prefix = f'sChain {schain.name} -'
     if timeout is not None:
         allowed_diff = timeout

From 51382854601d244687f96330de14b3b58719b67c Mon Sep 17 00:00:00 2001
From: badrogger
Date: Thu, 19 Sep 2024 17:46:38 +0000
Subject: [PATCH 077/103] Fix start_monitor
argument type hints
---
 core/schains/monitor/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/schains/monitor/main.py b/core/schains/monitor/main.py
index 7835eb385..435c1a8cc 100644
--- a/core/schains/monitor/main.py
+++ b/core/schains/monitor/main.py
@@ -173,7 +173,7 @@ def run_skaled_pipeline(
 def start_monitor(
     skale: Skale,
-    schain: dict,
+    schain: SchainStructure,
     node_config: NodeConfig,
     skale_ima: SkaleIma,
     process_report: ProcessReport,

From fca4ac615e54a50336bf53aab8e89bc9050288ee Mon Sep 17 00:00:00 2001
From: badrogger
Date: Mon, 23 Sep 2024 19:19:43 +0000
Subject: [PATCH 078/103] Rework monitor execution strategy

---
 core/schains/monitor/main.py       |  79 +++++++++++++++++++
 core/schains/monitor/tasks.py      | 119 +++++++++++++++++++++++++++++
 core/schains/process.py            |  14 +++-
 core/schains/process_manager.py    |   4 +-
 tests/schains/monitor/main_test.py | 100 +++++++++++++++++++++---
 5 files changed, 302 insertions(+), 14 deletions(-)
 create mode 100644 core/schains/monitor/tasks.py

diff --git a/core/schains/monitor/main.py b/core/schains/monitor/main.py
index 435c1a8cc..5bc6183dd 100644
--- a/core/schains/monitor/main.py
+++ b/core/schains/monitor/main.py
@@ -248,3 +248,82 @@ def start_monitor(
     return True
+
+
+def start_jobs(
+    skale: Skale,
+    schain: SchainStructure,
+    node_config: NodeConfig,
+    skale_ima: SkaleIma,
+    process_report: ProcessReport,
+    dutils: Optional[DockerUtils] = None,
+) -> bool:
+    reload(web3_request)
+    name = schain.name
+
+    stream_version = get_skale_node_version()
+    schain_record = upsert_schain_record(name)
+
+    dkg_timeout = skale.constants_holder.get_dkg_timeout()
+    stuck_timeout = int(dkg_timeout * DKG_TIMEOUT_COEFFICIENT)
+
+    is_rotation_active = skale.node_rotation.is_rotation_active(name)
+
+    leaving_chain = not SYNC_NODE and not is_node_part_of_chain(skale, name, node_config.id)
+    if leaving_chain and not is_rotation_active:
+        logger.info('Not on node (%d), finishing process', node_config.id)
+        return True
+
+    logger.info(
+        'sync_config_run %s, config_version %s, stream_version %s',
+        schain_record.sync_config_run,
+        schain_record.config_version,
+        stream_version,
+    )
+
+    statsd_client = get_statsd_client()
+    monitor_last_seen_ts = schain_record.monitor_last_seen.timestamp()
+    statsd_client.incr(f'admin.schain.monitor.{no_hyphens(name)}')
+    statsd_client.gauge(f'admin.schain.monitor_last_seen.{no_hyphens(name)}', monitor_last_seen_ts)
+
+    pipelines = []
+    if not leaving_chain:
+        logger.info('Adding config pipelines to the pool')
+        pipelines.append(
+            Pipeline(
+                name='config',
+                job=functools.partial(
+                    run_config_pipeline,
+                    skale=skale,
+                    skale_ima=skale_ima,
+                    schain=schain,
+                    node_config=node_config,
+                    stream_version=stream_version,
+                ),
+            )
+        )
+    if schain_record.config_version != stream_version or (
+        schain_record.sync_config_run and schain_record.first_run
+    ):
+        ConfigFileManager(name).remove_skaled_config()
+    else:
+        logger.info('Adding skaled pipeline to the pool')
+        pipelines.append(
+            Pipeline(
+                name='skaled',
+                job=functools.partial(
+                    run_skaled_pipeline,
+                    skale=skale,
+                    schain=schain,
+                    node_config=node_config,
+                    dutils=dutils,
+                ),
+            )
+        )
+
+    if len(pipelines) == 0:
+        logger.warning('No pipelines to run')
+        return False
+
+    run_pipelines(pipelines=pipelines, process_report=process_report, stuck_timeout=stuck_timeout)
+    return True
diff --git a/core/schains/monitor/tasks.py b/core/schains/monitor/tasks.py
new
file mode 100644
index 000000000..8d9e5bc2d
--- /dev/null
+++ b/core/schains/monitor/tasks.py
@@ -0,0 +1,119 @@
+import abc
+import logging
+import time
+from concurrent.futures import Future, ThreadPoolExecutor
+from typing import Callable, NamedTuple
+
+from core.schains.process import ProcessReport
+
+
+logger = logging.getLogger(__name__)
+
+
+STUCK_TIMEOUT = 60 * 60 * 2
+SHUTDOWN_INTERVAL = 60 * 10
+
+
+class Pipeline(NamedTuple):
+    name: str
+    job: Callable
+
+
+class ITaskBuilder(metaclass=abc.ABCMeta):
+    @property
+    @abc.abstractmethod
+    def task_name(self) -> str:
+        pass
+
+    @property
+    @abc.abstractmethod
+    def stuck_timeout(self) -> int:
+        pass
+
+    @abc.abstractmethod
+    def build_task(self) -> Callable:
+        pass
+
+
+def test_job1():
+    for i in range(500):
+        if i % 3 == 1:
+            print('Job 1 OOO')
+        time.sleep(1)
+
+
+def test_job2():
+    for i in range(500):
+        if i % 3 == 1:
+            print('Job 2 YYY')
+        time.sleep(1)
+
+
+class TestPipelineBuilder1:
+    def __init__(self) -> None:
+        self._task_name = 'task2'
+        self._stuck_timeout = 5
+
+    @property
+    def task_name(self) -> str:
+        return self._task_name
+
+    @property
+    def stuck_timeout(self) -> int:
+        return self._stuck_timeout
+
+    def build_task(self):
+        return [
+            Pipeline(name='test0', job=test_job1),
+        ]
+
+
+class TestPipelineBuilder2:
+    def __init__(self) -> None:
+        self.task_name = 'task2'
+        self.stuck_timeout = 5
+
+    def build_task(self):
+        return [
+            Pipeline(name='test1', job=test_job2),
+        ]
+
+
+class StuckMonitorError(Exception):
+    pass
+
+
+def execute_tasks(
+    task_builders: list[ITaskBuilder],
+    process_report: ProcessReport,
+    once: bool = False,
+    shutdown_interval: int = SHUTDOWN_INTERVAL,
+) -> None:
+    with ThreadPoolExecutor(max_workers=len(task_builders), thread_name_prefix='mon_') as executor:
+        stucked = []
+        futures = [Future() for _ in task_builders]
+        start_ts = [0 for _ in task_builders]
+        while True:
+            for index, builder in enumerate(task_builders):
+                if not futures[index].running():
+                    job = builder.build_task()
+                    start_ts[index] = int(time.time())
+                    futures[index] = executor.submit(job)
+                else:
+                    if time.time() - start_ts[index] > builder.stuck_timeout:
+                        canceled = futures[index].cancel()
+                        if not canceled:
+                            logger.warning(f'Stuck detected for job {builder.task_name}')
+                            stucked.append(builder.task_name)
+
+            logger.info('Sleeping before stopping executor')
+            time.sleep(shutdown_interval)
+
+            if len(stucked) > 0:
+                executor.shutdown(wait=False)
+                logger.info('Subverting execution.
Stucked %s', stucked) + process_report.ts = 0 + break + if once: + break + process_report.ts = int(time.time()) diff --git a/core/schains/process.py b/core/schains/process.py index b655bff3e..4b4173ad0 100644 --- a/core/schains/process.py +++ b/core/schains/process.py @@ -98,12 +98,22 @@ def cleanup(self) -> None: os.remove(self.path) -def terminate_process( +def shutdown_process( process_report: ProcessReport, kill_timeout: int = P_KILL_WAIT_TIMEOUT, log_msg: str = '' ) -> None: pid = process_report.pid + terminate_process(pid=pid, kill_timeout=kill_timeout, log_msg=log_msg) + logger.info(f'Removing process report for {pid}') + process_report.cleanup() + + +def terminate_process( + pid: int, + kill_timeout: int = P_KILL_WAIT_TIMEOUT, + log_msg: str = '' +) -> None: log_prefix = f'pid: {pid} - ' if log_msg != '': @@ -126,8 +136,6 @@ def terminate_process( except Exception: logger.exception(f'{log_prefix} - termination failed!') return - logger.info(f'{log_prefix} - removing process report for {pid}') - process_report.cleanup() def is_monitor_process_alive(monitor_pid: int) -> bool: diff --git a/core/schains/process_manager.py b/core/schains/process_manager.py index d9e1f1027..9527ea2bf 100644 --- a/core/schains/process_manager.py +++ b/core/schains/process_manager.py @@ -30,7 +30,7 @@ from core.schains.notifications import notify_if_not_enough_balance from core.schains.process import ( is_monitor_process_alive, - terminate_process, + shutdown_process, ProcessReport, ) @@ -72,7 +72,7 @@ def run_pm_schain( if report.is_exist() and is_monitor_process_alive(report.pid): if init_ts - report.ts > allowed_diff: logger.info('%s Terminating process: PID = %d', log_prefix, report.pid) - terminate_process(report) + shutdown_process(report) else: pid = report.pid logger.info('%s Process is running: PID = %d', log_prefix, pid) diff --git a/tests/schains/monitor/main_test.py b/tests/schains/monitor/main_test.py index da003fc33..36f7584e9 100644 --- a/tests/schains/monitor/main_test.py +++ b/tests/schains/monitor/main_test.py @@ -5,6 +5,7 @@ import shutil import time from multiprocessing import Process +from typing import Callable import pytest @@ -12,6 +13,7 @@ from core.schains.firewall.utils import get_sync_agent_ranges from core.schains.monitor.main import Pipeline, run_pipelines from core.schains.process import ProcessReport, terminate_process +from core.schains.monitor.tasks import execute_tasks, ITaskBuilder from tools.configs.schains import SCHAINS_DIR_PATH from tools.helper import is_node_part_of_chain @@ -42,7 +44,7 @@ def test_get_sync_agent_ranges(skale, sync_ranges): ranges = get_sync_agent_ranges(skale) assert ranges == [ IpRange(start_ip='127.0.0.1', end_ip='127.0.0.2'), - IpRange(start_ip='127.0.0.5', end_ip='127.0.0.7') + IpRange(start_ip='127.0.0.5', end_ip='127.0.0.7'), ] @@ -77,10 +79,17 @@ def stuck_pipeline(index: int): process_report = ProcessReport(name=_schain_name) - target = functools.partial(run_pipelines, pipelines=[ - Pipeline(name='healthy0', job=functools.partial(simple_pipeline, index=0)), - Pipeline(name='healthy1', job=functools.partial(simple_pipeline, index=1)), - ], process_report=process_report, once=True, stuck_timeout=5, shutdown_interval=10) + target = functools.partial( + run_pipelines, + pipelines=[ + Pipeline(name='healthy0', job=functools.partial(simple_pipeline, index=0)), + Pipeline(name='healthy1', job=functools.partial(simple_pipeline, index=1)), + ], + process_report=process_report, + once=True, + stuck_timeout=5, + shutdown_interval=10, + ) terminated = 
False monitor_process = Process(target=target) @@ -93,10 +102,83 @@ def stuck_pipeline(index: int): terminate_process(monitor_process.ident) assert not terminated - target = functools.partial(run_pipelines, pipelines=[ - Pipeline(name='healthy', job=functools.partial(simple_pipeline, index=0)), - Pipeline(name='stuck', job=functools.partial(stuck_pipeline, index=1)) - ], process_report=process_report, stuck_timeout=5, shutdown_interval=10) + target = functools.partial( + run_pipelines, + pipelines=[ + Pipeline(name='healthy', job=functools.partial(simple_pipeline, index=0)), + Pipeline(name='stuck', job=functools.partial(stuck_pipeline, index=1)), + ], + process_report=process_report, + stuck_timeout=5, + shutdown_interval=10, + ) + + monitor_process = Process(target=target) + terminated = False + + try: + monitor_process.start() + monitor_process.join(timeout=50) + finally: + if monitor_process.is_alive(): + terminated = True + terminate_process(monitor_process.ident) + + assert terminated + + +def test_execute_jobs(tmp_dir, _schain_name): + def simple_pipeline(index: int) -> None: + logging.info('Running simple pipeline %d', index) + iterations = 7 + for i in range(iterations): + logging.info('Simple pipeline beat %d', index) + time.sleep(1) + + def stuck_pipeline(index: int) -> None: + logging.info('Running stuck pipeline %d', index) + iterations = 7 + for i in range(iterations): + logging.info('Stuck pipeline %d beat', index) + time.sleep(1) + + class NormalTaskBuilder(ITaskBuilder): + def __init__(self, index: int) -> None: + self.index = index + self.task_name = 'task-a' + self._stuck_timeout = 11 + + def task_name(self) -> str: + return self._task_name + + def stuck_timeout(self) -> int: + return self._stuck_timeout + + def build_task(self) -> Callable: + return functools.partial(simple_pipeline, index=self.index) + + class StuckTaskBuilder(ITaskBuilder): + def __init__(self, index) -> None: + self._task_name = 'task-b' + self.index = index + self._stuck_timeout = 5 + + def task_name(self) -> str: + return self._task_name + + def stuck_timeout(self) -> int: + return self._stuck_timeout + + def build_task(self) -> Callable: + return functools.partial(stuck_pipeline, index=self.index) + + process_report = ProcessReport(name=_schain_name) + target = functools.partial( + execute_tasks, + task_builders=[StuckTaskBuilder(0), StuckTaskBuilder(1)], + process_report=process_report, + shutdown_interval=10, + ) monitor_process = Process(target=target) terminated = False From ccc6bde7d0d9b5129c4baef7584703fd0f44a6b7 Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 24 Sep 2024 20:10:53 +0000 Subject: [PATCH 079/103] Wrap pipelines into Tasks classes --- core/schains/monitor/main.py | 238 +++++++++++++++++++++-------- core/schains/monitor/tasks.py | 101 +++++------- core/schains/process_manager.py | 4 +- tests/schains/monitor/main_test.py | 72 +++++---- 4 files changed, 258 insertions(+), 157 deletions(-) diff --git a/core/schains/monitor/main.py b/core/schains/monitor/main.py index f0b3ac9cb..fe1eb63d8 100644 --- a/core/schains/monitor/main.py +++ b/core/schains/monitor/main.py @@ -19,7 +19,7 @@ import functools import logging -from typing import Optional +from typing import Callable, Optional from importlib import reload from skale import Skale, SkaleIma @@ -37,6 +37,7 @@ from core.schains.monitor import get_skaled_monitor, RegularConfigMonitor, SyncConfigMonitor from core.schains.monitor.action import ConfigActionManager, SkaledActionManager from core.schains.monitor.pipeline import 
Pipeline, run_pipelines +from core.schains.monitor.tasks import execute_tasks, Future, ITask from core.schains.process import ProcessReport from core.schains.status import get_node_cli_status, get_skaled_status from core.node import get_current_nodes @@ -59,24 +60,32 @@ logger = logging.getLogger(__name__) +class NoTasksToRunError(Exception): + pass + + def run_config_pipeline( - skale: Skale, skale_ima: SkaleIma, schain: dict, node_config: NodeConfig, stream_version: str + schain_name: str, + skale: Skale, + skale_ima: SkaleIma, + node_config: NodeConfig, + stream_version: str, ) -> None: - name = schain.name - schain_record = SChainRecord.get_by_name(name) - rotation_data = skale.node_rotation.get_rotation(name) + schain = skale.schains.get_by_name(schain_name) + schain_record = SChainRecord.get_by_name(schain_name) + rotation_data = skale.node_rotation.get_rotation(schain_name) allowed_ranges = get_sync_agent_ranges(skale) - ima_linked = not SYNC_NODE and skale_ima.linker.has_schain(name) - group_index = skale.schains.name_to_group_id(name) + ima_linked = not SYNC_NODE and skale_ima.linker.has_schain(schain_name) + group_index = skale.schains.name_to_group_id(schain_name) last_dkg_successful = skale.dkg.is_last_dkg_successful(group_index) - current_nodes = get_current_nodes(skale, name) + current_nodes = get_current_nodes(skale, schain_name) estate = ExternalState( ima_linked=ima_linked, chain_id=skale_ima.web3.eth.chain_id, ranges=allowed_ranges ) - econfig = ExternalConfig(name) + econfig = ExternalConfig(schain_name) config_checks = ConfigChecks( - schain_name=name, + schain_name=schain_name, node_id=node_config.id, schain_record=schain_record, stream_version=stream_version, @@ -112,24 +121,25 @@ def run_config_pipeline( mon = RegularConfigMonitor(config_am, config_checks) statsd_client = get_statsd_client() - statsd_client.incr(f'admin.config_pipeline.{mon.__class__.__name__}.{no_hyphens(name)}') + statsd_client.incr(f'admin.config_pipeline.{mon.__class__.__name__}.{no_hyphens(schain_name)}') statsd_client.gauge( - f'admin.config_pipeline.rotation_id.{no_hyphens(name)}', rotation_data['rotation_id'] + f'admin.config_pipeline.rotation_id.{no_hyphens(schain_name)}', rotation_data['rotation_id'] ) - with statsd_client.timer(f'admin.config_pipeline.duration.{no_hyphens(name)}'): + with statsd_client.timer(f'admin.config_pipeline.duration.{no_hyphens(schain_name)}'): mon.run() def run_skaled_pipeline( - skale: Skale, schain: SchainStructure, node_config: NodeConfig, dutils: DockerUtils + schain_name: str, skale: Skale, node_config: NodeConfig, dutils: DockerUtils ) -> None: - name = schain.name - schain_record = SChainRecord.get_by_name(name) + schain = skale.schains.get_by_name(schain_name) + schain_record = SChainRecord.get_by_name(schain_name) + logger.info('Record: %s', SChainRecord.to_dict(schain_record)) dutils = dutils or DockerUtils() - rc = get_default_rule_controller(name=name) + rc = get_default_rule_controller(name=schain_name) skaled_checks = SkaledChecks( schain_name=schain.name, schain_record=schain_record, @@ -138,8 +148,8 @@ def run_skaled_pipeline( sync_node=SYNC_NODE, ) - skaled_status = get_skaled_status(name) - ncli_status = get_node_cli_status(name) + skaled_status = get_skaled_status(schain_name) + ncli_status = get_node_cli_status(schain_name) skaled_am = SkaledActionManager( schain=schain, @@ -147,13 +157,13 @@ def run_skaled_pipeline( checks=skaled_checks, node_config=node_config, ncli_status=ncli_status, - econfig=ExternalConfig(name), + 
econfig=ExternalConfig(schain_name), dutils=dutils, ) check_status = skaled_checks.get_all(log=False, expose=True) automatic_repair = get_automatic_repair_option() api_status = get_api_checks_status(status=check_status, allowed=TG_ALLOWED_CHECKS) - notify_checks(name, node_config.all(), api_status) + notify_checks(schain_name, node_config.all(), api_status) logger.info('Skaled check status: %s', check_status) @@ -165,12 +175,12 @@ def run_skaled_pipeline( schain_record=schain_record, skaled_status=skaled_status, ncli_status=ncli_status, - automatic_repair=automatic_repair + automatic_repair=automatic_repair, ) statsd_client = get_statsd_client() - statsd_client.incr(f'admin.skaled_pipeline.{mon.__name__}.{no_hyphens(name)}') - with statsd_client.timer(f'admin.skaled_pipeline.duration.{no_hyphens(name)}'): + statsd_client.incr(f'admin.skaled_pipeline.{mon.__name__}.{no_hyphens(schain_name)}') + with statsd_client.timer(f'admin.skaled_pipeline.duration.{no_hyphens(schain_name)}'): mon(skaled_am, skaled_checks).run() @@ -253,7 +263,127 @@ def start_monitor( return True -def start_jobs( +class SkaledTask(ITask): + NAME = 'skaled' + STUCK_TIMEOUT = 3600 # 1 hours + + def __init__( + self, + schain_name: str, + skale: Skale, + node_config: NodeConfig, + stream_version: str, + dutils: Optional[DockerUtils] = None, + ) -> None: + self.schain_name = schain_name + self.skale = skale + self.node_config = node_config + self.dutils = dutils + self._future = Future() + self.stream_version = stream_version + + @property + def name(self) -> str: + return self.NAME + + @property + def stuck_timeout(self) -> int: + return self.STUCK_TIMEOUT + + @property + def future(self) -> Future: + return self._future + + @future.setter + def future(self, value: Future) -> None: + self._future = value + + @property + def start_ts(self) -> int: + return self._start_ts + + @start_ts.setter + def start_ts(self, value: int) -> None: + self._start_ts = value + + @property + def needed(self) -> bool: + schain_record = upsert_schain_record(self.schain_name) + return schain_record.config_version == self.stream_version and ( + not schain_record.sync_config_run or not schain_record.first_run + ) + + def create_pipeline(self) -> Callable: + return functools.partial( + run_skaled_pipeline, + schain_name=self.schain_name, + skale=self.skale, + node_config=self.node_config, + dutils=self.dutils, + ) + + +class ConfigTask(ITask): + NAME = 'config' + STUCK_TIMEOUT = 60 * 60 * 2 + + def __init__( + self, + schain_name: str, + skale: Skale, + skale_ima: SkaleIma, + node_config: NodeConfig, + stream_version: str, + ) -> None: + self.schain_name = schain_name + self.skale = skale + self.skale_ima = skale_ima + self.node_config = node_config + self.stream_version = stream_version + self._start_ts = 0 + self._future = Future() + + @property + def name(self) -> str: + return self.NAME + + @property + def future(self) -> Future: + return self._future + + @future.setter + def future(self, value: Future) -> None: + self._future = value + + @property + def stuck_timeout(self) -> int: + dkg_timeout = self.skale.constants_holder.get_dkg_timeout() + return int(dkg_timeout * DKG_TIMEOUT_COEFFICIENT) + + @property + def start_ts(self) -> int: + return self._start_ts + + @start_ts.setter + def start_ts(self, value: int) -> None: + self._start_ts = value + + @property + def needed(self) -> bool: + return SYNC_NODE or is_node_part_of_chain(self.skale, self.schain_name, self.node_config.id) + + def create_pipeline(self) -> Callable: + return 
functools.partial(
+            run_config_pipeline,
+            schain_name=self.schain_name,
+            skale=self.skale,
+            skale_ima=self.skale_ima,
+            node_config=self.node_config,
+            stream_version=self.stream_version,
+        )
+
+
+def start_tasks(
     skale: Skale,
     schain: SchainStructure,
     node_config: NodeConfig,
@@ -262,14 +392,12 @@ def start_tasks(
     dutils: Optional[DockerUtils] = None,
 ) -> bool:
     reload(web3_request)
+
     name = schain.name
     stream_version = get_skale_node_version()
     schain_record = upsert_schain_record(name)
-    dkg_timeout = skale.constants_holder.get_dkg_timeout()
-    stuck_timeout = int(dkg_timeout * DKG_TIMEOUT_COEFFICIENT)
-
     is_rotation_active = skale.node_rotation.is_rotation_active(name)
     leaving_chain = not SYNC_NODE and not is_node_part_of_chain(skale, name, node_config.id)
@@ -289,44 +417,26 @@ def start_tasks(
     statsd_client.incr(f'admin.schain.monitor.{no_hyphens(name)}')
     statsd_client.gauge(f'admin.schain.monitor_last_seen.{no_hyphens(name)}', monitor_last_seen_ts)
-    pipelines = []
-    if not leaving_chain:
-        logger.info('Adding config pipelines to the pool')
-        pipelines.append(
-            Pipeline(
-                name='config',
-                job=functools.partial(
-                    run_config_pipeline,
-                    skale=skale,
-                    skale_ima=skale_ima,
-                    schain=schain,
-                    node_config=node_config,
-                    stream_version=stream_version,
-                ),
-            )
-        )
     if schain_record.config_version != stream_version or (
         schain_record.sync_config_run and schain_record.first_run
     ):
+        logger.info('Fetching upstream config requested. Removing the old skaled config')
         ConfigFileManager(name).remove_skaled_config()
-    else:
-        logger.info('Adding skaled pipeline to the pool')
-        pipelines.append(
-            Pipeline(
-                name='skaled',
-                job=functools.partial(
-                    run_skaled_pipeline,
-                    skale=skale,
-                    schain=schain,
-                    node_config=node_config,
-                    dutils=dutils,
-                ),
-            )
-        )
-
-    if len(pipelines) == 0:
-        logger.warning('No pipelines to run')
-        return False
-
-    run_pipelines(pipelines=pipelines, process_report=process_report, stuck_timeout=stuck_timeout)
-    return True
+    tasks = [
+        ConfigTask(
+            schain_name=schain.name,
+            skale=skale,
+            skale_ima=skale_ima,
+            node_config=node_config,
+            stream_version=stream_version,
+        ),
+        SkaledTask(
+            schain_name=schain.name,
+            skale=skale,
+            node_config=node_config,
+            stream_version=stream_version,
+            dutils=dutils
+        ),
+    ]
+    execute_tasks(tasks=tasks, process_report=process_report)
diff --git a/core/schains/monitor/tasks.py b/core/schains/monitor/tasks.py
index 8d9e5bc2d..1fd86052b 100644
--- a/core/schains/monitor/tasks.py
+++ b/core/schains/monitor/tasks.py
@@ -11,7 +11,7 @@
 STUCK_TIMEOUT = 60 * 60 * 2
-SHUTDOWN_INTERVAL = 60 * 10
+SLEEP_INTERVAL = 60 * 10
 class Pipeline(NamedTuple):
@@ -19,10 +19,10 @@ class Pipeline(NamedTuple):
     job: Callable
-class ITaskBuilder(metaclass=abc.ABCMeta):
+class ITask(metaclass=abc.ABCMeta):
     @property
     @abc.abstractmethod
-    def task_name(self) -> str:
+    def name(self) -> str:
         pass
     @property
@@ -31,52 +31,32 @@ def stuck_timeout(self) -> int:
         pass
     @abc.abstractmethod
-    def build_task(self) -> Callable:
+    def create_pipeline(self) -> Callable:
         pass
+    @property
+    @abc.abstractmethod
+    def future(self) -> Future:
+        pass
-def test_job1():
-    for i in range(500):
-        if i % 3 == 1:
-            print('Job 1 OOO')
-        time.sleep(1)
+    @future.setter
+    @abc.abstractmethod
+    def future(self, value: Future) -> None:
+        pass
-def test_job2():
-    for i in range(500):
-        if i % 3 == 1:
-            print('Job 2 YYY')
-        time.sleep(1)
+    @property
-class TestPipelineBuilder1:
-    def __init__(self) -> None:
-        self._task_name = 'task2'
-        self._stuck_timeout = 5
-
-    @property
-    def task_name(self) ->
str:
-        return self._task_name
+    def needed(self) -> bool:
+        pass
     @property
-    def stuck_timeout(self) -> int:
-        return self._stuck_timeout
-
-    def build_task(self):
-        return [
-            Pipeline(name='test0', job=test_job1),
-        ]
-
-
-class TestPipelineBuilder2:
-    def __init__(self) -> None:
-        self.task_name = 'task2'
-        self.stuck_timeout = 5
+    @abc.abstractmethod
+    def start_ts(self) -> int:
+        pass
-    def build_task(self):
-        return [
-            Pipeline(name='test1', job=test_job2),
-        ]
+    @start_ts.setter
+    @abc.abstractmethod
+    def start_ts(self, value: int) -> None:
+        pass
 class StuckMonitorError(Exception):
@@ -84,36 +64,31 @@ class StuckMonitorError(Exception):
 def execute_tasks(
-    task_builders: list[ITaskBuilder],
+    tasks: list[ITask],
     process_report: ProcessReport,
-    once: bool = False,
-    shutdown_interval: int = SHUTDOWN_INTERVAL,
+    sleep_interval: int = SLEEP_INTERVAL,
 ) -> None:
-    with ThreadPoolExecutor(max_workers=len(task_builders), thread_name_prefix='mon_') as executor:
+    with ThreadPoolExecutor(max_workers=len(tasks), thread_name_prefix='mon') as executor:
         stucked = []
-        futures = [Future() for _ in task_builders]
-        start_ts = [0 for _ in task_builders]
         while True:
-            for index, builder in enumerate(task_builders):
-                if not futures[index].running():
-                    job = builder.build_task()
-                    start_ts[index] = int(time.time())
-                    futures[index] = executor.submit(job)
-                else:
-                    if time.time() - start_ts[index] > builder.stuck_timeout:
-                        canceled = futures[index].cancel()
+            for index, task in enumerate(tasks):
+                if not task.future.running() and task.needed:
+                    task.start_ts = int(time.time())
+                    logger.info('Starting task %s at %d', task.name, task.start_ts)
+                    pipeline = task.create_pipeline()
+                    task.future = executor.submit(pipeline)
+                if task.future.running():
+                    if int(time.time()) - task.start_ts > task.stuck_timeout:
+                        logger.info('Canceling future for %s', task.name)
+                        canceled = task.future.cancel()
                         if not canceled:
-                            logger.warning(f'Stuck detected for job {builder.task_name}')
-                            stucked.append(builder.task_name)
-
-            logger.info('Sleeping before stopping executor')
-            time.sleep(shutdown_interval)
-
+                            logger.warning(f'Stuck detected for job {task.name}')
+                            stucked.append(task.name)
+            time.sleep(sleep_interval)
             if len(stucked) > 0:
+                logger.info('Sleeping before subverting execution')
                 executor.shutdown(wait=False)
                 logger.info('Subverting execution.
Stucked %s', stucked) process_report.ts = 0 break - if once: - break process_report.ts = int(time.time()) diff --git a/core/schains/process_manager.py b/core/schains/process_manager.py index 9527ea2bf..36e02253a 100644 --- a/core/schains/process_manager.py +++ b/core/schains/process_manager.py @@ -26,7 +26,7 @@ from skale.contracts.manager.schains import SchainStructure from core.node_config import NodeConfig -from core.schains.monitor.main import start_monitor +from core.schains.monitor.main import start_tasks from core.schains.notifications import notify_if_not_enough_balance from core.schains.process import ( is_monitor_process_alive, @@ -81,7 +81,7 @@ def run_pm_schain( report.ts = init_ts process = Process( name=schain.name, - target=start_monitor, + target=start_tasks, args=(skale, schain, node_config, skale_ima, report), ) process.start() diff --git a/tests/schains/monitor/main_test.py b/tests/schains/monitor/main_test.py index 36f7584e9..6192b13e5 100644 --- a/tests/schains/monitor/main_test.py +++ b/tests/schains/monitor/main_test.py @@ -4,6 +4,7 @@ import pathlib import shutil import time +from concurrent.futures import Future from multiprocessing import Process from typing import Callable @@ -13,7 +14,7 @@ from core.schains.firewall.utils import get_sync_agent_ranges from core.schains.monitor.main import Pipeline, run_pipelines from core.schains.process import ProcessReport, terminate_process -from core.schains.monitor.tasks import execute_tasks, ITaskBuilder +from core.schains.monitor.tasks import execute_tasks, ITask from tools.configs.schains import SCHAINS_DIR_PATH from tools.helper import is_node_part_of_chain @@ -127,55 +128,70 @@ def stuck_pipeline(index: int): assert terminated -def test_execute_jobs(tmp_dir, _schain_name): - def simple_pipeline(index: int) -> None: - logging.info('Running simple pipeline %d', index) - iterations = 7 - for i in range(iterations): - logging.info('Simple pipeline beat %d', index) - time.sleep(1) - - def stuck_pipeline(index: int) -> None: +def test_execute_tasks(tmp_dir, _schain_name): + def run_stuck_pipeline(index: int) -> None: logging.info('Running stuck pipeline %d', index) iterations = 7 for i in range(iterations): logging.info('Stuck pipeline %d beat', index) time.sleep(1) - class NormalTaskBuilder(ITaskBuilder): - def __init__(self, index: int) -> None: + class StuckedTask(ITask): + def __init__(self, index) -> None: + self._name = 'stucked-task' self.index = index - self.task_name = 'task-a' - self._stuck_timeout = 11 + self._stuck_timeout = 5 + self._start_ts = 0 + self._future = Future() - def task_name(self) -> str: - return self._task_name + @property + def name(self) -> str: + return self._name - def stuck_timeout(self) -> int: - return self._stuck_timeout + @property + def future(self) -> Future: + return self._future - def build_task(self) -> Callable: - return functools.partial(simple_pipeline, index=self.index) + @future.setter + def future(self, value: Future) -> None: + self._future = value - class StuckTaskBuilder(ITaskBuilder): - def __init__(self, index) -> None: - self._task_name = 'task-b' - self.index = index - self._stuck_timeout = 5 + @property + def start_ts(self) -> int: + return self._start_ts + + @start_ts.setter + def start_ts(self, value: int) -> None: + self._start_ts = value + @property def task_name(self) -> str: return self._task_name + @property def stuck_timeout(self) -> int: return self._stuck_timeout - def build_task(self) -> Callable: - return functools.partial(stuck_pipeline, index=self.index) + 
@property + def needed(self) -> bool: + return True + + def create_pipeline(self) -> Callable: + return functools.partial(run_stuck_pipeline, index=self.index) + + class NotNeededTask(StuckedTask): + def __init__(self, index: int) -> None: + self.index = index + self._name = 'not-needed-task' + + @property + def needed(self) -> bool: + return False process_report = ProcessReport(name=_schain_name) target = functools.partial( execute_tasks, - task_builders=[StuckTaskBuilder(0), StuckTaskBuilder(1)], + tasks=[StuckedTask(0), NotNeededTask(1)], process_report=process_report, shutdown_interval=10, ) From 72d30c0fc9ea9369d6ebbf1a8e2dbb10e6c24e16 Mon Sep 17 00:00:00 2001 From: Alex Sheverdin Date: Wed, 25 Sep 2024 16:16:37 +0100 Subject: [PATCH 080/103] Check SGX ZeroMQ connection --- web/routes/health.py | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/web/routes/health.py b/web/routes/health.py index 9ca41aa5d..031963507 100644 --- a/web/routes/health.py +++ b/web/routes/health.py @@ -51,11 +51,6 @@ BLUEPRINT_NAME = 'health' -class SGXStatus(Enum): - CONNECTED = 0 - NOT_CONNECTED = 1 - - health_bp = Blueprint(BLUEPRINT_NAME, __name__) @@ -138,26 +133,27 @@ def ima_log_checks(): def sgx_info(): logger.debug(request) sgx = SgxClient(SGX_SERVER_URL, SGX_CERTIFICATES_FOLDER) + status_https = False + status_zmq = False + version = None try: - status = sgx.get_server_status() + if sgx.get_server_status() == 0: + status_https = True version = sgx.get_server_version() except Exception as e: # todo: catch specific error - edit sgx.py logger.info(e) - status = 1 - version = None - sgx_host = urlparse(SGX_SERVER_URL).hostname - tn = telnetlib.Telnet() - zmq_status = 0 + sgx_zmq = SgxClient(SGX_SERVER_URL, SGX_CERTIFICATES_FOLDER, zmq=True) try: - tn.open(sgx_host, ZMQ_PORT, timeout=ZMQ_TIMEOUT) - except Exception as err: - zmq_status = 1 - logger.error(err) - else: - tn.close() + if sgx_zmq.zmq.get_server_status() == 0: + status_zmq = True + if version is None: + version = sgx_zmq.zmq.get_server_version() + except Exception as e: # todo: catch specific error - edit sgx.py + logger.info(e) + res = { - 'status': zmq_status, - 'status_name': SGXStatus(status).name, + 'status_zmq': status_zmq, + 'status_https': status_https, 'sgx_server_url': SGX_SERVER_URL, 'sgx_keyname': g.config.sgx_key_name, 'sgx_wallet_version': version From 8ae6cecc4faa84abaf539449d2d464a76a5c0ade Mon Sep 17 00:00:00 2001 From: Alex Sheverdin Date: Wed, 25 Sep 2024 16:52:37 +0100 Subject: [PATCH 081/103] Update sgx tests --- tests/routes/health_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/routes/health_test.py b/tests/routes/health_test.py index 25a20d31b..cb36bb87b 100644 --- a/tests/routes/health_test.py +++ b/tests/routes/health_test.py @@ -159,8 +159,8 @@ def test_sgx(skale_bp, skale): assert data == { 'payload': { 'sgx_server_url': SGX_SERVER_URL, - 'status': 0, - 'status_name': 'CONNECTED', + 'status_zmq': True, + 'status_https': True, 'sgx_wallet_version': version, 'sgx_keyname': TEST_SGX_KEYNAME, }, From 3a7d3d8ac6345242ce45a9a07251a6c3bb0d9245 Mon Sep 17 00:00:00 2001 From: badrogger Date: Wed, 25 Sep 2024 17:06:45 +0000 Subject: [PATCH 082/103] Fix tests --- core/schains/monitor/tasks.py | 1 + tests/schains/monitor/main_test.py | 30 ++++++++++----------------- tests/schains/process_manager_test.py | 2 +- 3 files changed, 13 insertions(+), 20 deletions(-) diff --git a/core/schains/monitor/tasks.py b/core/schains/monitor/tasks.py index 
1fd86052b..c6342f7a0 100644 --- a/core/schains/monitor/tasks.py +++ b/core/schains/monitor/tasks.py @@ -83,6 +83,7 @@ def execute_tasks( canceled = task.future.cancel() if not canceled: logger.warning('Stuck detected for job {task.name}') + task.start_ts = -1 stucked.append(task.name) time.sleep(sleep_interval) if len(stucked) > 0: diff --git a/tests/schains/monitor/main_test.py b/tests/schains/monitor/main_test.py index 6192b13e5..7b6a11eec 100644 --- a/tests/schains/monitor/main_test.py +++ b/tests/schains/monitor/main_test.py @@ -140,7 +140,7 @@ class StuckedTask(ITask): def __init__(self, index) -> None: self._name = 'stucked-task' self.index = index - self._stuck_timeout = 5 + self._stuck_timeout = 3 self._start_ts = 0 self._future = Future() @@ -162,6 +162,7 @@ def start_ts(self) -> int: @start_ts.setter def start_ts(self, value: int) -> None: + print(f'Updating start_ts {self} {value}') self._start_ts = value @property @@ -181,7 +182,7 @@ def create_pipeline(self) -> Callable: class NotNeededTask(StuckedTask): def __init__(self, index: int) -> None: - self.index = index + super().__init__(index=index) self._name = 'not-needed-task' @property @@ -189,22 +190,13 @@ def needed(self) -> bool: return False process_report = ProcessReport(name=_schain_name) - target = functools.partial( - execute_tasks, - tasks=[StuckedTask(0), NotNeededTask(1)], - process_report=process_report, - shutdown_interval=10, + tasks = [StuckedTask(0), NotNeededTask(1)] + execute_tasks( + tasks=tasks, + process_report=process_report, + sleep_interval=1 ) - monitor_process = Process(target=target) - terminated = False - - try: - monitor_process.start() - monitor_process.join(timeout=50) - finally: - if monitor_process.is_alive(): - terminated = True - terminate_process(monitor_process.ident) - - assert terminated + print(tasks[0], tasks[1]) + assert tasks[0].start_ts == -1 + assert tasks[1].start_ts == 0 diff --git a/tests/schains/process_manager_test.py b/tests/schains/process_manager_test.py index 80171e749..2372fb3b2 100644 --- a/tests/schains/process_manager_test.py +++ b/tests/schains/process_manager_test.py @@ -39,7 +39,7 @@ def target_stuck_mock(*args, **kwargs): timeout = 7 - with mock.patch('core.schains.process_manager.start_monitor', target_regular_mock): + with mock.patch('core.schains.process_manager.start_tasks', target_regular_mock): run_pm_schain(skale, skale_ima, node_config, schain, timeout=timeout) pid = ProcessReport(_schain_name).pid assert psutil.Process(pid).is_running() From 257fd15abede10bd703a3d25c95c41292dc7b5bf Mon Sep 17 00:00:00 2001 From: Alex Sheverdin Date: Wed, 25 Sep 2024 18:44:56 +0100 Subject: [PATCH 083/103] Change the order of sgx checks and improve logging --- web/routes/health.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/web/routes/health.py b/web/routes/health.py index 031963507..faa2bbede 100644 --- a/web/routes/health.py +++ b/web/routes/health.py @@ -132,24 +132,24 @@ def ima_log_checks(): @health_bp.route(get_api_url(BLUEPRINT_NAME, 'sgx'), methods=['GET']) def sgx_info(): logger.debug(request) - sgx = SgxClient(SGX_SERVER_URL, SGX_CERTIFICATES_FOLDER) - status_https = False status_zmq = False + status_https = False version = None + sgx = SgxClient(SGX_SERVER_URL, SGX_CERTIFICATES_FOLDER, zmq=True) try: - if sgx.get_server_status() == 0: - status_https = True - version = sgx.get_server_version() - except Exception as e: # todo: catch specific error - edit sgx.py - logger.info(e) - sgx_zmq = SgxClient(SGX_SERVER_URL, 
SGX_CERTIFICATES_FOLDER, zmq=True) - try: - if sgx_zmq.zmq.get_server_status() == 0: + if sgx.zmq.get_server_status() == 0: status_zmq = True + version = sgx.zmq.get_server_version() + except Exception as err: + logger.error(f'Cannot make SGX ZMQ check {err}') + sgx_https = SgxClient(SGX_SERVER_URL, SGX_CERTIFICATES_FOLDER) + try: + if sgx_https.get_server_status() == 0: + status_https = True if version is None: - version = sgx_zmq.zmq.get_server_version() - except Exception as e: # todo: catch specific error - edit sgx.py - logger.info(e) + version = sgx_https.get_server_version() + except Exception as err: + logger.error(f'Cannot make SGX HTTPS check {err}') res = { 'status_zmq': status_zmq, From 39bd051a9d284b59d9f6fd5164cac7d7fdadfbfd Mon Sep 17 00:00:00 2001 From: Alex Sheverdin Date: Wed, 25 Sep 2024 18:52:56 +0100 Subject: [PATCH 084/103] Remove unused dependencies --- web/routes/health.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/web/routes/health.py b/web/routes/health.py index faa2bbede..f23367472 100644 --- a/web/routes/health.py +++ b/web/routes/health.py @@ -18,8 +18,6 @@ # along with this program. If not, see . import logging -import telnetlib -from enum import Enum from http import HTTPStatus @@ -27,7 +25,6 @@ from sgx import SgxClient -from urllib.parse import urlparse from core.node import get_check_report, get_skale_node_version from core.node import get_current_nodes from core.schains.checks import SChainChecks @@ -38,7 +35,6 @@ from core.schains.ima import get_ima_log_checks from core.schains.external_config import ExternalState from tools.sgx_utils import SGX_CERTIFICATES_FOLDER, SGX_SERVER_URL -from tools.configs import ZMQ_PORT, ZMQ_TIMEOUT from web.models.schain import SChainRecord from web.helper import ( construct_err_response, From e4471d0d8a675820a68e82bed6572e449c1fcbae Mon Sep 17 00:00:00 2001 From: badrogger Date: Thu, 26 Sep 2024 12:02:15 +0000 Subject: [PATCH 085/103] Fix process_manager_tests --- core/schains/process_manager.py | 3 +-- tests/schains/process_manager_test.py | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/core/schains/process_manager.py b/core/schains/process_manager.py index 36e02253a..3d051f375 100644 --- a/core/schains/process_manager.py +++ b/core/schains/process_manager.py @@ -76,8 +76,7 @@ def run_pm_schain( else: pid = report.pid logger.info('%s Process is running: PID = %d', log_prefix, pid) - - if not report.is_exist() or not is_monitor_process_alive(report.pid): + else: report.ts = init_ts process = Process( name=schain.name, diff --git a/tests/schains/process_manager_test.py b/tests/schains/process_manager_test.py index 2372fb3b2..442eff4ac 100644 --- a/tests/schains/process_manager_test.py +++ b/tests/schains/process_manager_test.py @@ -35,12 +35,13 @@ def target_stuck_mock(*args, **kwargs): for _ in range(iterations): time.sleep(1) - schain = {'name': _schain_name} + schain = skale.schains.get_by_name(_schain_name) timeout = 7 with mock.patch('core.schains.process_manager.start_tasks', target_regular_mock): run_pm_schain(skale, skale_ima, node_config, schain, timeout=timeout) + pid = ProcessReport(_schain_name).pid assert psutil.Process(pid).is_running() From 967ccdbaa8ea36cc003285e209498e253c628b9d Mon Sep 17 00:00:00 2001 From: badrogger Date: Thu, 26 Sep 2024 16:45:35 +0000 Subject: [PATCH 086/103] Fix process_manager_test --- core/schains/cleaner.py | 2 +- core/schains/monitor/main.py | 7 ++- core/schains/process.py | 27 ++++---- core/schains/process_manager.py | 23 +++---- 
tests/schains/process_manager_test.py | 90 ++++++++++++++++++--------- 5 files changed, 92 insertions(+), 57 deletions(-) diff --git a/core/schains/cleaner.py b/core/schains/cleaner.py index 4865a394f..6141883ac 100644 --- a/core/schains/cleaner.py +++ b/core/schains/cleaner.py @@ -188,7 +188,7 @@ def remove_schain( logger.warning(msg) report = ProcessReport(name=schain_name) if report.is_exist(): - terminate_process(report) + terminate_process(report.pid) delete_bls_keys(skale, schain_name) sync_agent_ranges = get_sync_agent_ranges(skale) diff --git a/core/schains/monitor/main.py b/core/schains/monitor/main.py index fe1eb63d8..1cf2c2e04 100644 --- a/core/schains/monitor/main.py +++ b/core/schains/monitor/main.py @@ -19,6 +19,8 @@ import functools import logging +import os +import time from typing import Callable, Optional from importlib import reload @@ -388,12 +390,15 @@ def start_tasks( schain: SchainStructure, node_config: NodeConfig, skale_ima: SkaleIma, - process_report: ProcessReport, dutils: Optional[DockerUtils] = None, ) -> bool: reload(web3_request) name = schain.name + init_ts, pid = int(time.time()), os.getpid() + logger.info('Initializing process report %d %d', pid, init_ts) + process_report = ProcessReport(name) + process_report.update(pid, init_ts) stream_version = get_skale_node_version() schain_record = upsert_schain_record(name) diff --git a/core/schains/process.py b/core/schains/process.py index 4b4173ad0..387c767a0 100644 --- a/core/schains/process.py +++ b/core/schains/process.py @@ -17,11 +17,12 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +import json import logging import os import shutil import signal -import json +from typing import Tuple import pathlib import psutil @@ -36,6 +37,19 @@ P_KILL_WAIT_TIMEOUT = 60 +def is_schain_process_report_exist(schain_name: str) -> bool: + path = pathlib.Path(SCHAINS_DIR_PATH).joinpath(schain_name, ProcessReport.REPORT_FILENAME) + return path.is_file() + + +def get_schain_process_info(schain_name: str) -> Tuple[int | None, int | None]: + report = ProcessReport(schain_name) + if not report.is_exist(): + return None, None + else: + return report.pid, report.ts + + class ProcessReport: REPORT_FILENAME = 'process.json' @@ -98,17 +112,6 @@ def cleanup(self) -> None: os.remove(self.path) -def shutdown_process( - process_report: ProcessReport, - kill_timeout: int = P_KILL_WAIT_TIMEOUT, - log_msg: str = '' -) -> None: - pid = process_report.pid - terminate_process(pid=pid, kill_timeout=kill_timeout, log_msg=log_msg) - logger.info(f'Removing process report for {pid}') - process_report.cleanup() - - def terminate_process( pid: int, kill_timeout: int = P_KILL_WAIT_TIMEOUT, diff --git a/core/schains/process_manager.py b/core/schains/process_manager.py index 3d051f375..e14857a34 100644 --- a/core/schains/process_manager.py +++ b/core/schains/process_manager.py @@ -29,9 +29,9 @@ from core.schains.monitor.main import start_tasks from core.schains.notifications import notify_if_not_enough_balance from core.schains.process import ( + get_schain_process_info, is_monitor_process_alive, - shutdown_process, - ProcessReport, + terminate_process ) from tools.str_formatters import arguments_list_string @@ -67,26 +67,21 @@ def run_pm_schain( dkg_timeout = skale.constants_holder.get_dkg_timeout() allowed_diff = timeout or int(dkg_timeout * DKG_TIMEOUT_COEFFICIENT) - report = ProcessReport(schain.name) - init_ts = int(time.time()) - if report.is_exist() and 
is_monitor_process_alive(report.pid): - if init_ts - report.ts > allowed_diff: - logger.info('%s Terminating process: PID = %d', log_prefix, report.pid) - shutdown_process(report) + pid, pts = get_schain_process_info(schain.name) + if pid is not None and is_monitor_process_alive(pid): + if int(time.time()) - pts > allowed_diff: + logger.info('%s Terminating process: PID = %d', log_prefix, pid) + terminate_process(pid) else: - pid = report.pid logger.info('%s Process is running: PID = %d', log_prefix, pid) else: - report.ts = init_ts process = Process( name=schain.name, target=start_tasks, - args=(skale, schain, node_config, skale_ima, report), + args=(skale, schain, node_config, skale_ima) ) process.start() - pid = process.ident - report.pid = pid - logger.info('%s Process started: PID = %d', log_prefix, pid) + logger.info('Process started for %s', schain.name) def fetch_schains_to_monitor(skale: Skale, node_id: int) -> list: diff --git a/tests/schains/process_manager_test.py b/tests/schains/process_manager_test.py index 442eff4ac..ab215cc68 100644 --- a/tests/schains/process_manager_test.py +++ b/tests/schains/process_manager_test.py @@ -1,4 +1,5 @@ import mock +import logging import os import pathlib import shutil @@ -10,6 +11,11 @@ from core.schains.process import ProcessReport, terminate_process from core.schains.process_manager import run_pm_schain from tools.configs.schains import SCHAINS_DIR_PATH +from tests.utils import get_schain_struct + +logger = logging.getLogger(__name__) + +MAX_ITERATIONS = 100 @pytest.fixture @@ -22,41 +28,68 @@ def tmp_dir(_schain_name): shutil.rmtree(path, ignore_errors=True) -def test_run_pm_schain(tmp_dir, skale, skale_ima, node_config, _schain_name): - def target_regular_mock(*args, **kwargs): - process_report = args[-1] - iterations = 5 - for _ in range(iterations): - process_report.ts = int(time.time()) - time.sleep(1) +def target_regular_mock(*args, **kwargs): + schain_name = args[1].name + process_report = ProcessReport(schain_name) + process_report.update(os.getpid(), int(time.time())) + logger.info('Starting regular test task runner') + iterations = 5 + for i in range(iterations): + process_report.ts = int(time.time()) + logger.info('Regular test task runner beat %s', i) + time.sleep(1) - def target_stuck_mock(*args, **kwargs): - iterations = 10000 - for _ in range(iterations): - time.sleep(1) - schain = skale.schains.get_by_name(_schain_name) +def target_stuck_mock(*args, **kwargs): + schain_name = args[1].name + ProcessReport(schain_name).update(os.getpid(), int(time.time())) + logger.info('Starting stuck test task runner') + iterations = 10000 + for i in range(iterations): + logger.info('Stuck test task runner beat %s', i) + time.sleep(1) + + +def wait_for_process_report(process_report): + wait_it = 0 + while wait_it < MAX_ITERATIONS and not process_report.is_exist(): + time.sleep(0.5) + wait_it += 1 + assert process_report.is_exist() + + +def test_run_pm_schain(tmp_dir, skale, skale_ima, node_config, _schain_name): + schain = get_schain_struct(schain_name=_schain_name) timeout = 7 with mock.patch('core.schains.process_manager.start_tasks', target_regular_mock): run_pm_schain(skale, skale_ima, node_config, schain, timeout=timeout) - pid = ProcessReport(_schain_name).pid - assert psutil.Process(pid).is_running() + process_report = ProcessReport(schain.name) + wait_for_process_report(process_report) - start_ts = int(time.time()) + pid = process_report.pid - while int(time.time()) - start_ts < 2 * timeout: - time.sleep(1) - assert 
psutil.Process(pid).status() == 'zombie' + try: + assert psutil.Process(pid).is_running() + start_ts = int(time.time()) - with mock.patch('core.schains.process_manager.start_monitor', target_stuck_mock): - run_pm_schain(skale, skale_ima, node_config, schain, timeout=timeout) + while int(time.time()) - start_ts < 2 * timeout: + time.sleep(1) + assert psutil.Process(pid).status() not in ('dead', 'stopped') + finally: + pid = ProcessReport(_schain_name).pid + terminate_process(pid) - pid = ProcessReport(_schain_name).pid + old_pid = pid + wait_it = 0 + while wait_it < MAX_ITERATIONS and process_report.pid == old_pid: + time.sleep(0.5) + wait_it += 1 - assert psutil.Process(pid).is_running() + with mock.patch('core.schains.process_manager.start_tasks', target_stuck_mock): + run_pm_schain(skale, skale_ima, node_config, schain, timeout=timeout) start_ts = int(time.time()) @@ -65,12 +98,11 @@ def target_stuck_mock(*args, **kwargs): psutil.Process(pid).is_running() except psutil.NoSuchProcess: break - with mock.patch('core.schains.process_manager.start_monitor', target_stuck_mock): - run_pm_schain(skale, skale_ima, node_config, schain, timeout=timeout) time.sleep(1) - with pytest.raises(psutil.NoSuchProcess): - psutil.Process(pid).is_running() - - pid = ProcessReport(_schain_name).pid - terminate_process(pid) + try: + with pytest.raises(psutil.NoSuchProcess): + psutil.Process(pid).is_running() + finally: + pid = ProcessReport(_schain_name).pid + terminate_process(pid) From 7a203dc21ff79e3f9dcea82f6f8c535bbbe787f9 Mon Sep 17 00:00:00 2001 From: badrogger Date: Thu, 26 Sep 2024 17:42:39 +0000 Subject: [PATCH 087/103] Remove unused code --- core/schains/monitor/action.py | 2 +- core/schains/monitor/main.py | 80 ---------------------------------- core/schains/monitor/tasks.py | 2 +- 3 files changed, 2 insertions(+), 82 deletions(-) diff --git a/core/schains/monitor/action.py b/core/schains/monitor/action.py index b02cbab44..5e5fb301e 100644 --- a/core/schains/monitor/action.py +++ b/core/schains/monitor/action.py @@ -212,7 +212,7 @@ def dkg(self) -> bool: def upstream_config(self) -> bool: with self.statsd_client.timer(f'admin.action.upstream_config.{no_hyphens(self.name)}'): logger.info( - 'Creating new upstream_config rotation_id: %s, stream: %s', + 'Generating new upstream_config rotation_id: %s, stream: %s', self.rotation_data.get('rotation_id'), self.stream_version ) new_config = create_new_upstream_config( diff --git a/core/schains/monitor/main.py b/core/schains/monitor/main.py index 1cf2c2e04..8042a8cd0 100644 --- a/core/schains/monitor/main.py +++ b/core/schains/monitor/main.py @@ -38,7 +38,6 @@ from core.schains.external_config import ExternalConfig, ExternalState from core.schains.monitor import get_skaled_monitor, RegularConfigMonitor, SyncConfigMonitor from core.schains.monitor.action import ConfigActionManager, SkaledActionManager -from core.schains.monitor.pipeline import Pipeline, run_pipelines from core.schains.monitor.tasks import execute_tasks, Future, ITask from core.schains.process import ProcessReport from core.schains.status import get_node_cli_status, get_skaled_status @@ -186,85 +185,6 @@ def run_skaled_pipeline( mon(skaled_am, skaled_checks).run() -def start_monitor( - skale: Skale, - schain: SchainStructure, - node_config: NodeConfig, - skale_ima: SkaleIma, - process_report: ProcessReport, - dutils: Optional[DockerUtils] = None, -) -> bool: - reload(web3_request) - name = schain.name - - stream_version = get_skale_node_version() - schain_record = upsert_schain_record(name) - 
- dkg_timeout = skale.constants_holder.get_dkg_timeout() - stuck_timeout = int(dkg_timeout * DKG_TIMEOUT_COEFFICIENT) - - is_rotation_active = skale.node_rotation.is_rotation_active(name) - - leaving_chain = not SYNC_NODE and not is_node_part_of_chain(skale, name, node_config.id) - if leaving_chain and not is_rotation_active: - logger.info('Not on node (%d), finishing process', node_config.id) - return True - - logger.info( - 'sync_config_run %s, config_version %s, stream_version %s', - schain_record.sync_config_run, - schain_record.config_version, - stream_version, - ) - - statsd_client = get_statsd_client() - monitor_last_seen_ts = schain_record.monitor_last_seen.timestamp() - statsd_client.incr(f'admin.schain.monitor.{no_hyphens(name)}') - statsd_client.gauge(f'admin.schain.monitor_last_seen.{no_hyphens(name)}', monitor_last_seen_ts) - - pipelines = [] - if not leaving_chain: - logger.info('Adding config pipelines to the pool') - pipelines.append( - Pipeline( - name='config', - job=functools.partial( - run_config_pipeline, - skale=skale, - skale_ima=skale_ima, - schain=schain, - node_config=node_config, - stream_version=stream_version, - ), - ) - ) - if schain_record.config_version != stream_version or ( - schain_record.sync_config_run and schain_record.first_run - ): - ConfigFileManager(name).remove_skaled_config() - else: - logger.info('Adding skaled pipeline to the pool') - pipelines.append( - Pipeline( - name='skaled', - job=functools.partial( - run_skaled_pipeline, - skale=skale, - schain=schain, - node_config=node_config, - dutils=dutils, - ), - ) - ) - - if len(pipelines) == 0: - logger.warning('No pipelines to run') - return False - - run_pipelines(pipelines=pipelines, process_report=process_report, stuck_timeout=stuck_timeout) - return True - - class SkaledTask(ITask): NAME = 'skaled' STUCK_TIMEOUT = 3600 # 1 hours diff --git a/core/schains/monitor/tasks.py b/core/schains/monitor/tasks.py index c6342f7a0..e591e3b48 100644 --- a/core/schains/monitor/tasks.py +++ b/core/schains/monitor/tasks.py @@ -77,7 +77,7 @@ def execute_tasks( logger.info('Starting task %s at %d', task.name, task.start_ts) pipeline = task.create_pipeline() task.future = executor.submit(pipeline) - if task.future.running(): + elif task.future.running(): if int(time.time()) - task.start_ts > task.stuck_timeout: logger.info('Canceling future for %s', task.name) canceled = task.future.cancel() From d8947ed9ef4004b74b871ac1b71beacec831a6f3 Mon Sep 17 00:00:00 2001 From: badrogger Date: Fri, 27 Sep 2024 11:50:55 +0000 Subject: [PATCH 088/103] Remove unused pipeline module --- core/schains/monitor/pipeline.py | 107 ----------------------------- tests/schains/monitor/main_test.py | 66 +----------------- 2 files changed, 1 insertion(+), 172 deletions(-) delete mode 100644 core/schains/monitor/pipeline.py diff --git a/core/schains/monitor/pipeline.py b/core/schains/monitor/pipeline.py deleted file mode 100644 index eaf5c2ccf..000000000 --- a/core/schains/monitor/pipeline.py +++ /dev/null @@ -1,107 +0,0 @@ -import logging -import queue -import random -import threading -import time - -from typing import Callable, NamedTuple -from core.schains.process import ProcessReport - -logger = logging.getLogger(__name__) - - -MIN_SCHAIN_MONITOR_SLEEP_INTERVAL = 20 -MAX_SCHAIN_MONITOR_SLEEP_INTERVAL = 40 - -SKALED_PIPELINE_SLEEP = 2 -CONFIG_PIPELINE_SLEEP = 3 -STUCK_TIMEOUT = 60 * 60 * 2 -SHUTDOWN_INTERVAL = 60 * 10 - - -class Pipeline(NamedTuple): - name: str - job: Callable - - -def run_pipelines( - pipelines: list[Pipeline], - 
process_report: ProcessReport, - once: bool = False, - stuck_timeout: int = STUCK_TIMEOUT, - shutdown_interval: int = SHUTDOWN_INTERVAL, -) -> None: - init_ts = time.time() - - heartbeat_queues = [queue.Queue() for _ in range(len(pipelines))] - terminating_events = [threading.Event() for _ in range(len(pipelines))] - heartbeat_ts = [init_ts for _ in range(len(pipelines))] - - threads = [ - threading.Thread( - name=pipeline.name, - target=keep_pipeline, - args=[heartbeat_queue, terminating_event, pipeline.job], - ) - for heartbeat_queue, terminating_event, pipeline in zip( - heartbeat_queues, terminating_events, pipelines - ) - ] - - for th in threads: - th.start() - - stuck = False - while not stuck: - for pindex, heartbeat_queue in enumerate(heartbeat_queues): - if not heartbeat_queue.empty(): - heartbeat_ts[pindex] = heartbeat_queue.get() - ts = int(time.time()) - if ts - heartbeat_ts[pindex] > stuck_timeout: - logger.warning( - '%s pipeline has stucked (last heartbeat %d)', - pipelines[pindex].name, - heartbeat_ts[pindex], - ) - stuck = True - break - if once and all((lambda ts: ts > init_ts, heartbeat_ts)): - logger.info('Successfully completed requested single run') - break - ts = int(time.time()) - process_report.ts = ts - - logger.info('Terminating all pipelines') - for event in terminating_events: - if not event.is_set(): - event.set() - if stuck: - logger.info('Joining threads with timeout') - for thread in threads: - thread.join(timeout=shutdown_interval) - process_report.ts = 0 - logger.warning('Stuck was detected') - - logger.info('Finishing with pipelines') - - -def keep_pipeline( - reporting_queue: queue.Queue, terminate: threading.Event, pipeline: Callable -) -> None: - while not terminate.is_set(): - logger.info('Running pipeline') - try: - pipeline() - except Exception: - logger.exception('Pipeline run failed') - terminate.set() - reporting_queue.put(time.time()) - sleep_for_a_while() - - -def sleep_for_a_while(): - schain_monitor_sleep = random.randint( - MIN_SCHAIN_MONITOR_SLEEP_INTERVAL, MAX_SCHAIN_MONITOR_SLEEP_INTERVAL - ) - logger.info('Monitor iteration completed, sleeping for %d', schain_monitor_sleep) - time.sleep(schain_monitor_sleep) diff --git a/tests/schains/monitor/main_test.py b/tests/schains/monitor/main_test.py index 7b6a11eec..1e5a1389e 100644 --- a/tests/schains/monitor/main_test.py +++ b/tests/schains/monitor/main_test.py @@ -5,15 +5,13 @@ import shutil import time from concurrent.futures import Future -from multiprocessing import Process from typing import Callable import pytest from core.schains.firewall.types import IpRange from core.schains.firewall.utils import get_sync_agent_ranges -from core.schains.monitor.main import Pipeline, run_pipelines -from core.schains.process import ProcessReport, terminate_process +from core.schains.process import ProcessReport from core.schains.monitor.tasks import execute_tasks, ITask from tools.configs.schains import SCHAINS_DIR_PATH from tools.helper import is_node_part_of_chain @@ -66,68 +64,6 @@ def test_is_node_part_of_chain(skale, schain_on_contracts, node_config): assert not chain_on_node -def test_run_pipelines(tmp_dir, _schain_name): - def simple_pipeline(index: int): - logging.info('Running simple pipeline %d', index) - time.sleep(1) - logging.info('Finishing simple pipeline %d', index) - - def stuck_pipeline(index: int): - logging.info('Running stuck pipeline %d', index) - while True: - logging.info('Stuck pipeline %d beat', index) - time.sleep(2) - - process_report = ProcessReport(name=_schain_name) - - 
target = functools.partial( - run_pipelines, - pipelines=[ - Pipeline(name='healthy0', job=functools.partial(simple_pipeline, index=0)), - Pipeline(name='healthy1', job=functools.partial(simple_pipeline, index=1)), - ], - process_report=process_report, - once=True, - stuck_timeout=5, - shutdown_interval=10, - ) - - terminated = False - monitor_process = Process(target=target) - try: - monitor_process.start() - monitor_process.join() - finally: - if monitor_process.is_alive(): - terminated = True - terminate_process(monitor_process.ident) - assert not terminated - - target = functools.partial( - run_pipelines, - pipelines=[ - Pipeline(name='healthy', job=functools.partial(simple_pipeline, index=0)), - Pipeline(name='stuck', job=functools.partial(stuck_pipeline, index=1)), - ], - process_report=process_report, - stuck_timeout=5, - shutdown_interval=10, - ) - - monitor_process = Process(target=target) - terminated = False - - try: - monitor_process.start() - monitor_process.join(timeout=50) - finally: - if monitor_process.is_alive(): - terminated = True - terminate_process(monitor_process.ident) - - assert terminated - - def test_execute_tasks(tmp_dir, _schain_name): def run_stuck_pipeline(index: int) -> None: logging.info('Running stuck pipeline %d', index) From 5665cc85474095075f82d1122e6bd9c85435c067 Mon Sep 17 00:00:00 2001 From: badrogger Date: Fri, 27 Sep 2024 15:36:12 +0000 Subject: [PATCH 089/103] Reduce task sleeping interval. Improve logs. --- core/schains/monitor/action.py | 1 + core/schains/monitor/config_monitor.py | 2 ++ core/schains/monitor/main.py | 2 +- core/schains/monitor/skaled_monitor.py | 2 ++ core/schains/monitor/tasks.py | 6 +++--- 5 files changed, 9 insertions(+), 4 deletions(-) diff --git a/core/schains/monitor/action.py b/core/schains/monitor/action.py index 5e5fb301e..75dd0d5d1 100644 --- a/core/schains/monitor/action.py +++ b/core/schains/monitor/action.py @@ -229,6 +229,7 @@ def upstream_config(self) -> bool: result = False if not self.cfm.upstream_config_exists() or \ new_config != self.cfm.latest_upstream_config: + logger.info('Saving new config') rotation_id = self.rotation_data['rotation_id'] logger.info( 'Saving new upstream config rotation_id: %d, ips: %s', diff --git a/core/schains/monitor/config_monitor.py b/core/schains/monitor/config_monitor.py index 47587a1bc..639689870 100644 --- a/core/schains/monitor/config_monitor.py +++ b/core/schains/monitor/config_monitor.py @@ -45,6 +45,8 @@ def run(self): self.execute() self.am.log_executed_blocks() self.am._upd_last_seen() + except Exception as e: + logger.info('Config monitor type failed %s', typename, exc_info=e) finally: logger.info('Config monitor type finished %s', typename) diff --git a/core/schains/monitor/main.py b/core/schains/monitor/main.py index 8042a8cd0..a363659e2 100644 --- a/core/schains/monitor/main.py +++ b/core/schains/monitor/main.py @@ -187,7 +187,7 @@ def run_skaled_pipeline( class SkaledTask(ITask): NAME = 'skaled' - STUCK_TIMEOUT = 3600 # 1 hours + STUCK_TIMEOUT = 3600 # 1 hour def __init__( self, diff --git a/core/schains/monitor/skaled_monitor.py b/core/schains/monitor/skaled_monitor.py index a946ca2be..a0fe5c99b 100644 --- a/core/schains/monitor/skaled_monitor.py +++ b/core/schains/monitor/skaled_monitor.py @@ -55,6 +55,8 @@ def run(self): self.am._upd_schain_record() self.am.log_executed_blocks() self.am._upd_last_seen() + except Exception as e: + logger.info('Skaled monitor type failed %s', typename, exc_info=e) finally: logger.info('Skaled monitor type finished %s', typename) 
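# For context, a minimal sketch of how a task plugs into the loop used by this
# series, assuming the ITask interface from core/schains/monitor/tasks.py as
# it stands in these patches. The class name, timeout and chain name below are
# illustrative assumptions, not repo code.
import time
from concurrent.futures import Future
from typing import Callable

from core.schains.monitor.tasks import ITask, execute_tasks
from core.schains.process import ProcessReport


class HeartbeatTask(ITask):
    """Toy task: execute_tasks resubmits its pipeline every sleep interval."""

    def __init__(self) -> None:
        self._future: Future = Future()
        self._start_ts = 0

    @property
    def name(self) -> str:
        return 'heartbeat'

    @property
    def stuck_timeout(self) -> int:
        # execute_tasks cancels the future if a run exceeds this many seconds
        return 3600

    @property
    def future(self) -> Future:
        return self._future

    @future.setter
    def future(self, value: Future) -> None:
        self._future = value

    @property
    def start_ts(self) -> int:
        return self._start_ts

    @start_ts.setter
    def start_ts(self, value: int) -> None:
        self._start_ts = value

    @property
    def needed(self) -> bool:
        # A task is only submitted when it reports itself as needed
        return True

    def create_pipeline(self) -> Callable:
        def beat() -> None:
            print('heartbeat at', int(time.time()))
        return beat


# execute_tasks() loops forever, so a real caller runs it inside a dedicated
# process; the ProcessReport timestamp it refreshes is what run_pm_schain
# checks for liveness, e.g.:
#     execute_tasks(
#         tasks=[HeartbeatTask()],
#         process_report=ProcessReport('my-chain'),
#         sleep_interval=10,
#     )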
diff --git a/core/schains/monitor/tasks.py b/core/schains/monitor/tasks.py index e591e3b48..d7d450f4b 100644 --- a/core/schains/monitor/tasks.py +++ b/core/schains/monitor/tasks.py @@ -10,8 +10,7 @@ logger = logging.getLogger(__name__) -STUCK_TIMEOUT = 60 * 60 * 2 -SLEEP_INTERVAL = 60 * 10 +SLEEP_INTERVAL = 10 class Pipeline(NamedTuple): @@ -68,6 +67,7 @@ def execute_tasks( process_report: ProcessReport, sleep_interval: int = SLEEP_INTERVAL, ) -> None: + logger.info('Running tasks %s', tasks) with ThreadPoolExecutor(max_workers=len(tasks), thread_name_prefix='mon') as executor: stucked = [] while True: @@ -82,7 +82,7 @@ def execute_tasks( logger.info('Canceling future for %s', task.name) canceled = task.future.cancel() if not canceled: - logger.warning('Stuck detected for job {task.name}') + logger.warning('Stuck detected for job %s', task.name) task.start_ts = -1 stucked.append(task.name) time.sleep(sleep_interval) From f7fa83139f56ad5b910c83e1a1166c55457c47c0 Mon Sep 17 00:00:00 2001 From: badrogger Date: Mon, 30 Sep 2024 16:21:50 +0000 Subject: [PATCH 090/103] Remove unused classes --- core/schains/cleaner.py | 5 ++--- core/schains/monitor/tasks.py | 13 ++----------- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/core/schains/cleaner.py b/core/schains/cleaner.py index 6141883ac..53c9bd437 100644 --- a/core/schains/cleaner.py +++ b/core/schains/cleaner.py @@ -109,12 +109,11 @@ def monitor(skale, node_config, dutils=None): for schain_name in schains_on_node: if schain_name not in schain_names_on_contracts: - logger.warning(f'sChain {schain_name} was found on node, but not on contracts: \ -{schain_names_on_contracts}, going to remove it!') + logger.warning('%s was found on node, but not on contracts: %s, trying to cleanup', schain_name, schain_names_on_contracts) try: ensure_schain_removed(skale, schain_name, node_config.id, dutils=dutils) except Exception: - logger.exception(f'sChain removal {schain_name} failed') + logger.exception('%s removal failed', schain_name) logger.info('Cleanup procedure finished') diff --git a/core/schains/monitor/tasks.py b/core/schains/monitor/tasks.py index d7d450f4b..65e82ea32 100644 --- a/core/schains/monitor/tasks.py +++ b/core/schains/monitor/tasks.py @@ -2,7 +2,7 @@ import logging import time from concurrent.futures import Future, ThreadPoolExecutor -from typing import Callable, NamedTuple +from typing import Callable from core.schains.process import ProcessReport @@ -13,11 +13,6 @@ SLEEP_INTERVAL = 10 -class Pipeline(NamedTuple): - name: str - job: Callable - - class ITask(metaclass=abc.ABCMeta): @property @abc.abstractmethod @@ -58,17 +53,13 @@ def start_ts(self, value: int) -> None: pass -class StuckMonitorError(Exception): - pass - - def execute_tasks( tasks: list[ITask], process_report: ProcessReport, sleep_interval: int = SLEEP_INTERVAL, ) -> None: logger.info('Running tasks %s', tasks) - with ThreadPoolExecutor(max_workers=len(tasks), thread_name_prefix='mon') as executor: + with ThreadPoolExecutor(max_workers=len(tasks), thread_name_prefix='T') as executor: stucked = [] while True: for index, task in enumerate(tasks): From 812a5798f39077ef066c7c549f48e2bcf3a4ea01 Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 1 Oct 2024 15:02:11 +0000 Subject: [PATCH 091/103] Add additional monitor tasks tests --- core/schains/monitor/main.py | 1 + core/schains/monitor/tasks.py | 2 +- tests/schains/monitor/main_test.py | 60 +++++++++++++++++++++++++++--- web/models/schain.py | 4 +- 4 files changed, 59 insertions(+), 8 deletions(-) diff --git 
a/core/schains/monitor/main.py b/core/schains/monitor/main.py index a363659e2..9728bb346 100644 --- a/core/schains/monitor/main.py +++ b/core/schains/monitor/main.py @@ -202,6 +202,7 @@ def __init__( self.node_config = node_config self.dutils = dutils self._future = Future() + self._start_ts = 0 self.stream_version = stream_version @property diff --git a/core/schains/monitor/tasks.py b/core/schains/monitor/tasks.py index 65e82ea32..e15ce06aa 100644 --- a/core/schains/monitor/tasks.py +++ b/core/schains/monitor/tasks.py @@ -63,7 +63,7 @@ def execute_tasks( stucked = [] while True: for index, task in enumerate(tasks): - if not task.future.running() and task.needed: + if not task.future.running() and task.needed and len(stucked) == 0: task.start_ts = int(time.time()) logger.info('Starting task %s at %d', task.name, task.start_ts) pipeline = task.create_pipeline() diff --git a/tests/schains/monitor/main_test.py b/tests/schains/monitor/main_test.py index 1e5a1389e..242fbe43e 100644 --- a/tests/schains/monitor/main_test.py +++ b/tests/schains/monitor/main_test.py @@ -6,15 +6,18 @@ import time from concurrent.futures import Future from typing import Callable +from unittest import mock import pytest from core.schains.firewall.types import IpRange from core.schains.firewall.utils import get_sync_agent_ranges from core.schains.process import ProcessReport +from core.schains.monitor.main import ConfigTask, SkaledTask from core.schains.monitor.tasks import execute_tasks, ITask from tools.configs.schains import SCHAINS_DIR_PATH from tools.helper import is_node_part_of_chain +from web.models.schain import upsert_schain_record @pytest.fixture @@ -64,6 +67,57 @@ def test_is_node_part_of_chain(skale, schain_on_contracts, node_config): assert not chain_on_node +def test_config_task(skale, skale_ima, schain_db, schain_on_contracts, node_config): + stream_version = '2.3.0' + config_task = ConfigTask( + schain_name=schain_on_contracts, + skale=skale, + skale_ima=skale_ima, + node_config=node_config, + stream_version=stream_version, + ) + assert config_task.needed + skale_ima.linker.has_schain = mock.Mock(return_value=True) + + def get_monitor_mock(*args, **kwargs): + result = mock.MagicMock() + result.__name__ = 'TestConfigMonitor' + return result + + with mock.patch('core.schains.monitor.main.RegularConfigMonitor', get_monitor_mock): + pipeline = config_task.create_pipeline() + pipeline() + + +def test_skaled_task(skale, schain_db, schain_on_contracts, node_config, dutils): + record = upsert_schain_record(schain_on_contracts) + stream_version = '2.3.0' + skaled_task = SkaledTask( + schain_name=schain_on_contracts, + skale=skale, + node_config=node_config, + stream_version=stream_version, + dutils=dutils, + ) + assert not skaled_task.needed + assert skaled_task.name == 'skaled' + assert skaled_task.start_ts == 0 + assert skaled_task.stuck_timeout == 3600 + + record.set_config_version(stream_version) + assert skaled_task.needed + + def get_monitor_mock(*args, **kwargs): + result = mock.MagicMock() + result.__name__ = 'TestSkaledMonitor' + return result + + with mock.patch('core.schains.monitor.main.get_skaled_monitor', get_monitor_mock): + with mock.patch('core.schains.monitor.main.notify_checks'): + pipeline = skaled_task.create_pipeline() + pipeline() + + def test_execute_tasks(tmp_dir, _schain_name): def run_stuck_pipeline(index: int) -> None: logging.info('Running stuck pipeline %d', index) @@ -127,11 +181,7 @@ def needed(self) -> bool: process_report = ProcessReport(name=_schain_name) tasks = 
[StuckedTask(0), NotNeededTask(1)] - execute_tasks( - tasks=tasks, - process_report=process_report, - sleep_interval=1 - ) + execute_tasks(tasks=tasks, process_report=process_report, sleep_interval=1) print(tasks[0], tasks[1]) assert tasks[0].start_ts == -1 diff --git a/web/models/schain.py b/web/models/schain.py index 264904f5a..5f92e45ca 100644 --- a/web/models/schain.py +++ b/web/models/schain.py @@ -31,7 +31,7 @@ logger = logging.getLogger(__name__) DEFAULT_CONFIG_VERSION = '0.0.0' -RETRY_ATTEMPTS = 10 +RETRY_ATTEMPTS = 5 TIMEOUTS = [2 ** p for p in range(RETRY_ATTEMPTS)] @@ -43,7 +43,7 @@ def wrapper(cls, *args, **kwargs): try: result = func(cls, *args, **kwargs) except OperationalError as e: - logger.exception('DB operational error') + logger.error('DB operational error. Sleeping %d', timeout, exc_info=e) error = e time.sleep(timeout) else: From 0fa074c7dbd9b5e72a4868a2db0a8f6b246c7627 Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 1 Oct 2024 15:55:53 +0000 Subject: [PATCH 092/103] Fix linter --- core/schains/cleaner.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/core/schains/cleaner.py b/core/schains/cleaner.py index 53c9bd437..f006d439a 100644 --- a/core/schains/cleaner.py +++ b/core/schains/cleaner.py @@ -109,7 +109,11 @@ def monitor(skale, node_config, dutils=None): for schain_name in schains_on_node: if schain_name not in schain_names_on_contracts: - logger.warning('%s was found on node, but not on contracts: %s, trying to cleanup', schain_name, schain_names_on_contracts) + logger.warning( + '%s was found on node, but not on contracts: %s, trying to cleanup', + schain_name, + schain_names_on_contracts, + ) try: ensure_schain_removed(skale, schain_name, node_config.id, dutils=dutils) except Exception: @@ -240,9 +244,7 @@ def cleanup_schain( ) check_status = checks.get_all() if check_status['skaled_container'] or is_exited( - schain_name, - container_type=ContainerType.schain, - dutils=dutils + schain_name, container_type=ContainerType.schain, dutils=dutils ): remove_schain_container(schain_name, dutils=dutils) if check_status['volume']: @@ -259,9 +261,7 @@ def cleanup_schain( rc.cleanup() if estate is not None and estate.ima_linked: if check_status.get('ima_container', False) or is_exited( - schain_name, - container_type=ContainerType.ima, - dutils=dutils + schain_name, container_type=ContainerType.ima, dutils=dutils ): remove_ima_container(schain_name, dutils=dutils) if check_status['config_dir']: From 20f3533163cfab08306c29d249a892f8feead480 Mon Sep 17 00:00:00 2001 From: badrogger Date: Thu, 3 Oct 2024 17:39:47 +0000 Subject: [PATCH 093/103] Add can-update call --- core/updates.py | 13 ++++++++ tests/routes/node_test.py | 69 ++++++++++++++++++++++++++++++--------- web/routes/node.py | 9 +++++ web/routes/schains.py | 1 - 4 files changed, 75 insertions(+), 17 deletions(-) diff --git a/core/updates.py b/core/updates.py index dba476e7b..6251e0244 100644 --- a/core/updates.py +++ b/core/updates.py @@ -23,6 +23,8 @@ from core.node_config import NodeConfig from core.ima.schain import update_predeployed_ima +from core.schains.cleaner import get_schains_on_node +from tools.docker_utils import DockerUtils logger = logging.getLogger(__name__) @@ -56,3 +58,14 @@ def update_node_config_file(skale: Skale, node_config: NodeConfig) -> None: node_config.ip = ip if node_config.name != name: node_config.name = name + + +def is_update_possible(skale: Skale, node_config: NodeConfig, dutils: DockerUtils) -> bool: + schains_on_node = 
get_schains_on_node(dutils=dutils) + result = True + + for schain_name in schains_on_node: + if skale.node_rotation.is_rotation_active(schain_name): + logger.info('Rotation for %s is in progress', schain_name) + result = False + return result diff --git a/tests/routes/node_test.py b/tests/routes/node_test.py index 61e351d6b..c56e8291b 100644 --- a/tests/routes/node_test.py +++ b/tests/routes/node_test.py @@ -40,22 +40,24 @@ def handler(sender, **kwargs): yield app.test_client() -@pytest.fixture -def node_contracts(skale): - ip, public_ip, port, name = generate_random_node_data() - skale.manager.create_node(ip, port, name, - domain_name=DEFAULT_DOMAIN_NAME, wait_for=True) - node_id = skale.nodes.node_name_to_index(name) - yield node_id - skale.nodes.init_exit(node_id) - skale.manager.node_exit(node_id, wait_for=True) - - -@pytest.fixture -def node_config(node_contracts): - config = NodeConfig() - config.id = node_contracts - return config +# @pytest.fixture +# def node_contracts(skale): +# ip, public_ip, port, name = generate_random_node_data() +# skale.manager.create_node(ip, port, name, +# domain_name=DEFAULT_DOMAIN_NAME, wait_for=True) +# node_id = skale.nodes.node_name_to_index(name) +# try: +# yield node_id +# finally: +# skale.nodes.init_exit(node_id) +# skale.manager.node_exit(node_id, wait_for=True) + + +# @pytest.fixture +# def node_config(node_contracts): +# config = NodeConfig() +# config.id = node_contracts +# return config def test_node_info(skale_bp, skale, node_config): @@ -272,3 +274,38 @@ def test_exit_maintenance(skale_bp, node_config_in_maintenance): ) assert data['status'] == 'error' data['payload'] == {} + + +@pytest.fixture +def skale_bp_node(skale, node_config, schain_on_contracts, schain_db, dutils): + app = Flask(__name__) + app.register_blueprint(node_bp) + + def handler(sender, **kwargs): + g.docker_utils = dutils + g.wallet = skale.wallet + g.config = node_config + + with appcontext_pushed.connected_to(handler, app): + yield app.test_client() + + +def test_can_update(skale, node_config, skale_bp_node): + data = get_bp_data( + skale_bp_node, + get_api_url(BLUEPRINT_NAME, 'can-update'), + ) + assert data['status'] == 'ok' + data['payload'] == {'can-update': True} + + print(skale.schains.get_schains_for_node(node_config.id)) + skale.nodes.init_exit(node_config.id) + skale.manager.node_exit(node_config.id) + + data = get_bp_data( + skale_bp_node, + get_api_url(BLUEPRINT_NAME, 'can-update'), + ) + print(data) + data['payload'] == {'can-update': False} + assert data['status'] == 'ok' diff --git a/web/routes/node.py b/web/routes/node.py index 373603383..9748be28a 100644 --- a/web/routes/node.py +++ b/web/routes/node.py @@ -29,6 +29,7 @@ from core.node import get_meta_info, get_node_hardware_info, get_btrfs_info, get_abi_hash from core.node import check_validator_nodes +from core.updates import is_update_possible from tools.configs.web3 import ABI_FILEPATH, ENDPOINT, UNTRUSTED_PROVIDERS @@ -266,3 +267,11 @@ def ima_abi(): logger.debug(request) abi_hash = get_abi_hash(MAINNET_IMA_ABI_FILEPATH) return construct_ok_response(data=abi_hash) + + +@node_bp.route(get_api_url(BLUEPRINT_NAME, 'can-update'), methods=['GET']) +@g_skale +def update_possible(): + logger.debug(request) + possible = is_update_possible(g.skale, g.config, g.docker_utils) + return construct_ok_response(data={'can_update': possible}) diff --git a/web/routes/schains.py b/web/routes/schains.py index 060a83ffd..58963321e 100644 --- a/web/routes/schains.py +++ b/web/routes/schains.py @@ -85,7 +85,6 @@ def 
schain_config(): @schains_bp.route(get_api_url(BLUEPRINT_NAME, 'list'), methods=['GET']) @g_skale def schains_list(): - logger.debug(request) logger.debug(request) node_id = g.config.id if node_id is None: From a2d7d258849061d8c0a94f5b5e6e9c3ba5a6c1ce Mon Sep 17 00:00:00 2001 From: badrogger Date: Fri, 4 Oct 2024 15:00:23 +0000 Subject: [PATCH 094/103] Fix SchainStructure usage --- core/schains/config/generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/schains/config/generator.py b/core/schains/config/generator.py index 3ea22bade..2af161dc5 100644 --- a/core/schains/config/generator.py +++ b/core/schains/config/generator.py @@ -161,7 +161,7 @@ def generate_schain_config( 'chainID': get_chain_id(schain.name) } - legacy_groups = static_groups(schain['name']) + legacy_groups = static_groups(schain.name) logger.debug('Legacy node groups: %s', legacy_groups) logger.debug('Vanilla node groups: %s', node_groups) node_groups.update(legacy_groups) From 1bbda9191fc9f8a191f2b545bbe7fe6da3413989 Mon Sep 17 00:00:00 2001 From: badrogger Date: Mon, 7 Oct 2024 12:20:48 +0000 Subject: [PATCH 095/103] Fix tests --- core/updates.py | 18 +++++++++++------- tests/routes/node_test.py | 40 ++++++++++++++++++++++++--------------- web/routes/node.py | 12 ++++++------ 3 files changed, 42 insertions(+), 28 deletions(-) diff --git a/core/updates.py b/core/updates.py index 6251e0244..5f360e62a 100644 --- a/core/updates.py +++ b/core/updates.py @@ -23,10 +23,10 @@ from core.node_config import NodeConfig from core.ima.schain import update_predeployed_ima +from core.schains.config.file_manager import ConfigFileManager from core.schains.cleaner import get_schains_on_node from tools.docker_utils import DockerUtils - logger = logging.getLogger(__name__) @@ -60,12 +60,16 @@ def update_node_config_file(skale: Skale, node_config: NodeConfig) -> None: node_config.name = name -def is_update_possible(skale: Skale, node_config: NodeConfig, dutils: DockerUtils) -> bool: +def update_unsafe_for_schains(skale: Skale, node_config: NodeConfig, dutils: DockerUtils) -> list[str]: schains_on_node = get_schains_on_node(dutils=dutils) - result = True - + unsafe_chains = [] for schain_name in schains_on_node: + cfm = ConfigFileManager(schain_name=schain_name) if skale.node_rotation.is_rotation_active(schain_name): - logger.info('Rotation for %s is in progress', schain_name) - result = False - return result + logger.info('Rotation is in progress for %s', schain_name) + unsafe_chains.append(schain_name) + # To handle the gap between SM finish ts and skaled exit time + elif cfm.skaled_config_exists() and not cfm.skaled_config_synced_with_upstream(): + logger.info('Skaled config is not synced with upstream for %s', schain_name) + unsafe_chains.append(schain_name) + return unsafe_chains diff --git a/tests/routes/node_test.py b/tests/routes/node_test.py index c56e8291b..fa7cc0e90 100644 --- a/tests/routes/node_test.py +++ b/tests/routes/node_test.py @@ -14,11 +14,13 @@ from core.node import Node, NodeStatus from core.node_config import NodeConfig -from tests.utils import get_bp_data, post_bp_data +from core.schains.config.file_manager import ConfigFileManager from tools.configs.tg import TG_API_KEY, TG_CHAT_ID from web.routes.node import node_bp from web.helper import get_api_url +from tests.utils import get_bp_data, post_bp_data + CURRENT_TIMESTAMP = 1594903080 CURRENT_DATETIME = datetime.datetime.utcfromtimestamp(CURRENT_TIMESTAMP) @@ -277,35 +279,43 @@ def test_exit_maintenance(skale_bp, 
node_config_in_maintenance): @pytest.fixture -def skale_bp_node(skale, node_config, schain_on_contracts, schain_db, dutils): +def skale_node_bp(skale, node_config, dutils): app = Flask(__name__) app.register_blueprint(node_bp) def handler(sender, **kwargs): g.docker_utils = dutils g.wallet = skale.wallet - g.config = node_config + g.config = NodeConfig() with appcontext_pushed.connected_to(handler, app): yield app.test_client() -def test_can_update(skale, node_config, skale_bp_node): +def test_update_safe(skale, schain_on_contracts, schain_config, upstreams, skale_node_bp): data = get_bp_data( - skale_bp_node, - get_api_url(BLUEPRINT_NAME, 'can-update'), + skale_node_bp, + get_api_url(BLUEPRINT_NAME, 'update-safe'), ) assert data['status'] == 'ok' - data['payload'] == {'can-update': True} + assert data['payload'] == {'update_safe': True, 'unsafe_chains': []} + + with mock.patch('web.helper.init_skale', return_value=skale): + skale.node_rotation.is_rotation_active = mock.Mock(return_value=True) + data = get_bp_data( + skale_node_bp, + get_api_url(BLUEPRINT_NAME, 'update-safe'), + ) + + assert data['payload'] == {'update_safe': False, 'unsafe_chains': [schain_on_contracts]} + + cfm = ConfigFileManager(schain_on_contracts) - print(skale.schains.get_schains_for_node(node_config.id)) - skale.nodes.init_exit(node_config.id) - skale.manager.node_exit(node_config.id) + cfm.save_skaled_config({}) data = get_bp_data( - skale_bp_node, - get_api_url(BLUEPRINT_NAME, 'can-update'), + skale_node_bp, + get_api_url(BLUEPRINT_NAME, 'update-safe'), ) - print(data) - data['payload'] == {'can-update': False} - assert data['status'] == 'ok' + + assert data['payload'] == {'update_safe': False, 'unsafe_chains': [schain_on_contracts]} diff --git a/web/routes/node.py b/web/routes/node.py index 9748be28a..0d2a5e88a 100644 --- a/web/routes/node.py +++ b/web/routes/node.py @@ -29,8 +29,7 @@ from core.node import get_meta_info, get_node_hardware_info, get_btrfs_info, get_abi_hash from core.node import check_validator_nodes -from core.updates import is_update_possible - +from core.updates import update_unsafe_for_schains from tools.configs.web3 import ABI_FILEPATH, ENDPOINT, UNTRUSTED_PROVIDERS from tools.configs.ima import MAINNET_IMA_ABI_FILEPATH @@ -269,9 +268,10 @@ def ima_abi(): return construct_ok_response(data=abi_hash) -@node_bp.route(get_api_url(BLUEPRINT_NAME, 'can-update'), methods=['GET']) +@node_bp.route(get_api_url(BLUEPRINT_NAME, 'update-safe'), methods=['GET']) @g_skale -def update_possible(): +def update_safe(): logger.debug(request) - possible = is_update_possible(g.skale, g.config, g.docker_utils) - return construct_ok_response(data={'can_update': possible}) + unsafe_chains = update_unsafe_for_schains(g.skale, g.config, g.docker_utils) + safe = len(unsafe_chains) == 0 + return construct_ok_response(data={'update_safe': safe, 'unsafe_chains': unsafe_chains}) From b510641ecbb704143618d1888e0adb8a9864da07 Mon Sep 17 00:00:00 2001 From: badrogger Date: Mon, 7 Oct 2024 17:59:08 +0000 Subject: [PATCH 096/103] Fix linter --- core/updates.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/core/updates.py b/core/updates.py index 5f360e62a..9a93e4eb9 100644 --- a/core/updates.py +++ b/core/updates.py @@ -60,7 +60,11 @@ def update_node_config_file(skale: Skale, node_config: NodeConfig) -> None: node_config.name = name -def update_unsafe_for_schains(skale: Skale, node_config: NodeConfig, dutils: DockerUtils) -> list[str]: +def update_unsafe_for_schains( + skale: Skale, + node_config: 
NodeConfig, + dutils: DockerUtils +) -> list[str]: schains_on_node = get_schains_on_node(dutils=dutils) unsafe_chains = [] for schain_name in schains_on_node: From 028fd27035d44eda35379a57a65e4feeae21d024 Mon Sep 17 00:00:00 2001 From: badrogger Date: Mon, 7 Oct 2024 18:28:39 +0000 Subject: [PATCH 097/103] Improve node routes tests --- tests/routes/node_test.py | 48 ++++++--------------------------------- 1 file changed, 7 insertions(+), 41 deletions(-) diff --git a/tests/routes/node_test.py b/tests/routes/node_test.py index fa7cc0e90..80f66ab52 100644 --- a/tests/routes/node_test.py +++ b/tests/routes/node_test.py @@ -29,7 +29,7 @@ @pytest.fixture -def skale_bp(skale, dutils): +def skale_bp(skale, node_config, dutils): app = Flask(__name__) app.register_blueprint(node_bp) @@ -42,34 +42,14 @@ def handler(sender, **kwargs): yield app.test_client() -# @pytest.fixture -# def node_contracts(skale): -# ip, public_ip, port, name = generate_random_node_data() -# skale.manager.create_node(ip, port, name, -# domain_name=DEFAULT_DOMAIN_NAME, wait_for=True) -# node_id = skale.nodes.node_name_to_index(name) -# try: -# yield node_id -# finally: -# skale.nodes.init_exit(node_id) -# skale.manager.node_exit(node_id, wait_for=True) - - -# @pytest.fixture -# def node_config(node_contracts): -# config = NodeConfig() -# config.id = node_contracts -# return config - - -def test_node_info(skale_bp, skale, node_config): +def test_node_info(skale_bp, skale, node_config, node_wallets): data = get_bp_data(skale_bp, get_api_url(BLUEPRINT_NAME, 'info')) status = NodeStatus.ACTIVE.value assert data['status'] == 'ok' node_info = data['payload']['node_info'] assert node_info['id'] == node_config.id assert node_info['status'] == status - assert to_checksum_address(node_info['owner']) == skale.wallet.address + assert to_checksum_address(node_info['owner']) == node_wallets[0].address def register_mock(self, ip, public_ip, port, name, domain_name, gas_limit=None, @@ -278,23 +258,9 @@ def test_exit_maintenance(skale_bp, node_config_in_maintenance): data['payload'] == {} -@pytest.fixture -def skale_node_bp(skale, node_config, dutils): - app = Flask(__name__) - app.register_blueprint(node_bp) - - def handler(sender, **kwargs): - g.docker_utils = dutils - g.wallet = skale.wallet - g.config = NodeConfig() - - with appcontext_pushed.connected_to(handler, app): - yield app.test_client() - - -def test_update_safe(skale, schain_on_contracts, schain_config, upstreams, skale_node_bp): +def test_update_safe(skale, schain_on_contracts, schain_config, upstreams, skale_bp): data = get_bp_data( - skale_node_bp, + skale_bp, get_api_url(BLUEPRINT_NAME, 'update-safe'), ) assert data['status'] == 'ok' @@ -303,7 +269,7 @@ def test_update_safe(skale, schain_on_contracts, schain_config, upstreams, skale with mock.patch('web.helper.init_skale', return_value=skale): skale.node_rotation.is_rotation_active = mock.Mock(return_value=True) data = get_bp_data( - skale_node_bp, + skale_bp, get_api_url(BLUEPRINT_NAME, 'update-safe'), ) @@ -314,7 +280,7 @@ def test_update_safe(skale, schain_on_contracts, schain_config, upstreams, skale cfm.save_skaled_config({}) data = get_bp_data( - skale_node_bp, + skale_bp, get_api_url(BLUEPRINT_NAME, 'update-safe'), ) From bea1a351652a4663f2dd28e131d8e93c346a3bd8 Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 8 Oct 2024 16:58:01 +0000 Subject: [PATCH 098/103] Fix test_run_monitor_for_schain_left test --- tests/schains/monitor/main_test.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) 
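# For context, a hedged client-side sketch for the update-safe endpoint added
# a few patches above. It assumes get_api_url() maps ('node', 'update-safe')
# to /api/v1/node/update-safe and that the admin API listens on
# localhost:3007; the port, helper name and printout are assumptions, not
# code from this series.
import requests


def is_node_update_safe(base_url: str = 'http://localhost:3007') -> bool:
    # construct_ok_response() wraps data as {'status': ..., 'payload': ...}
    response = requests.get(f'{base_url}/api/v1/node/update-safe', timeout=10)
    response.raise_for_status()
    payload = response.json()['payload']
    if not payload['update_safe']:
        # Chains mid-rotation, or whose skaled config is not yet synced with
        # the upstream config, make a node update unsafe
        print('Unsafe chains:', payload['unsafe_chains'])
    return payload['update_safe']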
diff --git a/tests/schains/monitor/main_test.py b/tests/schains/monitor/main_test.py index 865fe081d..f637b4c96 100644 --- a/tests/schains/monitor/main_test.py +++ b/tests/schains/monitor/main_test.py @@ -83,13 +83,14 @@ def test_run_monitor_for_schain_left( ): schain_not_exists = 'not-on-node' upsert_schain_record(schain_not_exists) - with mock.patch('core.schains.monitor.main.keep_tasks_running') as keep_tasks_running_mock: - run_monitor_for_schain( - skale, - skale_ima, - node_config, - get_schain_struct(schain_name=schain_db), - dutils=dutils, - once=True - ) - keep_tasks_running_mock.assert_not_called() + with mock.patch('core.schains.monitor.main.is_node_part_of_chain', return_value=False): + with mock.patch('core.schains.monitor.main.keep_tasks_running') as keep_tasks_running_mock: + run_monitor_for_schain( + skale, + skale_ima, + node_config, + get_schain_struct(schain_name=schain_not_exists), + dutils=dutils, + once=True + ) + keep_tasks_running_mock.assert_not_called() From 38a15cdca0f17566d1f8664ca2613ee3a99e3293 Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 8 Oct 2024 18:32:00 +0000 Subject: [PATCH 099/103] Fix tests --- tests/routes/node_test.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/routes/node_test.py b/tests/routes/node_test.py index 80f66ab52..c0bc291a0 100644 --- a/tests/routes/node_test.py +++ b/tests/routes/node_test.py @@ -267,13 +267,13 @@ def test_update_safe(skale, schain_on_contracts, schain_config, upstreams, skale assert data['payload'] == {'update_safe': True, 'unsafe_chains': []} with mock.patch('web.helper.init_skale', return_value=skale): - skale.node_rotation.is_rotation_active = mock.Mock(return_value=True) - data = get_bp_data( - skale_bp, - get_api_url(BLUEPRINT_NAME, 'update-safe'), - ) - - assert data['payload'] == {'update_safe': False, 'unsafe_chains': [schain_on_contracts]} + with mock.patch.object(skale.node_rotation, 'is_rotation_active', return_value=False): + skale.node_rotation.is_rotation_active = mock.Mock(return_value=True) + data = get_bp_data( + skale_bp, + get_api_url(BLUEPRINT_NAME, 'update-safe'), + ) + assert data['payload'] == {'update_safe': False, 'unsafe_chains': [schain_on_contracts]} cfm = ConfigFileManager(schain_on_contracts) From 217b9f56caba24847a97b70745e017a59160b677 Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 15 Oct 2024 15:17:08 +0000 Subject: [PATCH 100/103] Fix config generator test --- .github/workflows/test.yml | 2 +- tests/schains/config/generator_test.py | 14 +++++--------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f5cc084e9..59296eb3d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,5 +1,5 @@ name: Test -on: [push, pull_request] +on: [push] env: ETH_PRIVATE_KEY: ${{ secrets.ETH_PRIVATE_KEY }} SCHAIN_TYPE: ${{ secrets.SCHAIN_TYPE }} diff --git a/tests/schains/config/generator_test.py b/tests/schains/config/generator_test.py index 1b23a383e..e2281182c 100644 --- a/tests/schains/config/generator_test.py +++ b/tests/schains/config/generator_test.py @@ -734,17 +734,13 @@ def test_generate_config_static_groups( node_id, generation, rotation_id = 1, 1, 0 ecdsa_key_name = 'test' - schain_data = { - 'name': _schain_name, - 'partOfNode': 0, - 'generation': 1, - 'mainnetOwner': TEST_MAINNET_OWNER_ADDRESS, - 'originator': TEST_ORIGINATOR_ADDRESS, - 'multitransactionMode': True - } + schain = get_schain_struct(schain_name=_schain_name) + 
From be712d6b1835dddad1e1f857ee104f9884d5a78b Mon Sep 17 00:00:00 2001
From: badrogger
Date: Wed, 16 Oct 2024 11:56:24 +0000
Subject: [PATCH 101/103] Remove unused code

---
 core/schains/monitor/main.py  | 12 +++---------
 core/schains/monitor/tasks.py |  4 ++--
 core/schains/process.py       |  1 -
 core/schains/task.py          | 51 -----------------------------
 tests/schains/task_test.py    | 33 -----------------------
 5 files changed, 5 insertions(+), 96 deletions(-)
 delete mode 100644 core/schains/task.py
 delete mode 100644 tests/schains/task_test.py

diff --git a/core/schains/monitor/main.py b/core/schains/monitor/main.py
index 9728bb346..b3c589724 100644
--- a/core/schains/monitor/main.py
+++ b/core/schains/monitor/main.py
@@ -52,12 +52,6 @@
 from web.models.schain import SChainRecord, upsert_schain_record
 
 
-MIN_SCHAIN_MONITOR_SLEEP_INTERVAL = 20
-MAX_SCHAIN_MONITOR_SLEEP_INTERVAL = 40
-
-STUCK_TIMEOUT = 60 * 60 * 2
-SHUTDOWN_INTERVAL = 60 * 10
-
 logger = logging.getLogger(__name__)
 
 
@@ -187,7 +181,7 @@ def run_skaled_pipeline(
 
 class SkaledTask(ITask):
     NAME = 'skaled'
-    STUCK_TIMEOUT = 3600  # 1 hour
+    STUCK_TIMEOUT_SECONDS = 3600  # 1 hour
 
     def __init__(
         self,
@@ -211,7 +205,7 @@ def name(self) -> str:
 
     @property
     def stuck_timeout(self) -> int:
-        return self.STUCK_TIMEOUT
+        return self.STUCK_TIMEOUT_SECONDS
 
     @property
     def future(self) -> Future:
@@ -248,7 +242,7 @@ def create_pipeline(self) -> Callable:
 
 class ConfigTask(ITask):
     NAME = 'config'
-    STUCK_TIMEOUT = 60 * 60 * 2
+    STUCK_TIMEOUT_SECONDS = 60 * 60 * 2
 
     def __init__(
         self,
diff --git a/core/schains/monitor/tasks.py b/core/schains/monitor/tasks.py
index e15ce06aa..7fcc86f82 100644
--- a/core/schains/monitor/tasks.py
+++ b/core/schains/monitor/tasks.py
@@ -10,7 +10,7 @@
 logger = logging.getLogger(__name__)
 
 
-SLEEP_INTERVAL = 10
+SLEEP_INTERVAL_SECONDS = 10
 
 
 class ITask(metaclass=abc.ABCMeta):
@@ -56,7 +56,7 @@ def start_ts(self, value: int) -> None:
 def execute_tasks(
     tasks: list[ITask],
     process_report: ProcessReport,
-    sleep_interval: int = SLEEP_INTERVAL,
+    sleep_interval: int = SLEEP_INTERVAL_SECONDS,
 ) -> None:
     logger.info('Running tasks %s', tasks)
     with ThreadPoolExecutor(max_workers=len(tasks), thread_name_prefix='T') as executor:
diff --git a/core/schains/process.py b/core/schains/process.py
index 387c767a0..1da149995 100644
--- a/core/schains/process.py
+++ b/core/schains/process.py
@@ -33,7 +33,6 @@
 logger = logging.getLogger(__name__)
 
 
-TIMEOUT_COEFFICIENT = 2.2
 P_KILL_WAIT_TIMEOUT = 60
 
 
diff --git a/core/schains/task.py b/core/schains/task.py
deleted file mode 100644
index b95a8eb92..000000000
--- a/core/schains/task.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import logging
-import time
-from concurrent.futures import Future, ThreadPoolExecutor
-from typing import Callable, List, Optional
-
-logger = logging.getLogger(__name__)
-
-
-class Task:
-    def __init__(
-        self,
-        name: str,
-        action: Callable,
-        index: int = 0,
-        sleep: int = 2
-    ) -> None:
-        self.name = name
-        self.index = index
-        self.action = action
-        self.sleep = sleep
-
-    def run(self) -> None:
-        try:
-            self.action()
-        except Exception as e:
-            logger.exception('Task %s failed with %s', self.name, e)
-        logger.info('Sleeping after task execution for %d', self.sleep)
-        time.sleep(self.sleep)
-
-
-def keep_tasks_running(
-    executor: ThreadPoolExecutor,
-    tasks: List[Task],
-    futures: List[Optional[Future]]
-) -> None:
-    for i, task in enumerate(tasks):
-        future = futures[i]
-        if future is not None and not future.running():
-            result = future.result()
-            logger.info('Task %s finished with %s', task.name, result)
-        if future is None or not future.running():
-            logger.info('Running task %s', task.name)
-            futures[i] = executor.submit(task.run)
-
-
-def run_tasks(name: str, tasks: List[Task]) -> None:
-    with ThreadPoolExecutor(max_workers=len(tasks), thread_name_prefix='T') as executor:
-        futures: List[Optional[Future]] = [None for i in range(len(tasks))]
-        while True:
-            keep_tasks_running(executor, tasks, futures)
-            time.sleep(30)
diff --git a/tests/schains/task_test.py b/tests/schains/task_test.py
deleted file mode 100644
index f5c574094..000000000
--- a/tests/schains/task_test.py
+++ /dev/null
@@ -1,33 +0,0 @@
-import functools
-import time
-
-import pytest
-
-from core.schains.task import run_tasks, Task
-
-ITERATIONS = 10
-SCHAINS_NUM = 10
-
-
-class StopActionError(Exception):
-    pass
-
-
-def action(name):
-    for i in range(ITERATIONS):
-        time.sleep(2)
-    raise StopActionError(f'Stopping {name}')
-
-
-@pytest.mark.skip
-def test_tasks():
-    tasks = [
-        Task(
-            f'test-schain-{i}',
-            functools.partial(action, name=f'test-schain-{i}'),
-            i
-        )
-        for i in range(SCHAINS_NUM)
-    ]
-    run_tasks(tasks=tasks)
-    time.sleep(3)
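
With `core/schains/task.py` and its `Task`/`run_tasks` helpers deleted, the `ITask` interface in `core/schains/monitor/tasks.py` is the only task abstraction left; the hunks above show its visible members (`NAME`, a stuck timeout, a `future` property, `create_pipeline`). A minimal illustrative implementation, assuming only those members (the abstract base may require more than is shown in this series):

    from concurrent.futures import Future
    from typing import Callable

    from core.schains.monitor.tasks import ITask


    class NoopTask(ITask):
        NAME = 'noop'
        STUCK_TIMEOUT_SECONDS = 60

        def __init__(self) -> None:
            self._future: Future = Future()

        @property
        def name(self) -> str:
            return self.NAME

        @property
        def stuck_timeout(self) -> int:
            return self.STUCK_TIMEOUT_SECONDS

        @property
        def future(self) -> Future:
            return self._future

        @future.setter
        def future(self, value: Future) -> None:
            self._future = value

        def create_pipeline(self) -> Callable:
            # execute_tasks() submits the returned callable to its thread pool
            return lambda: None
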
From 0ac7970614a007627033ac01f60317b9013bb7f0 Mon Sep 17 00:00:00 2001
From: badrogger
Date: Wed, 16 Oct 2024 11:56:55 +0000
Subject: [PATCH 102/103] Run tests only for push

---
 .github/workflows/test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index f5cc084e9..59296eb3d 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -1,5 +1,5 @@
 name: Test
-on: [push, pull_request]
+on: [push]
 env:
   ETH_PRIVATE_KEY: ${{ secrets.ETH_PRIVATE_KEY }}
   SCHAIN_TYPE: ${{ secrets.SCHAIN_TYPE }}

From fa1c622a513b57f2b0e72bfb3e91b0ac6e424084 Mon Sep 17 00:00:00 2001
From: badrogger
Date: Thu, 17 Oct 2024 16:23:10 +0000
Subject: [PATCH 103/103] Fix tests

---
 tests/schains/monitor/containers_test.py | 4 ----
 tests/schains/monitor/rpc_test.py        | 2 +-
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/tests/schains/monitor/containers_test.py b/tests/schains/monitor/containers_test.py
index 8bca146b4..6068a7785 100644
--- a/tests/schains/monitor/containers_test.py
+++ b/tests/schains/monitor/containers_test.py
@@ -74,10 +74,6 @@ def test_monitor_schain_container_ec(
     schain_record = upsert_schain_record(schain_db)
     schain = get_schain_struct(schain_name=schain_db)
 
-    run_custom_schain_container(dutils, schain.name, entrypoint=['sh', 'exit', '1'])
-    # To make sure container initializaed
-    time.sleep(2)
-
     run_custom_schain_container(dutils, schain.name, entrypoint=['sh', 'exit', '1'])
     # To make sure container initializaed
     time.sleep(2)
diff --git a/tests/schains/monitor/rpc_test.py b/tests/schains/monitor/rpc_test.py
index 07a7f9ba6..8d93158bf 100644
--- a/tests/schains/monitor/rpc_test.py
+++ b/tests/schains/monitor/rpc_test.py
@@ -1,6 +1,6 @@
 import datetime
-import time
 import json
+import time
 from unittest import mock
 
 import freezegun