From 2e387c8864b3cf6bfb791f418eec0beef73b22d4 Mon Sep 17 00:00:00 2001 From: badrogger Date: Tue, 9 Jul 2024 09:25:56 +0000 Subject: [PATCH 01/13] Add additional logs (schain record fields) --- core/schains/monitor/main.py | 1 + web/models/schain.py | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/core/schains/monitor/main.py b/core/schains/monitor/main.py index 27405872..ba57ba2a 100644 --- a/core/schains/monitor/main.py +++ b/core/schains/monitor/main.py @@ -137,6 +137,7 @@ def run_skaled_pipeline( ) -> None: name = schain['name'] schain_record = SChainRecord.get_by_name(name) + logger.info('Record: %s', SChainRecord.to_dict(schain_record)) dutils = dutils or DockerUtils() diff --git a/web/models/schain.py b/web/models/schain.py index c685d864..a7f67eb7 100644 --- a/web/models/schain.py +++ b/web/models/schain.py @@ -98,7 +98,13 @@ def to_dict(cls, record): 'monitor_last_seen': record.monitor_last_seen.timestamp(), 'monitor_id': record.monitor_id, 'config_version': record.config_version, - 'ssl_change_date': record.ssl_change_date.timestamp() + 'ssl_change_date': record.ssl_change_date.timestamp(), + 'repair_mode': record.repair_mode, + 'backup_run': record.backup_run, + 'sync_config_run': record.sync_config_run, + 'snapshot_from': record.snapshot_from, + 'restart_count': record.restart_count, + 'failed_rpc_count': record.failed_rpc_count } def upload(self, *args, **kwargs) -> None: From 7c712fbb8556eb0b7a30d6b0e47b3b4da9c0c4f4 Mon Sep 17 00:00:00 2001 From: badrogger Date: Thu, 4 Jul 2024 11:55:32 +0000 Subject: [PATCH 02/13] Add default gas price for tests --- scripts/helper.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/helper.sh b/scripts/helper.sh index afebbca7..b8f15641 100644 --- a/scripts/helper.sh +++ b/scripts/helper.sh @@ -23,6 +23,7 @@ export_test_env () { export SCHAIN_STOP_TIMEOUT=1 export ABI_FILEPATH=${ABI_FILEPATH="$PWD/helper-scripts/contracts_data/manager.json"} export IMA_ABI_FILEPATH=${IMA_ABI_FILEPATH} + export DEFAULT_GAS_PRICE_WEI=1000000000 cp $PWD/helper-scripts/contracts_data/ima.json $SKALE_DIR_HOST/contracts_info } From 1791df32f88504d29e9eaedd771739d5e57ac202 Mon Sep 17 00:00:00 2001 From: badrogger Date: Thu, 4 Jul 2024 15:32:29 +0000 Subject: [PATCH 03/13] Extract firewall tests into separate GA action --- .github/workflows/test.yml | 15 ++++++++++++++- scripts/run_core_tests.sh | 4 +--- scripts/run_firewall_test.sh | 11 +++++++++-- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ae31339a..6c5c8be6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -40,14 +40,27 @@ jobs: run: | sudo lsblk -f sudo free -h + - name: Run core tests run: | bash ./scripts/run_core_tests.sh - - name: Show stats after tests + + - name: Show stats after core tests + if: always() + run: | + sudo lsblk -f + sudo free -h + + - name: Run firewall tests + run: | + bash ./scripts/run_firewall_tests.sh + + - name: Show stats after firewall tests if: always() run: | sudo lsblk -f sudo free -h + - name: Run codecov run: | codecov -t $CODECOV_TOKEN diff --git a/scripts/run_core_tests.sh b/scripts/run_core_tests.sh index 9987b4af..ae867c12 100755 --- a/scripts/run_core_tests.sh +++ b/scripts/run_core_tests.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -set -e +set -ea export DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" source $DIR/helper.sh @@ -15,5 +15,3 @@ bash scripts/run_redis.sh py.test --cov-config=.coveragerc --cov=. tests/ --ignore=tests/firewall $@ tests_cleanup -scripts/run_firewall_test.sh -tests_cleanup diff --git a/scripts/run_firewall_test.sh b/scripts/run_firewall_test.sh index 037007ba..6858b80e 100755 --- a/scripts/run_firewall_test.sh +++ b/scripts/run_firewall_test.sh @@ -1,8 +1,13 @@ #!/usr/bin/env bash -set -e +set -ea + +export DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +source $DIR/helper.sh +source $DIR/../helper-scripts/helper.sh + +export_test_env docker rm -f test-firewall || true -DIR=$PWD docker build -t admin:base . docker build -f tests.Dockerfile -t test-firewall . docker run -v "$DIR/tests/skale-data/node_data":"/skale_node_data" \ @@ -16,3 +21,5 @@ docker run -v "$DIR/tests/skale-data/node_data":"/skale_node_data" \ -e SKALE_DIR_HOST=/skale_dir_host \ --cap-add=NET_ADMIN --cap-add=NET_RAW \ --name test-firewall test-firewall pytest --cov core.schains.firewall tests/firewall/ $@ + +tests_cleanup From feca96ca6bb50245369c1454ecc7b4b1a0083951 Mon Sep 17 00:00:00 2001 From: badrogger Date: Thu, 4 Jul 2024 16:34:04 +0000 Subject: [PATCH 04/13] Fix github action test.yml --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6c5c8be6..418609d1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -53,7 +53,7 @@ jobs: - name: Run firewall tests run: | - bash ./scripts/run_firewall_tests.sh + bash ./scripts/run_firewall_test.sh - name: Show stats after firewall tests if: always() From 5b2dd256447cd3b9bb4bfbf77b34f30ed83f086f Mon Sep 17 00:00:00 2001 From: badrogger Date: Thu, 4 Jul 2024 18:01:31 +0000 Subject: [PATCH 05/13] Add images cleanup after core tests --- .github/workflows/test.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 418609d1..9da27ba6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -8,6 +8,7 @@ env: SGX_WALLET_TAG: "1.83.0-beta.5" CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} PYTHON_VERSION: 3.11 + jobs: test_core: runs-on: ubuntu-latest @@ -20,21 +21,27 @@ jobs: - uses: actions/checkout@v2 with: submodules: true + - name: Set up Python ${{ env.PYTHON_VERSION }} uses: actions/setup-python@v1 with: python-version: ${{ env.PYTHON_VERSION }} + - name: Install python dependencies run: bash ./scripts/install_python_dependencies.sh + - name: Lint with flake8 run: flake8 . + - name: Launch anvil node run: | docker run -d --network host --name anvil ghcr.io/foundry-rs/foundry anvil && sleep 5 && docker logs anvil --tail 1000 + - name: Deploy manager & ima contracts run: | bash ./helper-scripts/deploy_test_ima.sh docker rmi -f skalenetwork/skale-manager:${{ env.MANAGER_TAG }} + - name: Show stats before tests if: always() run: | @@ -44,6 +51,7 @@ jobs: - name: Run core tests run: | bash ./scripts/run_core_tests.sh + docker rmi -f $(docker ps -aq) - name: Show stats after core tests if: always() From 82661930d5072d997224fb50e23e6085944b899e Mon Sep 17 00:00:00 2001 From: badrogger Date: Fri, 5 Jul 2024 09:00:51 +0000 Subject: [PATCH 06/13] Fix image cleanup --- .github/workflows/test.yml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9da27ba6..6f7720c2 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -40,6 +40,9 @@ jobs: - name: Deploy manager & ima contracts run: | bash ./helper-scripts/deploy_test_ima.sh + + - name: Cleanup skale-manager image + run: | docker rmi -f skalenetwork/skale-manager:${{ env.MANAGER_TAG }} - name: Show stats before tests @@ -51,7 +54,11 @@ jobs: - name: Run core tests run: | bash ./scripts/run_core_tests.sh - docker rmi -f $(docker ps -aq) + + - name: Cleanup docker artifacts + run: | + docker rm -f $(docker ps -aq) + docker rmi -f $(docker images -q) - name: Show stats after core tests if: always() From 3fe25134a80fd7ef35727a8a26ecb098949eae63 Mon Sep 17 00:00:00 2001 From: badrogger Date: Fri, 5 Jul 2024 12:47:16 +0000 Subject: [PATCH 07/13] Fix run_firewall_test script --- scripts/run_firewall_test.sh | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/scripts/run_firewall_test.sh b/scripts/run_firewall_test.sh index 6858b80e..c3960525 100755 --- a/scripts/run_firewall_test.sh +++ b/scripts/run_firewall_test.sh @@ -2,16 +2,12 @@ set -ea export DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -source $DIR/helper.sh -source $DIR/../helper-scripts/helper.sh - -export_test_env docker rm -f test-firewall || true docker build -t admin:base . docker build -f tests.Dockerfile -t test-firewall . -docker run -v "$DIR/tests/skale-data/node_data":"/skale_node_data" \ - -v "$DIR/tests/skale-data":"/skale_vol" \ +docker run -v "$DIR/../tests/skale-data/node_data":"/skale_node_data" \ + -v "$DIR/../tests/skale-data":"/skale_vol" \ -e SGX_SERVER_URL="https://127.0.0.1:1026" \ -e ENDPOINT="http://127.0.0.1:8545" \ -e DB_USER="test" \ @@ -21,5 +17,3 @@ docker run -v "$DIR/tests/skale-data/node_data":"/skale_node_data" \ -e SKALE_DIR_HOST=/skale_dir_host \ --cap-add=NET_ADMIN --cap-add=NET_RAW \ --name test-firewall test-firewall pytest --cov core.schains.firewall tests/firewall/ $@ - -tests_cleanup From 82c09887d82a2d357ad3ca234a8a04eb3a0280f3 Mon Sep 17 00:00:00 2001 From: badrogger Date: Wed, 10 Jul 2024 14:25:35 +0000 Subject: [PATCH 08/13] Check last successful DKG in sync config monitor --- core/schains/checks.py | 151 ++++++++----------- core/schains/monitor/config_monitor.py | 10 +- core/schains/monitor/main.py | 4 + tests/schains/monitor/config_monitor_test.py | 85 ++++++++++- 4 files changed, 155 insertions(+), 95 deletions(-) diff --git a/core/schains/checks.py b/core/schains/checks.py index 73f12313..8cde844f 100644 --- a/core/schains/checks.py +++ b/core/schains/checks.py @@ -32,11 +32,11 @@ get_base_port_from_config, get_node_ips_from_config, get_own_ip_from_config, - get_local_schain_http_endpoint_from_config + get_local_schain_http_endpoint_from_config, ) from core.schains.config.main import ( get_skaled_config_rotations_ids, - get_upstream_config_rotation_ids + get_upstream_config_rotation_ids, ) from core.schains.dkg.utils import get_secret_key_share_filepath from core.schains.firewall.types import IRuleController @@ -45,14 +45,14 @@ from core.schains.rpc import ( check_endpoint_alive, check_endpoint_blocks, - get_endpoint_alive_check_timeout + get_endpoint_alive_check_timeout, ) from core.schains.external_config import ExternalConfig, ExternalState from core.schains.runner import ( get_container_name, get_ima_container_time_frame, get_image_name, - is_new_image_pulled + is_new_image_pulled, ) from core.schains.skaled_exit_codes import SkaledExitCodes from core.schains.volume import is_volume_exists @@ -79,7 +79,7 @@ 'rpc', 'blocks', 'process', - 'ima_container' + 'ima_container', ] TG_ALLOWED_CHECKS = [ @@ -90,7 +90,7 @@ 'rpc', 'blocks', 'process', - 'ima_container' + 'ima_container', ] @@ -111,11 +111,13 @@ class IChecks(ABC): def get_name(self) -> str: pass - def get_all(self, - log: bool = True, - save: bool = False, - expose: bool = False, - needed: Optional[List[str]] = None) -> Dict: + def get_all( + self, + log: bool = True, + save: bool = False, + expose: bool = False, + needed: Optional[List[str]] = None, + ) -> Dict: if needed: names = needed else: @@ -140,25 +142,27 @@ def is_healthy(self) -> bool: @classmethod def get_check_names(cls): - return list(filter( - lambda c: not c.startswith('_') and isinstance( - getattr(cls, c), property), - dir(cls) - )) + return list( + filter( + lambda c: not c.startswith('_') and isinstance(getattr(cls, c), property), dir(cls) + ) + ) class ConfigChecks(IChecks): - def __init__(self, - schain_name: str, - node_id: int, - schain_record: SChainRecord, - rotation_id: int, - stream_version: str, - current_nodes: list[ExtendedManagerNodeInfo], - estate: ExternalState, - sync_node: bool = False, - econfig: Optional[ExternalConfig] = None - ) -> None: + def __init__( + self, + schain_name: str, + node_id: int, + schain_record: SChainRecord, + rotation_id: int, + stream_version: str, + current_nodes: list[ExtendedManagerNodeInfo], + estate: ExternalState, + last_dkg_successful: bool, + sync_node: bool = False, + econfig: Optional[ExternalConfig] = None, + ) -> None: self.name = schain_name self.node_id = node_id self.schain_record = schain_record @@ -166,11 +170,10 @@ def __init__(self, self.stream_version = stream_version self.current_nodes = current_nodes self.estate = estate + self._last_dkg_successful = last_dkg_successful self.sync_node = sync_node self.econfig = econfig or ExternalConfig(schain_name) - self.cfm: ConfigFileManager = ConfigFileManager( - schain_name=schain_name - ) + self.cfm: ConfigFileManager = ConfigFileManager(schain_name=schain_name) self.statsd_client = get_statsd_client() def get_name(self) -> str: @@ -182,13 +185,14 @@ def config_dir(self) -> CheckRes: dir_path = self.cfm.dirname return CheckRes(os.path.isdir(dir_path)) + def last_dkg_successful(self) -> CheckRes: + """Checks that last dkg was successfuly completed""" + return CheckRes(self._last_dkg_successful) + @property def dkg(self) -> CheckRes: """Checks that DKG procedure is completed""" - secret_key_share_filepath = get_secret_key_share_filepath( - self.name, - self.rotation_id - ) + secret_key_share_filepath = get_secret_key_share_filepath(self.name, self.rotation_id) return CheckRes(os.path.isfile(secret_key_share_filepath)) @property @@ -227,17 +231,14 @@ def upstream_config(self) -> CheckRes: exists, node_ips_updated, stream_updated, - triggered + triggered, ) return CheckRes(exists and node_ips_updated and stream_updated and not triggered) @property def external_state(self) -> CheckRes: actual_state = self.econfig.get() - logger.debug( - 'Checking external config. Current %s. Saved %s', - self.estate, actual_state - ) + logger.debug('Checking external config. Current %s. Saved %s', self.estate, actual_state) return CheckRes(self.econfig.synced(self.estate)) @@ -250,7 +251,7 @@ def __init__( *, econfig: Optional[ExternalConfig] = None, dutils: Optional[DockerUtils] = None, - sync_node: bool = False + sync_node: bool = False, ): self.name = schain_name self.schain_record = schain_record @@ -259,9 +260,7 @@ def __init__( self.econfig = econfig or ExternalConfig(name=schain_name) self.sync_node = sync_node self.rc = rule_controller - self.cfm: ConfigFileManager = ConfigFileManager( - schain_name=schain_name - ) + self.cfm: ConfigFileManager = ConfigFileManager(schain_name=schain_name) self.statsd_client = get_statsd_client() def get_name(self) -> str: @@ -278,9 +277,7 @@ def rotation_id_updated(self) -> CheckRes: upstream_rotations = get_upstream_config_rotation_ids(self.cfm) config_rotations = get_skaled_config_rotations_ids(self.cfm) logger.debug( - 'Comparing rotation_ids. Upstream: %s. Config: %s', - upstream_rotations, - config_rotations + 'Comparing rotation_ids. Upstream: %s. Config: %s', upstream_rotations, config_rotations ) return CheckRes(upstream_rotations == config_rotations) @@ -292,19 +289,14 @@ def config_updated(self) -> CheckRes: @property def config(self) -> CheckRes: - """ Checks that sChain config file exists """ + """Checks that sChain config file exists""" return CheckRes(self.cfm.skaled_config_exists()) @property def volume(self) -> CheckRes: """Checks that sChain volume exists""" - return CheckRes( - is_volume_exists( - self.name, - sync_node=self.sync_node, - dutils=self.dutils) - ) + return CheckRes(is_volume_exists(self.name, sync_node=self.sync_node, dutils=self.dutils)) @property def firewall_rules(self) -> CheckRes: @@ -316,10 +308,7 @@ def firewall_rules(self) -> CheckRes: own_ip = get_own_ip_from_config(conf) ranges = self.econfig.ranges self.rc.configure( - base_port=base_port, - own_ip=own_ip, - node_ips=node_ips, - sync_ip_ranges=ranges + base_port=base_port, own_ip=own_ip, node_ips=node_ips, sync_ip_ranges=ranges ) logger.debug(f'Rule controller {self.rc.expected_rules()}') return CheckRes(self.rc.is_rules_synced()) @@ -364,19 +353,19 @@ def ima_container(self) -> CheckRes: updated_time_frame = time_frame == container_time_frame logger.debug( 'IMA image %s, container image %s, time frame %d, container_time_frame %d', - expected_image, image, time_frame, container_time_frame + expected_image, + image, + time_frame, + container_time_frame, ) data = { 'container_running': container_running, 'updated_image': updated_image, 'new_image_pulled': new_image_pulled, - 'updated_time_frame': updated_time_frame + 'updated_time_frame': updated_time_frame, } - logger.debug( - '%s, IMA check - %s', - self.name, data - ) + logger.debug('%s, IMA check - %s', self.name, data) result: bool = all(data.values()) return CheckRes(result, data=data) @@ -387,9 +376,7 @@ def rpc(self) -> CheckRes: if self.config: config = self.cfm.skaled_config http_endpoint = get_local_schain_http_endpoint_from_config(config) - timeout = get_endpoint_alive_check_timeout( - self.schain_record.failed_rpc_count - ) + timeout = get_endpoint_alive_check_timeout(self.schain_record.failed_rpc_count) res = check_endpoint_alive(http_endpoint, timeout=timeout) return CheckRes(res) @@ -430,7 +417,7 @@ def __init__( *, econfig: Optional[ExternalConfig] = None, dutils: DockerUtils = None, - sync_node: bool = False + sync_node: bool = False, ): self._subjects = [ ConfigChecks( @@ -442,7 +429,7 @@ def __init__( current_nodes=current_nodes, estate=estate, econfig=econfig, - sync_node=sync_node + sync_node=sync_node, ), SkaledChecks( schain_name=schain_name, @@ -450,8 +437,8 @@ def __init__( rule_controller=rule_controller, econfig=econfig, dutils=dutils, - sync_node=sync_node - ) + sync_node=sync_node, + ), ] def __getattr__(self, attr: str) -> Any: @@ -469,11 +456,7 @@ def get_all(self, log: bool = True, save: bool = False, needed: Optional[List[st plain_checks = {} for subj in self._subjects: logger.debug('Running checks for %s', subj) - subj_checks = subj.get_all( - log=False, - save=False, - needed=needed - ) + subj_checks = subj.get_all(log=False, save=False, needed=needed) plain_checks.update(subj_checks) if not self.estate or not self.estate.ima_linked: if 'ima_container' in plain_checks: @@ -492,13 +475,9 @@ def get_api_checks_status(status: Dict, allowed: List = API_ALLOWED_CHECKS) -> D def save_checks_dict(schain_name, checks_dict): schain_check_path = get_schain_check_filepath(schain_name) - logger.info( - f'Saving checks for the chain {schain_name}: {schain_check_path}') + logger.info(f'Saving checks for the chain {schain_name}: {schain_check_path}') try: - write_json(schain_check_path, { - 'time': time.time(), - 'checks': checks_dict - }) + write_json(schain_check_path, {'time': time.time(), 'checks': checks_dict}) except Exception: logger.exception(f'Failed to save checks: {schain_check_path}') @@ -510,14 +489,12 @@ def log_checks_dict(schain_name, checks_dict): if not checks_dict[check]: failed_checks.append(check) if len(failed_checks) != 0: - failed_checks_str = ", ".join(failed_checks) + failed_checks_str = ', '.join(failed_checks) logger.info( arguments_list_string( - { - 'sChain name': schain_name, - 'Failed checks': failed_checks_str - }, - 'Failed sChain checks', 'error' + {'sChain name': schain_name, 'Failed checks': failed_checks_str}, + 'Failed sChain checks', + 'error', ) ) diff --git a/core/schains/monitor/config_monitor.py b/core/schains/monitor/config_monitor.py index 1bd54841..47587a1b 100644 --- a/core/schains/monitor/config_monitor.py +++ b/core/schains/monitor/config_monitor.py @@ -29,11 +29,7 @@ class BaseConfigMonitor(IMonitor): - def __init__( - self, - action_manager: ConfigActionManager, - checks: ConfigChecks - ) -> None: + def __init__(self, action_manager: ConfigActionManager, checks: ConfigChecks) -> None: self.am = action_manager self.checks = checks @@ -73,7 +69,7 @@ def execute(self) -> None: self.am.config_dir() if not self.checks.external_state: self.am.external_state() - if not self.checks.upstream_config: + if self.checks.last_dkg_successful and not self.checks.upstream_config: self.am.upstream_config() - self.am.update_reload_ts(self.checks.skaled_node_ips, sync_node=True) + self.am.update_reload_ts(self.checks.skaled_node_ips, sync_node=True) self.am.reset_config_record() diff --git a/core/schains/monitor/main.py b/core/schains/monitor/main.py index ba57ba2a..dcbbb5e2 100644 --- a/core/schains/monitor/main.py +++ b/core/schains/monitor/main.py @@ -81,6 +81,7 @@ def run_config_pipeline( rotation_data = skale.node_rotation.get_rotation(name) allowed_ranges = get_sync_agent_ranges(skale) ima_linked = not SYNC_NODE and skale_ima.linker.has_schain(name) + last_dkg_successful = skale.dkg.is_last_dkg_successfull(name) current_nodes = get_current_nodes(skale, name) estate = ExternalState( @@ -96,6 +97,7 @@ def run_config_pipeline( stream_version=stream_version, rotation_id=rotation_data['rotation_id'], current_nodes=current_nodes, + last_dkg_successful=last_dkg_successful, econfig=econfig, estate=estate ) @@ -117,6 +119,8 @@ def run_config_pipeline( if SYNC_NODE: logger.info('Sync node mode, running config monitor') + if not last_dkg_successful: + logger.info('Latest DKG has not been completed for rotation_id %s', rotation_data) mon = SyncConfigMonitor(config_am, config_checks) else: logger.info('Regular node mode, running config monitor') diff --git a/tests/schains/monitor/config_monitor_test.py b/tests/schains/monitor/config_monitor_test.py index d7c211f6..3dc13584 100644 --- a/tests/schains/monitor/config_monitor_test.py +++ b/tests/schains/monitor/config_monitor_test.py @@ -10,7 +10,7 @@ from core.schains.config.directory import schain_config_dir from core.schains.monitor.action import ConfigActionManager -from core.schains.monitor.config_monitor import RegularConfigMonitor +from core.schains.monitor.config_monitor import RegularConfigMonitor, SyncConfigMonitor from core.schains.external_config import ExternalConfig from web.models.schain import SChainRecord @@ -42,6 +42,7 @@ def config_checks( rotation_id=rotation_data['rotation_id'], stream_version=CONFIG_STREAM, current_nodes=current_nodes, + last_dkg_successful=True, estate=estate ) @@ -84,9 +85,18 @@ def regular_config_monitor(config_am, config_checks): ) +@pytest.fixture +def sync_config_monitor(config_am, config_checks): + return SyncConfigMonitor( + action_manager=config_am, + checks=config_checks + ) + + def test_regular_config_monitor(schain_db, regular_config_monitor, rotation_data): name = schain_db rotation_id = rotation_data['rotation_id'] + regular_config_monitor.run() config_dir = schain_config_dir(name) @@ -130,3 +140,76 @@ def test_regular_config_monitor_change_ip( regular_config_monitor.am.cfm.sync_skaled_config_with_upstream() regular_config_monitor.run() assert econfig.reload_ts is None + + +def test_sync_config_monitor( + skale, + schain_db, + config_am, + config_checks, + econfig, + estate, + rotation_data +): + name = schain_db + config_dir = schain_config_dir(name) + + rotation_id = rotation_data['rotation_id'] + config_pattern = os.path.join( + config_dir, + f'schain_{name}_{rotation_id}_*.json' + ) + assert len(glob.glob(config_pattern)) == 0 + + assert econfig.synced(estate) + + estate.chain_id = 1 + config_checks.estate = estate + config_am.estate = estate + assert not econfig.synced(estate) + + sync_config_monitor = SyncConfigMonitor( + action_manager=config_am, + checks=config_checks + ) + sync_config_monitor.run() + assert econfig.synced(estate) + config_filename = glob.glob(config_pattern) + assert os.path.isfile(config_filename[0]) + + +def test_sync_config_monitor_dkg_not_completed( + skale, + schain_db, + config_am, + config_checks, + econfig, + estate, + rotation_data +): + name = schain_db + config_dir = schain_config_dir(name) + + rotation_id = rotation_data['rotation_id'] + config_pattern = os.path.join( + config_dir, + f'schain_{name}_{rotation_id}_*.json' + ) + assert len(glob.glob(config_pattern)) == 0 + + assert econfig.synced(estate) + + estate.chain_id = 1 + config_checks.estate = estate + config_am.estate = estate + config_checks.last_dkg_successful = False + assert not econfig.synced(estate) + + sync_config_monitor = SyncConfigMonitor( + action_manager=config_am, + checks=config_checks + ) + sync_config_monitor.run() + assert econfig.synced(estate) + # config generation was not triggered because dkg has not been completed + assert len(glob.glob(config_pattern)) == 0 From a94d7ac4227c1ad2c932c11713992b23c34d31eb Mon Sep 17 00:00:00 2001 From: badrogger Date: Wed, 10 Jul 2024 15:44:14 +0000 Subject: [PATCH 09/13] Improve logging --- core/schains/monitor/main.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/core/schains/monitor/main.py b/core/schains/monitor/main.py index dcbbb5e2..3d50e049 100644 --- a/core/schains/monitor/main.py +++ b/core/schains/monitor/main.py @@ -118,9 +118,7 @@ def run_config_pipeline( logger.info('Config checks: %s', status) if SYNC_NODE: - logger.info('Sync node mode, running config monitor') - if not last_dkg_successful: - logger.info('Latest DKG has not been completed for rotation_id %s', rotation_data) + logger.info('Sync node last_dkg_successful %s, rotation_data %s', last_dkg_successful, rotation_data) mon = SyncConfigMonitor(config_am, config_checks) else: logger.info('Regular node mode, running config monitor') From 352ec6d06d390f6caed6516681aa28e047dfe613 Mon Sep 17 00:00:00 2001 From: badrogger Date: Wed, 10 Jul 2024 18:16:06 +0000 Subject: [PATCH 10/13] Fix last_dkg_successful check --- core/schains/checks.py | 1 + core/schains/monitor/main.py | 90 +++++++++++++++--------------------- 2 files changed, 38 insertions(+), 53 deletions(-) diff --git a/core/schains/checks.py b/core/schains/checks.py index 8cde844f..71a1946e 100644 --- a/core/schains/checks.py +++ b/core/schains/checks.py @@ -185,6 +185,7 @@ def config_dir(self) -> CheckRes: dir_path = self.cfm.dirname return CheckRes(os.path.isdir(dir_path)) + @property def last_dkg_successful(self) -> CheckRes: """Checks that last dkg was successfuly completed""" return CheckRes(self._last_dkg_successful) diff --git a/core/schains/monitor/main.py b/core/schains/monitor/main.py index 3d50e049..5030b8c7 100644 --- a/core/schains/monitor/main.py +++ b/core/schains/monitor/main.py @@ -31,20 +31,11 @@ from core.node import get_skale_node_version from core.node_config import NodeConfig -from core.schains.checks import ( - ConfigChecks, - get_api_checks_status, - TG_ALLOWED_CHECKS, - SkaledChecks -) +from core.schains.checks import ConfigChecks, get_api_checks_status, TG_ALLOWED_CHECKS, SkaledChecks from core.schains.config.file_manager import ConfigFileManager from core.schains.firewall import get_default_rule_controller from core.schains.firewall.utils import get_sync_agent_ranges -from core.schains.monitor import ( - get_skaled_monitor, - RegularConfigMonitor, - SyncConfigMonitor -) +from core.schains.monitor import get_skaled_monitor, RegularConfigMonitor, SyncConfigMonitor from core.schains.monitor.action import ConfigActionManager, SkaledActionManager from core.schains.external_config import ExternalConfig, ExternalState from core.schains.task import keep_tasks_running, Task @@ -70,24 +61,19 @@ def run_config_pipeline( - skale: Skale, - skale_ima: SkaleIma, - schain: Dict, - node_config: NodeConfig, - stream_version: str + skale: Skale, skale_ima: SkaleIma, schain: Dict, node_config: NodeConfig, stream_version: str ) -> None: name = schain['name'] schain_record = SChainRecord.get_by_name(name) rotation_data = skale.node_rotation.get_rotation(name) allowed_ranges = get_sync_agent_ranges(skale) ima_linked = not SYNC_NODE and skale_ima.linker.has_schain(name) - last_dkg_successful = skale.dkg.is_last_dkg_successfull(name) + group_index = skale.schains.name_to_group_id(name) + last_dkg_successful = skale.dkg.is_last_dkg_successful(group_index) current_nodes = get_current_nodes(skale, name) estate = ExternalState( - ima_linked=ima_linked, - chain_id=skale_ima.web3.eth.chain_id, - ranges=allowed_ranges + ima_linked=ima_linked, chain_id=skale_ima.web3.eth.chain_id, ranges=allowed_ranges ) econfig = ExternalConfig(name) config_checks = ConfigChecks( @@ -99,7 +85,7 @@ def run_config_pipeline( current_nodes=current_nodes, last_dkg_successful=last_dkg_successful, econfig=econfig, - estate=estate + estate=estate, ) config_am = ConfigActionManager( @@ -111,14 +97,18 @@ def run_config_pipeline( checks=config_checks, current_nodes=current_nodes, estate=estate, - econfig=econfig + econfig=econfig, ) status = config_checks.get_all(log=False, expose=True) logger.info('Config checks: %s', status) if SYNC_NODE: - logger.info('Sync node last_dkg_successful %s, rotation_data %s', last_dkg_successful, rotation_data) + logger.info( + 'Sync node last_dkg_successful %s, rotation_data %s', + last_dkg_successful, + rotation_data + ) mon = SyncConfigMonitor(config_am, config_checks) else: logger.info('Regular node mode, running config monitor') @@ -132,10 +122,7 @@ def run_config_pipeline( def run_skaled_pipeline( - skale: Skale, - schain: Dict, - node_config: NodeConfig, - dutils: DockerUtils + skale: Skale, schain: Dict, node_config: NodeConfig, dutils: DockerUtils ) -> None: name = schain['name'] schain_record = SChainRecord.get_by_name(name) @@ -149,7 +136,7 @@ def run_skaled_pipeline( schain_record=schain_record, rule_controller=rc, dutils=dutils, - sync_node=SYNC_NODE + sync_node=SYNC_NODE, ) skaled_status = get_skaled_status(name) @@ -160,12 +147,11 @@ def run_skaled_pipeline( checks=skaled_checks, node_config=node_config, econfig=ExternalConfig(name), - dutils=dutils + dutils=dutils, ) status = skaled_checks.get_all(log=False, expose=True) automatic_repair = get_automatic_repair_option() - api_status = get_api_checks_status( - status=status, allowed=TG_ALLOWED_CHECKS) + api_status = get_api_checks_status(status=status, allowed=TG_ALLOWED_CHECKS) notify_checks(name, node_config.all(), api_status) logger.info('Skaled status: %s', status) @@ -177,7 +163,7 @@ def run_skaled_pipeline( status=status, schain_record=schain_record, skaled_status=skaled_status, - automatic_repair=automatic_repair + automatic_repair=automatic_repair, ) statsd_client = get_statsd_client() @@ -188,11 +174,9 @@ def run_skaled_pipeline( def post_monitor_sleep(): schain_monitor_sleep = random.randint( - MIN_SCHAIN_MONITOR_SLEEP_INTERVAL, - MAX_SCHAIN_MONITOR_SLEEP_INTERVAL + MIN_SCHAIN_MONITOR_SLEEP_INTERVAL, MAX_SCHAIN_MONITOR_SLEEP_INTERVAL ) - logger.info('Monitor iteration completed, sleeping for %d', - schain_monitor_sleep) + logger.info('Monitor iteration completed, sleeping for %d', schain_monitor_sleep) time.sleep(schain_monitor_sleep) @@ -205,7 +189,7 @@ def create_and_execute_tasks( schain_record, executor, futures, - dutils + dutils, ): reload(web3_request) name = schain['name'] @@ -219,7 +203,9 @@ def create_and_execute_tasks( logger.info( 'sync_config_run %s, config_version %s, stream_version %s', - schain_record.sync_config_run, schain_record.config_version, stream_version + schain_record.sync_config_run, + schain_record.config_version, + stream_version, ) statsd_client = get_statsd_client() @@ -239,12 +225,14 @@ def create_and_execute_tasks( skale_ima=skale_ima, schain=schain, node_config=node_config, - stream_version=stream_version + stream_version=stream_version, ), - sleep=CONFIG_PIPELINE_SLEEP - )) - if schain_record.config_version != stream_version or \ - (schain_record.sync_config_run and schain_record.first_run): + sleep=CONFIG_PIPELINE_SLEEP, + ) + ) + if schain_record.config_version != stream_version or ( + schain_record.sync_config_run and schain_record.first_run + ): ConfigFileManager(name).remove_skaled_config() else: logger.info('Adding skaled task to the pool') @@ -256,10 +244,11 @@ def create_and_execute_tasks( skale=skale, schain=schain, node_config=node_config, - dutils=dutils + dutils=dutils, ), - sleep=SKALED_PIPELINE_SLEEP - )) + sleep=SKALED_PIPELINE_SLEEP, + ) + ) if len(tasks) == 0: logger.warning('No tasks to run') @@ -267,12 +256,7 @@ def create_and_execute_tasks( def run_monitor_for_schain( - skale, - skale_ima, - node_config: NodeConfig, - schain, - dutils=None, - once=False + skale, skale_ima, node_config: NodeConfig, schain, dutils=None, once=False ): stream_version = get_skale_node_version() tasks_number = 2 @@ -290,7 +274,7 @@ def run_monitor_for_schain( schain_record, executor, futures, - dutils + dutils, ) if once: return True From 05872f6955766d8e8539610520ec930194f532d1 Mon Sep 17 00:00:00 2001 From: badrogger Date: Thu, 11 Jul 2024 11:14:48 +0000 Subject: [PATCH 11/13] Fix config monitor tests --- tests/schains/monitor/config_monitor_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/schains/monitor/config_monitor_test.py b/tests/schains/monitor/config_monitor_test.py index 3dc13584..71fbc228 100644 --- a/tests/schains/monitor/config_monitor_test.py +++ b/tests/schains/monitor/config_monitor_test.py @@ -202,7 +202,7 @@ def test_sync_config_monitor_dkg_not_completed( estate.chain_id = 1 config_checks.estate = estate config_am.estate = estate - config_checks.last_dkg_successful = False + config_checks._last_dkg_successful = False assert not econfig.synced(estate) sync_config_monitor = SyncConfigMonitor( From 4f8e1979bdfdd5225f6e9c29a5b6fe436e4c45d6 Mon Sep 17 00:00:00 2001 From: badrogger Date: Thu, 11 Jul 2024 16:31:27 +0000 Subject: [PATCH 12/13] Fix tests --- core/schains/checks.py | 2 ++ core/schains/cleaner.py | 2 ++ tests/conftest.py | 1 + tests/schains/checks_test.py | 13 +++++++++++++ tests/schains/cleaner_test.py | 1 + tests/schains/monitor/action/config_action_test.py | 1 + web/routes/health.py | 1 + 7 files changed, 21 insertions(+) diff --git a/core/schains/checks.py b/core/schains/checks.py index 71a1946e..d3a171c0 100644 --- a/core/schains/checks.py +++ b/core/schains/checks.py @@ -414,6 +414,7 @@ def __init__( stream_version: str, estate: ExternalState, current_nodes: list[ExtendedManagerNodeInfo], + last_dkg_successful: bool, rotation_id: int = 0, *, econfig: Optional[ExternalConfig] = None, @@ -428,6 +429,7 @@ def __init__( rotation_id=rotation_id, stream_version=stream_version, current_nodes=current_nodes, + last_dkg_successful=last_dkg_successful, estate=estate, econfig=econfig, sync_node=sync_node, diff --git a/core/schains/cleaner.py b/core/schains/cleaner.py index 387f1861..985089db 100644 --- a/core/schains/cleaner.py +++ b/core/schains/cleaner.py @@ -224,6 +224,7 @@ def cleanup_schain( schain_name, sync_agent_ranges, rotation_id, + last_dkg_successful, current_nodes, estate, dutils=None @@ -245,6 +246,7 @@ def cleanup_schain( current_nodes=current_nodes, rotation_id=rotation_id, estate=estate, + last_dkg_successful=last_dkg_successful, dutils=dutils, sync_node=SYNC_NODE ) diff --git a/tests/conftest.py b/tests/conftest.py index 807884c4..8b34c172 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -513,6 +513,7 @@ def schain_checks(schain_config, schain_db, current_nodes, rule_controller, esta rule_controller=rule_controller, stream_version=CONFIG_STREAM, current_nodes=current_nodes, + last_dkg_successful=True, estate=estate, dutils=dutils ) diff --git a/tests/schains/checks_test.py b/tests/schains/checks_test.py index 95bb8595..b2cf591e 100644 --- a/tests/schains/checks_test.py +++ b/tests/schains/checks_test.py @@ -96,6 +96,7 @@ def sample_false_checks(schain_config, schain_db, rule_controller, current_nodes schain_record=schain_record, rule_controller=rule_controller, stream_version=CONFIG_STREAM, + last_dkg_successful=True, current_nodes=current_nodes, estate=estate, dutils=dutils @@ -120,6 +121,7 @@ def rules_unsynced_checks( rule_controller=uninited_rule_controller, stream_version=CONFIG_STREAM, current_nodes=current_nodes, + last_dkg_successful=True, estate=estate, dutils=dutils ) @@ -288,6 +290,7 @@ def test_init_checks(skale, schain_db, current_nodes, uninited_rule_controller, rule_controller=uninited_rule_controller, stream_version=CONFIG_STREAM, current_nodes=current_nodes, + last_dkg_successful=True, estate=estate, dutils=dutils ) @@ -316,6 +319,7 @@ def test_exit_code(skale, rule_controller, schain_db, current_nodes, estate, dut rule_controller=rule_controller, stream_version=CONFIG_STREAM, current_nodes=current_nodes, + last_dkg_successful=True, estate=estate, dutils=dutils ) @@ -335,6 +339,7 @@ def test_process(skale, rule_controller, schain_db, current_nodes, estate, dutil rule_controller=rule_controller, stream_version=CONFIG_STREAM, current_nodes=current_nodes, + last_dkg_successful=True, estate=estate, dutils=dutils ) @@ -359,6 +364,7 @@ def test_get_all(schain_config, rule_controller, dutils, current_nodes, schain_d rule_controller=rule_controller, stream_version=CONFIG_STREAM, current_nodes=current_nodes, + last_dkg_successful=True, estate=estate, dutils=dutils ) @@ -381,6 +387,7 @@ def test_get_all(schain_config, rule_controller, dutils, current_nodes, schain_d rule_controller=rule_controller, stream_version=CONFIG_STREAM, current_nodes=current_nodes, + last_dkg_successful=True, estate=estate, dutils=dutils ) @@ -408,6 +415,7 @@ def test_get_all_with_save(node_config, rule_controller, current_nodes, dutils, rule_controller=rule_controller, stream_version=CONFIG_STREAM, current_nodes=current_nodes, + last_dkg_successful=True, estate=estate, dutils=dutils ) @@ -432,9 +440,11 @@ def test_config_updated(skale, rule_controller, schain_db, current_nodes, estate rule_controller=rule_controller, stream_version=CONFIG_STREAM, current_nodes=current_nodes, + last_dkg_successful=True, estate=estate, dutils=dutils ) + assert checks.last_dkg_successful is False assert checks.config_updated upstream_path = UpstreamConfigFilename( @@ -453,6 +463,7 @@ def test_config_updated(skale, rule_controller, schain_db, current_nodes, estate rule_controller=rule_controller, stream_version=CONFIG_STREAM, current_nodes=current_nodes, + last_dkg_successful=True, estate=estate, dutils=dutils ) @@ -466,7 +477,9 @@ def test_config_updated(skale, rule_controller, schain_db, current_nodes, estate rule_controller=rule_controller, stream_version=CONFIG_STREAM, current_nodes=current_nodes, + last_dkg_successful=True, estate=estate, dutils=dutils ) + assert checks.last_dkg_successful is True assert not checks.config_updated diff --git a/tests/schains/cleaner_test.py b/tests/schains/cleaner_test.py index e8319474..4db2e9f4 100644 --- a/tests/schains/cleaner_test.py +++ b/tests/schains/cleaner_test.py @@ -255,6 +255,7 @@ def test_cleanup_schain( schain_name, current_nodes=current_nodes, sync_agent_ranges=[], + last_dkg_successful=True, rotation_id=0, estate=estate, dutils=dutils diff --git a/tests/schains/monitor/action/config_action_test.py b/tests/schains/monitor/action/config_action_test.py index 77176972..57c904ad 100644 --- a/tests/schains/monitor/action/config_action_test.py +++ b/tests/schains/monitor/action/config_action_test.py @@ -37,6 +37,7 @@ def config_checks( schain_record=schain_record, rotation_id=rotation_data['rotation_id'], stream_version=CONFIG_STREAM, + last_dkg_successful=True, current_nodes=current_nodes, estate=estate ) diff --git a/web/routes/health.py b/web/routes/health.py index 56c5258e..2503d674 100644 --- a/web/routes/health.py +++ b/web/routes/health.py @@ -112,6 +112,7 @@ def schains_checks(): rotation_id=rotation_id, stream_version=stream_version, current_nodes=current_nodes, + last_dkg_successful=True, estate=estate, sync_node=False ).get_all(needed=checks_filter) From 6310af162ab56819f9f6acb1450ea226280aa878 Mon Sep 17 00:00:00 2001 From: badrogger Date: Thu, 11 Jul 2024 18:08:11 +0000 Subject: [PATCH 13/13] Fix config updated check --- tests/schains/checks_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/schains/checks_test.py b/tests/schains/checks_test.py index b2cf591e..2e86f4dc 100644 --- a/tests/schains/checks_test.py +++ b/tests/schains/checks_test.py @@ -440,11 +440,11 @@ def test_config_updated(skale, rule_controller, schain_db, current_nodes, estate rule_controller=rule_controller, stream_version=CONFIG_STREAM, current_nodes=current_nodes, - last_dkg_successful=True, + last_dkg_successful=False, estate=estate, dutils=dutils ) - assert checks.last_dkg_successful is False + assert checks.last_dkg_successful.status is False assert checks.config_updated upstream_path = UpstreamConfigFilename( @@ -481,5 +481,5 @@ def test_config_updated(skale, rule_controller, schain_db, current_nodes, estate estate=estate, dutils=dutils ) - assert checks.last_dkg_successful is True + assert checks.last_dkg_successful.status is True assert not checks.config_updated