Skip to content

Commit

Permalink
Merge pull request #1099 from skalenetwork/fix-schain-removal
Browse files Browse the repository at this point in the history
Fix cleaner
  • Loading branch information
badrogger authored Aug 19, 2024
2 parents 6c1da5a + 780c959 commit 0ab2ced
Show file tree
Hide file tree
Showing 9 changed files with 74 additions and 83 deletions.
20 changes: 13 additions & 7 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ env:

jobs:
test_core:
runs-on: ubuntu-latest
runs-on: self-hosted
env:
ETH_PRIVATE_KEY: ${{ secrets.ETH_PRIVATE_KEY }}
ENDPOINT: http://127.0.0.1:8545
Expand All @@ -22,13 +22,11 @@ jobs:
with:
submodules: true

- name: Set up Python ${{ env.PYTHON_VERSION }}
uses: actions/setup-python@v1
with:
python-version: ${{ env.PYTHON_VERSION }}

- name: Install python dependencies
run: bash ./scripts/install_python_dependencies.sh
run: |
source ~/ga/bin/activate
python --version
bash ./scripts/install_python_dependencies.sh
- name: Lint with flake8
run: flake8 .
Expand All @@ -53,6 +51,8 @@ jobs:
- name: Run core tests
run: |
source ~/ga/bin/activate
python --version
bash ./scripts/run_core_tests.sh
- name: Cleanup docker artifacts
Expand All @@ -76,6 +76,12 @@ jobs:
sudo lsblk -f
sudo free -h
- name: Cleanup docker artifacts
if: always()
run: |
docker rm -f $(docker ps -aq)
docker rmi -f $(docker images -q)
- name: Run codecov
run: |
codecov -t $CODECOV_TOKEN
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.7.0
2.7.1
110 changes: 43 additions & 67 deletions core/schains/cleaner.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@
import os
import shutil
from multiprocessing import Process
from typing import Optional

from sgx import SgxClient
from skale import Skale

from core.node import get_current_nodes, get_skale_node_version
from core.schains.checks import SChainChecks
Expand All @@ -33,7 +35,7 @@
from core.schains.config.helper import (
get_base_port_from_config,
get_node_ips_from_config,
get_own_ip_from_config
get_own_ip_from_config,
)
from core.schains.process_manager_helper import terminate_schain_process
from core.schains.runner import get_container_name, is_exited
Expand All @@ -43,9 +45,7 @@

from tools.configs import SGX_CERTIFICATES_FOLDER, SYNC_NODE
from tools.configs.schains import SCHAINS_DIR_PATH
from tools.configs.containers import (
SCHAIN_CONTAINER, IMA_CONTAINER, SCHAIN_STOP_TIMEOUT
)
from tools.configs.containers import SCHAIN_CONTAINER, IMA_CONTAINER, SCHAIN_STOP_TIMEOUT
from tools.docker_utils import DockerUtils
from tools.helper import merged_unique, read_json, is_node_part_of_chain
from tools.sgx_utils import SGX_SERVER_URL
Expand All @@ -59,8 +59,7 @@


def run_cleaner(skale, node_config):
process = Process(name='cleaner', target=monitor,
args=(skale, node_config))
process = Process(name='cleaner', target=monitor, args=(skale, node_config))
process.start()
logger.info('Cleaner process started')
process.join(JOIN_TIMEOUT)
Expand All @@ -84,12 +83,7 @@ def remove_schain_container(schain_name: str, dutils: DockerUtils = None):
dutils = dutils or DockerUtils()
log_remove('container', schain_name)
schain_container_name = get_container_name(SCHAIN_CONTAINER, schain_name)
return dutils.safe_rm(
schain_container_name,
v=True,
force=True,
timeout=SCHAIN_STOP_TIMEOUT
)
return dutils.safe_rm(schain_container_name, v=True, force=True, timeout=SCHAIN_STOP_TIMEOUT)


def remove_ima_container(schain_name: str, dutils: DockerUtils = None):
Expand All @@ -109,10 +103,7 @@ def monitor(skale, node_config, dutils=None):
dutils = dutils or DockerUtils()
logger.info('Cleaner procedure started.')
schains_on_node = get_schains_on_node(dutils=dutils)
schain_names_on_contracts = get_schain_names_from_contract(
skale,
node_config.id
)
schain_names_on_contracts = get_schain_names_from_contract(skale, node_config.id)
logger.info(f'\nsChains on contracts: {schain_names_on_contracts}\n\
sChains on node: {schains_on_node}')

Expand All @@ -121,12 +112,7 @@ def monitor(skale, node_config, dutils=None):
logger.warning(f'sChain {schain_name} was found on node, but not on contracts: \
{schain_names_on_contracts}, going to remove it!')
try:
ensure_schain_removed(
skale,
schain_name,
node_config.id,
dutils=dutils
)
ensure_schain_removed(skale, schain_name, node_config.id, dutils=dutils)
except Exception:
logger.exception(f'sChain removal {schain_name} failed')
logger.info('Cleanup procedure finished')
Expand All @@ -140,8 +126,7 @@ def get_schain_names_from_contract(skale, node_id):
def get_schains_with_containers(dutils=None):
dutils = dutils or DockerUtils()
return [
c.name.replace('skale_schain_', '', 1)
for c in dutils.get_all_schain_containers(all=True)
c.name.replace('skale_schain_', '', 1) for c in dutils.get_all_schain_containers(all=True)
]


Expand All @@ -150,11 +135,7 @@ def get_schains_on_node(dutils=None):
schains_with_dirs = os.listdir(SCHAINS_DIR_PATH)
schains_with_container = get_schains_with_containers(dutils)
schains_active_records = get_schains_names()
return sorted(merged_unique(
schains_with_dirs,
schains_with_container,
schains_active_records
))
return sorted(merged_unique(schains_with_dirs, schains_with_container, schains_active_records))


def schain_names_to_ids(skale, schain_names):
Expand All @@ -172,70 +153,77 @@ def ensure_schain_removed(skale, schain_name, node_id, dutils=None):
if not is_schain_exist:
msg = arguments_list_string(
{'sChain name': schain_name},
'Going to remove this sChain because it was removed from contracts'
'Going to remove this sChain because it was removed from contracts',
)
return remove_schain(skale, node_id, schain_name, msg, dutils=dutils)

if skale.node_rotation.is_rotation_active(schain_name):
msg = arguments_list_string(
{'sChain name': schain_name},
'Rotation is in progress (new group created), skipping cleaner'
'Rotation is in progress (new group created), skipping cleaner',
)
logger.info(msg)
return

if not is_node_part_of_chain(skale, schain_name, node_id):
msg = arguments_list_string(
{'sChain name': schain_name},
'Going to remove this sChain because this node is not in the group'
'Going to remove this sChain because this node is not in the group',
)
return remove_schain(skale, node_id, schain_name, msg, dutils=dutils)

msg = arguments_list_string(
{'sChain name': schain_name},
'sChain do not satisfy removal condidions'
{'sChain name': schain_name}, 'sChain do not satisfy removal condidions'
)
logger.warning(msg)


def remove_schain(skale, node_id, schain_name, msg, dutils=None) -> None:
def remove_schain(
skale: Skale,
node_id: int,
schain_name: str,
msg: str,
dutils: Optional[DockerUtils] = None,
) -> None:
schain_record = upsert_schain_record(schain_name)
logger.warning(msg)
terminate_schain_process(schain_record)

delete_bls_keys(skale, schain_name)
sync_agent_ranges = get_sync_agent_ranges(skale)
rotation_data = skale.node_rotation.get_rotation(schain_name)
rotation_id = rotation_data['rotation_id']
estate = ExternalConfig(name=schain_name).get()
current_nodes = get_current_nodes(skale, schain_name)
group_index = skale.schains.name_to_group_id(schain_name)
last_dkg_successful = skale.dkg.is_last_dkg_successful(group_index)

cleanup_schain(
node_id,
schain_name,
sync_agent_ranges,
rotation_id=rotation_id,
last_dkg_successful=last_dkg_successful,
current_nodes=current_nodes,
estate=estate,
dutils=dutils
dutils=dutils,
)


def cleanup_schain(
node_id,
schain_name,
sync_agent_ranges,
rotation_id,
last_dkg_successful,
current_nodes,
estate,
dutils=None
node_id: int,
schain_name: str,
sync_agent_ranges: list,
rotation_id: int,
last_dkg_successful: bool,
current_nodes: list,
estate: ExternalConfig,
dutils=None,
) -> None:
dutils = dutils or DockerUtils()
schain_record = upsert_schain_record(schain_name)

rc = get_default_rule_controller(
name=schain_name,
sync_agent_ranges=sync_agent_ranges
)
rc = get_default_rule_controller(name=schain_name, sync_agent_ranges=sync_agent_ranges)
stream_version = get_skale_node_version()
checks = SChainChecks(
schain_name,
Expand All @@ -248,13 +236,11 @@ def cleanup_schain(
estate=estate,
last_dkg_successful=last_dkg_successful,
dutils=dutils,
sync_node=SYNC_NODE
sync_node=SYNC_NODE,
)
status = checks.get_all()
if status['skaled_container'] or is_exited(
schain_name,
container_type=ContainerType.schain,
dutils=dutils
schain_name, container_type=ContainerType.schain, dutils=dutils
):
remove_schain_container(schain_name, dutils=dutils)
if status['volume']:
Expand All @@ -267,18 +253,11 @@ def cleanup_schain(
ranges = []
if estate is not None:
ranges = estate.ranges
rc.configure(
base_port=base_port,
own_ip=own_ip,
node_ips=node_ips,
sync_ip_ranges=ranges
)
rc.configure(base_port=base_port, own_ip=own_ip, node_ips=node_ips, sync_ip_ranges=ranges)
rc.cleanup()
if estate is not None and estate.ima_linked:
if status.get('ima_container', False) or is_exited(
schain_name,
container_type=ContainerType.ima,
dutils=dutils
schain_name, container_type=ContainerType.ima, dutils=dutils
):
remove_ima_container(schain_name, dutils=dutils)
if status['config_dir']:
Expand All @@ -290,15 +269,12 @@ def delete_bls_keys(skale, schain_name):
last_rotation_id = skale.schains.get_last_rotation_id(schain_name)
for i in range(last_rotation_id + 1):
try:
secret_key_share_filepath = get_secret_key_share_filepath(
schain_name, i)
secret_key_share_filepath = get_secret_key_share_filepath(schain_name, i)
if os.path.isfile(secret_key_share_filepath):
secret_key_share_config = read_json(
secret_key_share_filepath) or {}
secret_key_share_config = read_json(secret_key_share_filepath) or {}
bls_key_name = secret_key_share_config.get('key_share_name')
if bls_key_name:
sgx = SgxClient(SGX_SERVER_URL,
path_to_cert=SGX_CERTIFICATES_FOLDER)
sgx = SgxClient(SGX_SERVER_URL, path_to_cert=SGX_CERTIFICATES_FOLDER)
sgx.delete_bls_key(bls_key_name)
except Exception:
logger.exception(f'Removing secret_key for rotation {i} failed')
1 change: 1 addition & 0 deletions scripts/helper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ export_test_env () {
export FLASK_APP_HOST=0.0.0.0
export FLASK_APP_PORT=3008
export FLASK_DEBUG_MODE=True
export REDIS_URI="redis://@127.0.0.1:6381"
export TG_CHAT_ID=-1231232
export TG_API_KEY=123
export ENV_TYPE=devnet
Expand Down
4 changes: 0 additions & 4 deletions scripts/install_python_dependencies.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
#!/usr/bin/env bash
set -ea

python -m pip install --upgrade pip
pip install -r requirements.txt
pip install -r requirements-dev.txt
pip uninstall pycrypto -y
pip uninstall pycryptodome -y
pip install pycryptodome
find . -name "*.pyc" -exec rm -f {} \;
2 changes: 1 addition & 1 deletion scripts/run_redis.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ set -e

docker rm -f redis || true
export DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
docker run -v $DIR/../tests/redis-conf:/config:Z -p 6379:6379 --name=redis -d redis:6.0-alpine
docker run -v $DIR/../tests/redis-conf:/config -p 6381:6381 --name=redis -d redis:6.0-alpine
4 changes: 2 additions & 2 deletions tests/redis-conf/redis.conf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
bind 127.0.0.1
protected-mode yes
port 6379
port 6381

tcp-backlog 511
timeout 0
Expand All @@ -10,7 +10,7 @@ daemonize no

supervised no

pidfile /var/run/redis_6379.pid
pidfile /var/run/redis_6381.pid

loglevel notice
logfile ""
Expand Down
2 changes: 1 addition & 1 deletion tests/routes/node_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ def test_btrfs_info(skale_bp, skale):
data = get_bp_data(skale_bp, get_api_url(BLUEPRINT_NAME, 'btrfs-info'))
assert data['status'] == 'ok'
payload = data['payload']
assert payload['kernel_module'] is False
assert payload['kernel_module'] is True


@pytest.fixture
Expand Down
12 changes: 12 additions & 0 deletions tests/schains/cleaner_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from core.schains.cleaner import (
cleanup_schain,
delete_bls_keys,
remove_schain,
monitor,
get_schains_on_node,
remove_config_dir,
Expand Down Expand Up @@ -238,6 +239,17 @@ def test_get_schains_on_node(schain_dirs_for_monitor,
]).issubset(set(result))


def test_remove_schain(skale, schain_db, node_config, dutils):
schain_name = schain_db
remove_schain(skale, node_config.id, schain_name, msg='Test remove_schain', dutils=dutils)
container_name = SCHAIN_CONTAINER_NAME_TEMPLATE.format(schain_name)
assert not is_container_running(dutils, container_name)
schain_dir_path = os.path.join(SCHAINS_DIR_PATH, schain_name)
assert not os.path.isdir(schain_dir_path)
record = SChainRecord.get_by_name(schain_name)
assert record.is_deleted is True


def test_cleanup_schain(
schain_db,
node_config,
Expand Down

0 comments on commit 0ab2ced

Please sign in to comment.