Skip to content

Commit

Permalink
refactor(get_peers_info): switch to use python-driver
Browse files Browse the repository at this point in the history
When using cqlsh with a cloud bundle, we no longer have
a guarantee that we'll use only one specific node,
since we are connecting to all of them.

Things like reading `system.peers` should now be done
with the python-driver `cql_connection_patient_exclusive`
helper function.
  • Loading branch information
fruch committed Dec 18, 2022
1 parent a269fde commit 1e2dd0f
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 38 deletions.
38 changes: 9 additions & 29 deletions sdcm/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -2671,52 +2671,32 @@ def get_peers_info(self):
'peer', 'data_center', 'host_id', 'rack', 'release_version',
'rpc_address', 'schema_version', 'supported_features',
)
columns_len = len(columns)
cql_result = self.run_cqlsh(
f"select {', '.join(columns)} from system.peers", split=True, verbose=False)
# peer | data_center | host_id | rack | release_version | rpc_address | schema_version | supported_features
# ------+-------------+---------+------+-----------------+-------------+----------------+--------------------

peers_details = {}
with self.parent_cluster.cql_connection_patient_exclusive(self) as session:
result = session.execute(f"select {', '.join(columns)} from system.peers")
cql_results = result.all()
err = ''
for line in cql_result:
if '|' not in line or all(column in line for column in columns):
# NOTE: skip non-rows and header lines
continue
line_splitted = line.split('|')
if len(line_splitted) != columns_len:
current_err = f"Failed to parse the cqlsh command output line: \n{line}\n"
LOGGER.warning(current_err)
err += current_err
continue
peer = line_splitted[0].strip()
for row in cql_results:
peer = row.peer
try:
ipaddress.ip_address(peer)
ipaddress.ip_address(row.peer)
except ValueError as exc:
current_err = f"Peer '{peer}' is not an IP address, err: {exc}\n"
LOGGER.warning(current_err)
err += current_err
continue

if node := self.parent_cluster.find_node_by_ip(peer):
peers_details[node] = {
'data_center': line_splitted[1].strip(),
'host_id': line_splitted[2].strip(),
'rack': line_splitted[3].strip(),
'release_version': line_splitted[4].strip(),
'rpc_address': line_splitted[5].strip(),
'schema_version': line_splitted[6].strip(),
'supported_features': line_splitted[7].strip(),
}
peers_details[node] = row._asdict()
else:
current_err = f"'get_peers_info' failed to find a node by IP: {peer}\n"
LOGGER.error(current_err)
err += current_err

if not (peers_details or err):
LOGGER.error(
"No data, no errors. Check the output from the cqlsh for the correctness:\n%s",
cql_result)
"No data, no errors. Check the output from the cql command for the correctness:\n%s",
cql_results)
return peers_details

@retrying(n=5, sleep_time=10, raise_on_exceeded=False)
Expand Down
11 changes: 6 additions & 5 deletions sdcm/utils/health_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,9 @@ def check_nulls_in_peers(gossip_info, peers_details, current_node) -> HealthEven

# By Asias request: https://github.com/scylladb/scylla/issues/6397#issuecomment-666893877
LOGGER.debug("Print all columns from system.peers for peer %s", node)
current_node.run_cqlsh(f"select * from system.peers where peer = '{node.ip_address}'", split=True, verbose=True)

with current_node.parent_cluster.cql_connection_patient_exclusive(current_node) as session:
result = session.execute(f"select * from system.peers where peer = '{node.ip_address}'")
LOGGER.debug(result.one()._asdict())
if node in gossip_info and gossip_info[node]['status'] not in current_node.GOSSIP_STATUSES_FILTER_OUT:
yield ClusterHealthValidatorEvent.NodePeersNulls(
severity=Severity.ERROR,
Expand Down Expand Up @@ -167,7 +168,7 @@ def check_schema_version(gossip_info, peers_details, nodes_status, current_node)
)
continue

if node_info['schema'] != peers_details[node]['schema_version']:
if node_info['schema'] != str(peers_details[node]['schema_version']):
LOGGER.debug(debug_message)
yield ClusterHealthValidatorEvent.NodeSchemaVersion(
severity=Severity.ERROR,
Expand Down Expand Up @@ -205,7 +206,7 @@ def check_schema_version(gossip_info, peers_details, nodes_status, current_node)
)

# Validate that same schema on all nodes in the SYSTEM.PEERS
schema_version_on_all_nodes = [values['schema_version'] for node, values in peers_details.items()
schema_version_on_all_nodes = [str(values['schema_version']) for node, values in peers_details.items()
if node in gossip_info and gossip_info[node]['status'] not in
current_node.GOSSIP_STATUSES_FILTER_OUT]

Expand Down Expand Up @@ -252,7 +253,7 @@ def check_schema_agreement_in_gossip_and_peers(node, retries: int = CHECK_NODE_H
for current_node, data in gossip_info.items():
if not (data['status'] == "NORMAL" and current_node in peers_info):
continue
if data["schema"] != peers_info[current_node]['schema_version']:
if data["schema"] != str(peers_info[current_node]['schema_version']):
current_err = (f"{message_pref} Schema version is not same in "
f"the gossip and peers for {current_node}")
LOGGER.warning(current_err)
Expand Down
11 changes: 7 additions & 4 deletions unit_tests/test_utils_health_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@


import unittest
from unittest.mock import MagicMock
from uuid import UUID

from sdcm.sct_events import Severity
from sdcm.utils.health_checker import (
Expand All @@ -31,6 +33,7 @@ def __init__(self, ip_address, name):
self.ip_address = ip_address
self.name = name
self.running_nemesis = None
self.parent_cluster = MagicMock()

@staticmethod
def print_node_running_nemesis(_):
Expand Down Expand Up @@ -93,19 +96,19 @@ def get_gossip_info(self):
PEERS_INFO = {
node2: {
'data_center': 'datacenter1',
'host_id': 'b231fe54-8093-4d5c-9a35-b5e34dc81500',
'host_id': UUID('b231fe54-8093-4d5c-9a35-b5e34dc81500'),
'rack': 'rack1',
'release_version': '3.0.8',
'rpc_address': '127.0.0.2',
'schema_version': 'cbe15453-33f3-3387-aaf1-4120548f41e8',
'schema_version': UUID('cbe15453-33f3-3387-aaf1-4120548f41e8'),
},
node3: {
'data_center': 'datacenter1',
'host_id': 'e11cb4ea-a129-48aa-a9e9-7815dcd2828c',
'host_id': UUID('e11cb4ea-a129-48aa-a9e9-7815dcd2828c'),
'rack': 'rack1',
'release_version': '3.0.8',
'rpc_address': '127.0.0.3',
'schema_version': 'cbe15453-33f3-3387-aaf1-4120548f41e8',
'schema_version': UUID('cbe15453-33f3-3387-aaf1-4120548f41e8'),
},
}

Expand Down

0 comments on commit 1e2dd0f

Please sign in to comment.