Skip to content

Commit

Permalink
Refactor redfish login and logout methods. (#64)
Browse files Browse the repository at this point in the history
* Refactor redfish login and logout methods.

redfish context manager could potentially failed during __enter__ which
might causes the __exit__ function never called. We should ensure the
redfish client is logged out in any exception. This can be achieved by
adding a finally block to the try-except block.

* Bump redfish default timeout from 3s to 15s.
  • Loading branch information
chanchiwai-ray authored Mar 8, 2024
1 parent ba509b6 commit a3e06c5
Show file tree
Hide file tree
Showing 6 changed files with 160 additions and 200 deletions.
2 changes: 1 addition & 1 deletion prometheus_hardware_exporter/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def parse_command_line() -> argparse.Namespace:
)
parser.add_argument(
"--redfish-client-timeout",
help="Redfish client timeout in seconds for initial connection (default: 3) ",
help="Redfish client timeout in seconds for initial connection (default: 15) ",
default=DEFAULT_REDFISH_CLIENT_TIMEOUT,
type=int,
)
Expand Down
61 changes: 34 additions & 27 deletions prometheus_hardware_exporter/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -995,41 +995,48 @@ def fetch(self) -> List[Payload]:
if not service_status:
return payloads

redfish_helper = None
try:
with RedfishHelper(self.config) as redfish_helper:
payloads.append(Payload(name="redfish_call_success", value=1.0))

processor_count: Dict[str, int]
processor_data: Dict[str, List]
storage_controller_count: Dict[str, int]
storage_controller_data: Dict[str, List]
storage_drive_count: Dict[str, int]
storage_drive_data: Dict[str, List]
memory_dimm_count: Dict[str, int]
memory_dimm_data: Dict[str, List]

sensor_data: Dict[str, List] = redfish_helper.get_sensor_data()
processor_count, processor_data = redfish_helper.get_processor_data()
(
storage_controller_count,
storage_controller_data,
) = redfish_helper.get_storage_controller_data()
network_adapter_count: Dict[str, Any] = redfish_helper.get_network_adapter_data()
chassis_data: Dict[str, Dict] = redfish_helper.get_chassis_data()
storage_drive_count, storage_drive_data = redfish_helper.get_storage_drive_data()
memory_dimm_count, memory_dimm_data = redfish_helper.get_memory_dimm_data()
smart_storage_health_data: Dict[str, Any] = (
redfish_helper.get_smart_storage_health_data()
)
except (
redfish_helper = RedfishHelper(self.config)
redfish_helper.login()
payloads.append(Payload(name="redfish_call_success", value=1.0))

processor_count: Dict[str, int]
processor_data: Dict[str, List]
storage_controller_count: Dict[str, int]
storage_controller_data: Dict[str, List]
storage_drive_count: Dict[str, int]
storage_drive_data: Dict[str, List]
memory_dimm_count: Dict[str, int]
memory_dimm_data: Dict[str, List]

sensor_data: Dict[str, List] = redfish_helper.get_sensor_data()
processor_count, processor_data = redfish_helper.get_processor_data()
(
storage_controller_count,
storage_controller_data,
) = redfish_helper.get_storage_controller_data()
network_adapter_count: Dict[str, Any] = redfish_helper.get_network_adapter_data()
chassis_data: Dict[str, Dict] = redfish_helper.get_chassis_data()
storage_drive_count, storage_drive_data = redfish_helper.get_storage_drive_data()
memory_dimm_count, memory_dimm_data = redfish_helper.get_memory_dimm_data()
smart_storage_health_data: Dict[str, Any] = (
redfish_helper.get_smart_storage_health_data()
)
except ( # pylint: disable=W0718
ConnectionError,
InvalidCredentialsError,
RetriesExhaustedError,
SessionCreationError,
Exception,
) as err:
logger.exception("Exception occurred while getting redfish object: %s", err)
logger.exception("Exception occurred while using redfish object: %s", err)
payloads.append(Payload(name="redfish_call_success", value=0.0))
return payloads
finally:
# Guarding against `RedfishHelper` crashes, and `redfish_helper` remains `None`
if redfish_helper:
redfish_helper.logout()

metrics: Dict[str, Any] = {
"sensor_data": sensor_data,
Expand Down
31 changes: 16 additions & 15 deletions prometheus_hardware_exporter/collectors/redfish.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from cachetools.func import ttl_cache
from redfish import redfish_client
from redfish.rest.v1 import (
HttpClient,
InvalidCredentialsError,
RestResponse,
RetriesExhaustedError,
Expand Down Expand Up @@ -76,30 +75,32 @@ def __init__(self, config: Config) -> None:
self.password = config.redfish_password
self.timeout = config.redfish_client_timeout
self.max_retry = config.redfish_client_max_retry
self.redfish_obj: HttpClient

def __enter__(self) -> Self:
"""Login to redfish while entering context manager."""
logger.debug("(service) Trying to login to redfish service ...")
self.redfish_obj = redfish_client(
base_url=self.host,
username=self.username,
password=self.password,
timeout=self.timeout,
max_retry=self.max_retry,
)
self.redfish_obj.login(auth="session")

def __enter__(self) -> Self:
"""Login as a context manager."""
self.login()
return self

def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
"""Logout from redfish while exiting context manager."""
"""Logout as a context manager."""
self.logout()

def login(self) -> None:
"""Login to redfish."""
logger.debug("(service) Trying to login to redfish service ...")
self.redfish_obj.login(auth="session")

def logout(self) -> None:
"""Logout from redfish."""
if self.redfish_obj is not None:
self.redfish_obj.logout()
logger.debug("(service) Logged out from redfish service ...")
self.redfish_obj.logout()
logger.debug("(service) Logged out from redfish service ...")

def get_sensor_data(self) -> Dict[str, List]:
"""Get sensor data.
Expand Down Expand Up @@ -136,12 +137,12 @@ def _retrieve_redfish_sensor_data(self) -> List[Any]:
sensors = redfish_utilities.get_sensors(self.redfish_obj)
return sensors

def _verify_redfish_call(self, redfish_obj: HttpClient, uri: str) -> Optional[Dict[str, Any]]:
def _verify_redfish_call(self, uri: str) -> Optional[Dict[str, Any]]:
"""Return REST response for GET request API call on the URI.
Returns None if URI isn't present.
"""
resp: RestResponse = redfish_obj.get(uri)
resp: RestResponse = self.redfish_obj.get(uri)
if resp.status == 200:
return resp.dict
logger.debug("Not able to query from URI: %s.", uri)
Expand Down Expand Up @@ -343,7 +344,7 @@ def get_network_adapter_data(self) -> Dict[str, int]:
# eg: /redfish/v1/Chassis/1/NetworkAdapters
network_adapters_root_uri = network_adapters_root_uri_pattern.format(chassis_id)
network_adapters: Optional[Dict[str, Any]] = self._verify_redfish_call(
self.redfish_obj, network_adapters_root_uri
network_adapters_root_uri
)
if not network_adapters:
logger.debug("No network adapters could be found on chassis id: %s", chassis_id)
Expand Down Expand Up @@ -579,7 +580,7 @@ def get_smart_storage_health_data(self) -> Dict[str, Any]:
for chassis_id in chassis_ids:
smart_storage_uri = smart_storage_root_uri_pattern.format(chassis_id)
smart_storage_data: Optional[Dict[str, Any]] = self._verify_redfish_call(
self.redfish_obj, smart_storage_uri
smart_storage_uri
)
if not smart_storage_data:
logger.debug("Smart Storage URI endpoint not found for chassis ID: %s", chassis_id)
Expand Down
2 changes: 1 addition & 1 deletion prometheus_hardware_exporter/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
DEFAULT_CONFIG = os.path.join(os.environ.get("SNAP_DATA", "./"), "config.yaml")

DEFAULT_IPMI_SEL_INTERVAL = 86400
DEFAULT_REDFISH_CLIENT_TIMEOUT = 3
DEFAULT_REDFISH_CLIENT_TIMEOUT = 15
DEFAULT_REDFISH_CLIENT_MAX_RETRY = 1
DEFAULT_REDFISH_DISCOVER_CACHE_TTL = 86400

Expand Down
30 changes: 20 additions & 10 deletions tests/unit/test_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -635,12 +635,17 @@ def _discover(*args, **kwargs):
mock_redfish_client.assert_not_called()

@patch("prometheus_hardware_exporter.collector.logger")
@patch("prometheus_hardware_exporter.collector.RedfishHelper")
@patch("prometheus_hardware_exporter.collectors.redfish.redfish_client")
@patch(
"prometheus_hardware_exporter.collectors.redfish.RedfishHelper.get_cached_discover_method"
)
def test_redfish_no_login(
self, mock_get_cached_discover_method, mock_redfish_client, mock_logger
self,
mock_get_cached_discover_method,
mock_redfish_client,
mock_redfish_helper,
mock_logger,
):
"""Test redfish collector when redfish login doesn't work."""

Expand All @@ -652,29 +657,33 @@ def _discover(*args, **kwargs):

mock_get_cached_discover_method.side_effect = cached_discover
redfish_collector = RedfishCollector(Mock())
redfish_collector.redfish_helper = Mock()

mock_helper = Mock()
mock_redfish_helper.return_value = mock_helper

for err in [
Exception(),
ConnectionError(),
InvalidCredentialsError(),
SessionCreationError(),
RetriesExhaustedError(),
]:
mock_redfish_client.side_effect = err
mock_helper.login.side_effect = err
payloads = redfish_collector.collect()

# Check whether these 2 payloads are present
# redfish_service_available which is set to value 1
# redfish_call_success which is set to value 0
self.assertEqual(len(list(payloads)), 2)
mock_logger.exception.assert_called_with(
"Exception occurred while getting redfish object: %s", err
"Exception occurred while using redfish object: %s", err
)
mock_redfish_client.assert_called()
mock_redfish_helper.assert_called()
mock_helper.login.assert_called()
mock_helper.logout.assert_called()

@patch("prometheus_hardware_exporter.collector.logger")
@patch("prometheus_hardware_exporter.collectors.redfish.RedfishHelper.__exit__")
@patch("prometheus_hardware_exporter.collectors.redfish.RedfishHelper.__enter__")
@patch("prometheus_hardware_exporter.collector.RedfishHelper")
@patch("prometheus_hardware_exporter.collectors.redfish.redfish_client")
@patch(
"prometheus_hardware_exporter.collectors.redfish.RedfishHelper.get_cached_discover_method"
Expand All @@ -683,8 +692,7 @@ def test_redfish_installed_and_okay(
self,
mock_get_cached_discover_method,
mock_redfish_client,
mock_redfish_helper_enter,
mock_redfish_helper_exit,
mock_redfish_helper,
mock_logger,
):
"""Test redfish collector and check if all metrics are present in payload."""
Expand All @@ -700,7 +708,7 @@ def _discover(*args, **kwargs):
redfish_collector = RedfishCollector(Mock())

mock_helper = Mock()
mock_redfish_helper_enter.return_value = mock_helper
mock_redfish_helper.return_value = mock_helper

mock_get_sensor_data = Mock()
mock_get_processor_data = Mock()
Expand Down Expand Up @@ -861,6 +869,8 @@ def _discover(*args, **kwargs):
for payload in payloads:
self.assertIn(payload.name, available_metrics)

mock_helper.login.assert_called()
mock_helper.logout.assert_called()
mock_logger.warning.assert_called_with(
"Failed to get %s via redfish", "network_adapter_count"
)
Expand Down
Loading

0 comments on commit a3e06c5

Please sign in to comment.