From 019afc43a88423332e7f6af944a1b50e112bd702 Mon Sep 17 00:00:00 2001 From: jneo8 Date: Tue, 8 Oct 2024 16:16:05 +0800 Subject: [PATCH] refactor: Remove HwToolHelper - Restructure StrategyABC to have consistence interface - Remove HWToolHelper and move the logic into service layer - Change the `install` function return on BaseExporter for blocked message required - New ResourceMixin embedded into BaseExporter --- src/charm.py | 36 +++------- src/hw_tools.py | 184 +++++++----------------------------------------- src/service.py | 179 ++++++++++++++++++++++++++++------------------ 3 files changed, 144 insertions(+), 255 deletions(-) diff --git a/src/charm.py b/src/charm.py index ec8a8534..3ca62622 100755 --- a/src/charm.py +++ b/src/charm.py @@ -12,7 +12,7 @@ from ops.framework import EventBase, StoredState from ops.model import ActiveStatus, BlockedStatus, MaintenanceStatus -from hw_tools import HWTool, HWToolHelper, detect_available_tools, remove_legacy_smartctl_exporter +from hw_tools import HWTool, detect_available_tools, remove_legacy_smartctl_exporter from service import BaseExporter, DCGMExporter, ExporterError, HardwareExporter, SmartCtlExporter logger = logging.getLogger(__name__) @@ -26,7 +26,6 @@ class HardwareObserverCharm(ops.CharmBase): def __init__(self, *args: Any) -> None: """Init.""" super().__init__(*args) - self.hw_tool_helper = HWToolHelper() # Add refresh_events to COSAgentProvider to update relation data when # config changed (default behavior) and upgrade charm. This is useful @@ -75,6 +74,7 @@ def exporters(self) -> List[BaseExporter]: exporters.append( HardwareExporter( self.charm_dir, + self.model.resources, self.model.config, stored_tools, ) @@ -134,47 +134,30 @@ def _on_install_or_upgrade(self, event: EventBase) -> None: remove_legacy_smartctl_exporter() - stored_tools = self.get_stored_tools() - msg: str resource_installed: bool - # Install hw tools - resource_installed, msg = self.hw_tool_helper.install(self.model.resources, stored_tools) - - self._stored.resource_installed = resource_installed - if not resource_installed: - logger.warning(msg) - self.model.unit.status = BlockedStatus(msg) - return - # Install exporter services and resources for exporter in self.exporters: - exporter_install_ok = exporter.install() + exporter_install_ok, msg = exporter.install() if not exporter_install_ok: resource_installed = False self._stored.resource_installed = resource_installed - msg = f"Exporter {exporter.exporter_name} install failed" logger.warning(msg) self.model.unit.status = BlockedStatus(msg) return + self._stored.resource_installed = True self._on_update_status(event) def _on_remove(self, _: EventBase) -> None: """Remove everything when charm is being removed.""" logger.info("Start to remove.") - # Remove binary tool - self.hw_tool_helper.remove( - self.model.resources, - self.get_stored_tools(), - ) - self._stored.resource_installed = False - # Remove exporters for exporter in self.exporters: self.model.unit.status = MaintenanceStatus(f"Removing {exporter.exporter_name}...") exporter.uninstall() + self._stored.resource_installed = False def _on_update_status(self, _: EventBase) -> None: # noqa: C901 """Update the charm's status.""" @@ -187,16 +170,15 @@ def _on_update_status(self, _: EventBase) -> None: # noqa: C901 return for exporter in self.exporters: + resource_ready, resource_ready_message = exporter.check_resources() + if not resource_ready: + self.model.unit.status = BlockedStatus(resource_ready_message) + return config_valid, config_valid_message = exporter.validate_exporter_configs() if not config_valid: self.model.unit.status = BlockedStatus(config_valid_message) return - hw_tool_ok, error_msg = self.hw_tool_helper.check_installed(self.get_stored_tools()) - if not hw_tool_ok: - self.model.unit.status = BlockedStatus(error_msg) - return - # Check health of all exporters exporters_health = [self._check_exporter_health(exporter) for exporter in self.exporters] diff --git a/src/hw_tools.py b/src/hw_tools.py index b83cd565..0d8bf8d9 100644 --- a/src/hw_tools.py +++ b/src/hw_tools.py @@ -11,14 +11,14 @@ import subprocess from abc import ABCMeta, abstractmethod from pathlib import Path -from typing import Dict, List, Set, Tuple +from typing import List, Set import requests import urllib3 from charms.operator_libs_linux.v0 import apt from charms.operator_libs_linux.v1 import systemd from charms.operator_libs_linux.v2 import snap -from ops.model import ModelError, Resources +from ops.model import Resources import apt_helpers from checksum import ( @@ -159,32 +159,28 @@ def name(self) -> HWTool: def check(self) -> bool: """Check installation status of the tool.""" - -class APTStrategyABC(StrategyABC, metaclass=ABCMeta): - """Strategy for apt install tool.""" - - @abstractmethod def install(self) -> None: """Installation details.""" - @abstractmethod def remove(self) -> None: """Remove details.""" - # Note: The repo and keys should be remove when removing - # hook is triggered. But currently the apt lib don't have - # the remove option. class TPRStrategyABC(StrategyABC, metaclass=ABCMeta): """Third party resource strategy class.""" - @abstractmethod - def install(self, path: Path) -> None: - """Installation details.""" + resources: Resources - @abstractmethod - def remove(self) -> None: - """Remove details.""" + def __init__(self, resources: Resources) -> None: + """Inject the Resource object for fetching resource.""" + self.resources = resources + + def _fetch_tool(self) -> Path: + path = self.resources.fetch(TPR_RESOURCES[self._name]) + if file_is_empty(path): + logger.info("Skipping %s resource install since empty file was detected.", self.name) + raise ResourceFileSizeZeroError(tool=self._name, path=path) + return path class SnapStrategy(StrategyABC): @@ -266,7 +262,7 @@ def _create_custom_metrics(self) -> None: raise err -class NVIDIADriverStrategy(APTStrategyABC): +class NVIDIADriverStrategy(StrategyABC): """NVIDIA driver strategy class.""" _name = HWTool.NVIDIA_DRIVER @@ -364,11 +360,10 @@ class StorCLIStrategy(TPRStrategyABC): origin_path = Path("/opt/MegaRAID/storcli/storcli64") symlink_bin = TOOLS_DIR / HWTool.STORCLI.value - def install(self, path: Path) -> None: + def install(self) -> None: """Install storcli.""" - if file_is_empty(path): - logger.info("Skipping StorCLI resource install since empty file was detected.") - raise ResourceFileSizeZeroError(tool=self._name, path=path) + path = self._fetch_tool() + if not validate_checksum(STORCLI_VERSION_INFOS, path): raise ResourceChecksumError install_deb(self.name, path) @@ -392,11 +387,9 @@ class PercCLIStrategy(TPRStrategyABC): origin_path = Path("/opt/MegaRAID/perccli/perccli64") symlink_bin = TOOLS_DIR / HWTool.PERCCLI.value - def install(self, path: Path) -> None: + def install(self) -> None: """Install perccli.""" - if file_is_empty(path): - logger.info("Skipping PERCCLI resource install since empty file was detected.") - raise ResourceFileSizeZeroError(tool=self._name, path=path) + path = self._fetch_tool() if not validate_checksum(PERCCLI_VERSION_INFOS, path): raise ResourceChecksumError install_deb(self.name, path) @@ -419,11 +412,9 @@ class SAS2IRCUStrategy(TPRStrategyABC): _name = HWTool.SAS2IRCU symlink_bin = TOOLS_DIR / HWTool.SAS2IRCU.value - def install(self, path: Path) -> None: + def install(self) -> None: """Install sas2ircu.""" - if file_is_empty(path): - logger.info("Skipping SAS2IRCU resource install since empty file was detected.") - raise ResourceFileSizeZeroError(tool=self._name, path=path) + path = self._fetch_tool() if not validate_checksum(SAS2IRCU_VERSION_INFOS, path): raise ResourceChecksumError make_executable(path) @@ -445,18 +436,16 @@ class SAS3IRCUStrategy(SAS2IRCUStrategy): _name = HWTool.SAS3IRCU symlink_bin = TOOLS_DIR / HWTool.SAS3IRCU.value - def install(self, path: Path) -> None: + def install(self) -> None: """Install sas3ircu.""" - if file_is_empty(path): - logger.info("Skipping SAS3IRCU resource install since empty file was detected.") - raise ResourceFileSizeZeroError(tool=self._name, path=path) + path = self._fetch_tool() if not validate_checksum(SAS3IRCU_VERSION_INFOS, path): raise ResourceChecksumError make_executable(path) symlink(src=path, dst=self.symlink_bin) -class SSACLIStrategy(APTStrategyABC): +class SSACLIStrategy(StrategyABC): """Strategy for install ssacli.""" _name = HWTool.SSACLI @@ -489,7 +478,7 @@ def check(self) -> bool: return check_deb_pkg_installed(self.pkg) -class IPMIStrategy(APTStrategyABC): +class IPMIStrategy(StrategyABC): """Strategy for installing ipmi.""" freeipmi_pkg = "freeipmi-tools" @@ -730,126 +719,3 @@ def remove_legacy_smartctl_exporter() -> None: smartctl_exporter_config_path.unlink() if smartctl_exporter.exists(): shutil.rmtree("/opt/SmartCtlExporter/") - - -class HWToolHelper: - """Helper to install vendor's or hardware related tools.""" - - @property - def strategies(self) -> List[StrategyABC]: - """Define strategies for every tools.""" - return [ - StorCLIStrategy(), - PercCLIStrategy(), - SAS2IRCUStrategy(), - SAS3IRCUStrategy(), - SSACLIStrategy(), - IPMISELStrategy(), - IPMIDCMIStrategy(), - IPMISENSORStrategy(), - RedFishStrategy(), - ] - - def fetch_tools( # pylint: disable=W0102 - self, - resources: Resources, - hw_available: Set[HWTool] = set(), - ) -> Dict[HWTool, Path]: - """Fetch resource from juju if it's VENDOR_TOOLS.""" - fetch_tools: Dict[HWTool, Path] = {} - # Fetch all tools from juju resources - for tool, resource in TPR_RESOURCES.items(): - if tool not in hw_available: - logger.info("Skip fetch tool: %s", tool) - continue - try: - path = resources.fetch(resource) - fetch_tools[tool] = path - except ModelError: - logger.warning("Fail to fetch tool: %s", resource) - - return fetch_tools - - def check_missing_resources( - self, hw_available: Set[HWTool], fetch_tools: Dict[HWTool, Path] - ) -> Tuple[bool, str]: - """Check if required resources are not been uploaded.""" - missing_resources = [] - for tool in hw_available: - if tool in TPR_RESOURCES: - # Resource hasn't been uploaded - if tool not in fetch_tools: - missing_resources.append(TPR_RESOURCES[tool]) - # Uploaded but file size is zero - path = fetch_tools.get(tool) - if path and file_is_empty(path): - logger.warning( - "Empty resource file detected for tool %s at path %s", tool, path - ) - missing_resources.append(TPR_RESOURCES[tool]) - if missing_resources: - return False, f"Missing resources: {missing_resources}" - return True, "" - - def install(self, resources: Resources, hw_available: Set[HWTool]) -> Tuple[bool, str]: - """Install tools.""" - logger.info("hw_available: %s", hw_available) - - fetch_tools: Dict[HWTool, Path] = self.fetch_tools(resources, hw_available) - - ok, msg = self.check_missing_resources(hw_available, fetch_tools) - if not ok: - return ok, msg - - fail_strategies = [] - - # Iterate over each strategy and execute. - for strategy in self.strategies: - if strategy.name not in hw_available: - continue - try: - if isinstance(strategy, TPRStrategyABC): - path = fetch_tools.get(strategy.name) # pylint: disable=W0212 - if path: - strategy.install(path) - - elif isinstance(strategy, (APTStrategyABC, SnapStrategy)): - strategy.install() # pylint: disable=E1120 - logger.info("Strategy %s install success", strategy) - except ( - ResourceFileSizeZeroError, - OSError, - apt.PackageError, - ResourceChecksumError, - ) as e: - logger.warning("Strategy %s install fail: %s", strategy, e) - fail_strategies.append(strategy.name) - - if fail_strategies: - return False, f"Fail strategies: {fail_strategies}" - return True, "" - - # pylint: disable=W0613 - def remove(self, resources: Resources, hw_available: Set[HWTool]) -> None: - """Execute all remove strategies.""" - for strategy in self.strategies: - if strategy.name not in hw_available: - continue - if isinstance(strategy, (TPRStrategyABC, APTStrategyABC, SnapStrategy)): - strategy.remove() - logger.info("Strategy %s remove success", strategy) - - def check_installed(self, hw_available: Set[HWTool]) -> Tuple[bool, str]: - """Check tool status.""" - failed_checks: Set[HWTool] = set() - - for strategy in self.strategies: - if strategy.name not in hw_available: - continue - ok = strategy.check() - if not ok: - failed_checks.add(strategy.name) - - if failed_checks: - return False, f"Fail strategy checks: {failed_checks}" - return True, "" diff --git a/src/service.py b/src/service.py index 38b30a0a..0cadfca7 100644 --- a/src/service.py +++ b/src/service.py @@ -5,15 +5,19 @@ from logging import getLogger from pathlib import Path from time import sleep -from typing import Any, Dict, List, Optional, Set, Tuple, Union +from typing import Any, Dict, List, Optional, Set, Tuple +from charms.operator_libs_linux.v0 import apt from charms.operator_libs_linux.v1 import systemd from charms.operator_libs_linux.v2 import snap from jinja2 import Environment, FileSystemLoader -from ops.model import ConfigData +from ops.model import ConfigData, Resources from redfish import redfish_client from redfish.rest.v1 import InvalidCredentialsError +from checksum import ( + ResourceChecksumError, +) from config import ( HARDWARE_EXPORTER_COLLECTOR_MAPPING, HARDWARE_EXPORTER_SETTINGS, @@ -22,11 +26,20 @@ ) from hardware import get_bmc_address from hw_tools import ( - APTStrategyABC, DCGMExporterStrategy, NVIDIADriverStrategy, SmartCtlExporterStrategy, SnapStrategy, + StrategyABC, + StorCLIStrategy, + PercCLIStrategy, + SAS2IRCUStrategy, + SAS3IRCUStrategy, + IPMISELStrategy, + IPMIDCMIStrategy, + IPMISENSORStrategy, + RedFishStrategy, + ResourceFileSizeZeroError, ) logger = getLogger(__name__) @@ -39,21 +52,68 @@ class ExporterError(Exception): """ -class BaseExporter(ABC): - """A class representing the exporter and the metric endpoints.""" +class ResourceMixin(): + """Mixin to handle multiple resource strategy install/remove/check.""" - config: ConfigData - exporter_name: str - port: int - log_level: str + strategies: List[StrategyABC] + available_tools: Set[HWTool] @staticmethod @abstractmethod def hw_tools() -> Set[HWTool]: """Return hardware tools to watch.""" + def install_resources(self) -> Tuple[bool, str]: + """Run install for all strategies.""" + fail_strategies = [] + try: + for strategy in self.strategies: + if strategy.name in self.available_tools: + strategy.install() + except ( + ResourceFileSizeZeroError, + OSError, + apt.PackageError, + ResourceChecksumError, + ) as e: + logger.warning("Strategy %s install fail: %s", strategy, e) + fail_strategies.append(strategy.name) + if fail_strategies: + return False, f"Fail strategies: {fail_strategies}" + return True, "" + + def remove_resources(self) -> bool: + """Run remove for strategies.""" + for strategy in self.strategies: + if strategy.name in self.available_tools: + strategy.remove() + return True + + def check_resources(self) -> Tuple[bool, str]: + """Run check for strategies.""" + failed_checks: Set[HWTool] = set() + for strategy in self.strategies: + if strategy.name in self.available_tools: + continue + strategy.check() + ok = strategy.check() + if not ok: + failed_checks.add(strategy.name) + if failed_checks: + return False, f"Fail strategy checks: {failed_checks}" + return True, "" + + +class BaseExporter(ABC, ResourceMixin): + """A class representing the exporter and the metric endpoints.""" + + config: ConfigData + exporter_name: str + port: int + log_level: str + @abstractmethod - def install(self) -> bool: + def install(self) -> Tuple[bool, str]: """Install the exporter.""" @abstractmethod @@ -99,7 +159,6 @@ class RenderableExporter(BaseExporter): def __init__(self, charm_dir: Path, config: ConfigData, settings: ExporterSettings) -> None: """Initialize the Exporter class.""" self.charm_dir = charm_dir - self.port: int self.settings = settings @@ -110,28 +169,6 @@ def __init__(self, charm_dir: Path, config: ConfigData, settings: ExporterSettin self.log_level = str(config["exporter-log-level"]) - def resources_exist(self) -> bool: - """Return true if required resources exist. - - Overwrite this method if there are resources need to be installed. - """ - return True - - def install_resources(self) -> bool: - """Install the necessary resources for the exporter service. - - Overwrite this method if there are resources need to be installed. - """ - logger.debug("No required resources for %s", self.__class__.__name__) - return True - - def remove_resources(self) -> bool: - """Remove exporter resources. - - Overwrite this method if there are resources need to be removed. - """ - return True - def remove_config(self) -> bool: """Remove exporter configuration file.""" if self.exporter_config_path is not None and self.exporter_config_path.exists(): @@ -194,40 +231,38 @@ def verify_render_files_exist(self) -> bool: service_file_exists = self.exporter_service_path.exists() return service_file_exists and config_file_exists - def install(self) -> bool: + def install(self) -> Tuple[bool, str]: """Install the exporter.""" logger.info("Installing %s.", self.exporter_name) # Install resources - install_resource_success = self.install_resources() + install_resource_success, msg = self.install_resources() if not install_resource_success: logger.error("Failed to install %s resources.", self.exporter_name) - return False - if not self.resources_exist(): - logger.error("%s resources are not installed properly.", self.exporter_name) - # pylint: disable=too-many-instance-attributes - return False + return False, msg # Render config configure_success = self.configure() if not configure_success: - logger.error("Failed to render config files for %s.", self.exporter_name) - return False + msg = f"Failed to render config files for {self.exporter_name}." + logger.error(msg) + return False, msg # Install service render_service_success = self.render_service() if not render_service_success: - logger.error("Failed to install %s.", self.exporter_name) - return False + msg = f"Failed to install {self.exporter_name}." + logger.error(msg) + return False, msg if not self.verify_render_files_exist(): - logger.error("%s is not installed properly.", self.exporter_name) - return False + msg = f"{self.exporter_name} is not installed properly." + return False, msg systemd.daemon_reload() logger.info("%s installed.", self.exporter_name) - return True + return True, "" def uninstall(self) -> bool: """Uninstall the exporter.""" @@ -320,7 +355,6 @@ class SnapExporter(BaseExporter): exporter_name: str channel: str port: int - strategies: List[Union[SnapStrategy, APTStrategyABC]] def __init__(self, config: ConfigData): """Init.""" @@ -336,35 +370,25 @@ def hw_tools() -> Set[HWTool]: """Return hardware tools to watch.""" return set() - def install(self) -> bool: + def install(self) -> Tuple[bool, str]: """Install the snap from a channel. Returns true if the install is successful, false otherwise. """ - try: - for strategy in self.strategies: - strategy.install() - self.enable_and_start() - return self.snap_client.present is True - except Exception as err: # pylint: disable=broad-except - logger.error("Failed to install %s: %s", strategy.name, err) - return False + ok, msg = self.install_resources() + if not ok: + return False, msg + self.enable_and_start() + if self.snap_client.present is False: + return False, "{self.exporter_name} service is not present" + return True, "" def uninstall(self) -> bool: """Uninstall the snap. Returns true if the uninstall is successful, false otherwise. """ - try: - for strategy in self.strategies: - strategy.remove() - - # using the snap.SnapError will result into: - # TypeError: catching classes that do not inherit from BaseException is not allowed - except Exception as err: # pylint: disable=broad-except - logger.error("Failed to remove %s: %s", strategy.name, err) - return False - + self.remove_resources() return self.snap_client.present is False def enable_and_start(self) -> None: @@ -477,9 +501,15 @@ def configure(self) -> bool: class HardwareExporter(RenderableExporter): """A class representing the hardware exporter and the metric endpoints.""" - required_config: bool = True + resources: Resources - def __init__(self, charm_dir: Path, config: ConfigData, available_tools: Set[HWTool]) -> None: + def __init__( + self, + charm_dir: Path, + resources: Resources, + config: ConfigData, + available_tools: Set[HWTool], + ) -> None: """Initialize the Hardware Exporter class.""" super().__init__(charm_dir, config, HARDWARE_EXPORTER_SETTINGS) @@ -490,6 +520,17 @@ def __init__(self, charm_dir: Path, config: ConfigData, available_tools: Set[HWT self.available_tools = available_tools self.collect_timeout = int(config["collect-timeout"]) self.bmc_address = get_bmc_address() + self.resources = resources + self.strategies = [ + StorCLIStrategy(resources=self.resources), + PercCLIStrategy(resources=self.resources), + SAS2IRCUStrategy(resources=self.resources), + SAS3IRCUStrategy(resources=self.resources), + IPMISELStrategy(), + IPMIDCMIStrategy(), + IPMISENSORStrategy(), + RedFishStrategy(), + ] def _render_config_content(self) -> str: """Render and install exporter config file."""