Skip to content

Commit

Permalink
refactor: Remove HwToolHelper
Browse files Browse the repository at this point in the history
- Restructure StrategyABC to have consistence interface
- Remove HWToolHelper and move the logic into service layer
- Change the `install` function return on BaseExporter for blocked
message required
- New ResourceMixin embedded into BaseExporter

partial fix: canonical#272
  • Loading branch information
jneo8 committed Oct 8, 2024
1 parent c65725d commit 3fee50a
Show file tree
Hide file tree
Showing 3 changed files with 149 additions and 255 deletions.
36 changes: 9 additions & 27 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from ops.framework import EventBase, StoredState
from ops.model import ActiveStatus, BlockedStatus, MaintenanceStatus

from hw_tools import HWTool, HWToolHelper, detect_available_tools, remove_legacy_smartctl_exporter
from hw_tools import HWTool, detect_available_tools, remove_legacy_smartctl_exporter
from service import BaseExporter, DCGMExporter, ExporterError, HardwareExporter, SmartCtlExporter

logger = logging.getLogger(__name__)
Expand All @@ -26,7 +26,6 @@ class HardwareObserverCharm(ops.CharmBase):
def __init__(self, *args: Any) -> None:
"""Init."""
super().__init__(*args)
self.hw_tool_helper = HWToolHelper()

# Add refresh_events to COSAgentProvider to update relation data when
# config changed (default behavior) and upgrade charm. This is useful
Expand Down Expand Up @@ -75,6 +74,7 @@ def exporters(self) -> List[BaseExporter]:
exporters.append(
HardwareExporter(
self.charm_dir,
self.model.resources,
self.model.config,
stored_tools,
)
Expand Down Expand Up @@ -134,47 +134,30 @@ def _on_install_or_upgrade(self, event: EventBase) -> None:

remove_legacy_smartctl_exporter()

stored_tools = self.get_stored_tools()

msg: str
resource_installed: bool

# Install hw tools
resource_installed, msg = self.hw_tool_helper.install(self.model.resources, stored_tools)

self._stored.resource_installed = resource_installed
if not resource_installed:
logger.warning(msg)
self.model.unit.status = BlockedStatus(msg)
return

# Install exporter services and resources
for exporter in self.exporters:
exporter_install_ok = exporter.install()
exporter_install_ok, msg = exporter.install()
if not exporter_install_ok:
resource_installed = False
self._stored.resource_installed = resource_installed
msg = f"Exporter {exporter.exporter_name} install failed"
logger.warning(msg)
self.model.unit.status = BlockedStatus(msg)
return

self._stored.resource_installed = True
self._on_update_status(event)

def _on_remove(self, _: EventBase) -> None:
"""Remove everything when charm is being removed."""
logger.info("Start to remove.")
# Remove binary tool
self.hw_tool_helper.remove(
self.model.resources,
self.get_stored_tools(),
)
self._stored.resource_installed = False

# Remove exporters
for exporter in self.exporters:
self.model.unit.status = MaintenanceStatus(f"Removing {exporter.exporter_name}...")
exporter.uninstall()
self._stored.resource_installed = False

def _on_update_status(self, _: EventBase) -> None: # noqa: C901
"""Update the charm's status."""
Expand All @@ -187,16 +170,15 @@ def _on_update_status(self, _: EventBase) -> None: # noqa: C901
return

for exporter in self.exporters:
resource_ready, resource_ready_message = exporter.check_resources()
if not resource_ready:
self.model.unit.status = BlockedStatus(resource_ready_message)
return
config_valid, config_valid_message = exporter.validate_exporter_configs()
if not config_valid:
self.model.unit.status = BlockedStatus(config_valid_message)
return

hw_tool_ok, error_msg = self.hw_tool_helper.check_installed(self.get_stored_tools())
if not hw_tool_ok:
self.model.unit.status = BlockedStatus(error_msg)
return

# Check health of all exporters
exporters_health = [self._check_exporter_health(exporter) for exporter in self.exporters]

Expand Down
184 changes: 25 additions & 159 deletions src/hw_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@
import subprocess
from abc import ABCMeta, abstractmethod
from pathlib import Path
from typing import Dict, List, Set, Tuple
from typing import List, Set

import requests
import urllib3
from charms.operator_libs_linux.v0 import apt
from charms.operator_libs_linux.v1 import systemd
from charms.operator_libs_linux.v2 import snap
from ops.model import ModelError, Resources
from ops.model import Resources

import apt_helpers
from checksum import (
Expand Down Expand Up @@ -159,32 +159,28 @@ def name(self) -> HWTool:
def check(self) -> bool:
"""Check installation status of the tool."""


class APTStrategyABC(StrategyABC, metaclass=ABCMeta):
"""Strategy for apt install tool."""

@abstractmethod
def install(self) -> None:
"""Installation details."""

@abstractmethod
def remove(self) -> None:
"""Remove details."""
# Note: The repo and keys should be remove when removing
# hook is triggered. But currently the apt lib don't have
# the remove option.


class TPRStrategyABC(StrategyABC, metaclass=ABCMeta):
"""Third party resource strategy class."""

@abstractmethod
def install(self, path: Path) -> None:
"""Installation details."""
resources: Resources

@abstractmethod
def remove(self) -> None:
"""Remove details."""
def __init__(self, resources: Resources) -> None:
"""Inject the Resource object for fetching resource."""
self.resources = resources

def _fetch_tool(self) -> Path:
path = self.resources.fetch(TPR_RESOURCES[self._name])
if path is None or file_is_empty(path):
logger.info("Skipping %s resource install since empty file was detected.", self.name)
raise ResourceFileSizeZeroError(tool=self._name, path=path)
return path


class SnapStrategy(StrategyABC):
Expand Down Expand Up @@ -266,7 +262,7 @@ def _create_custom_metrics(self) -> None:
raise err


class NVIDIADriverStrategy(APTStrategyABC):
class NVIDIADriverStrategy(StrategyABC):
"""NVIDIA driver strategy class."""

_name = HWTool.NVIDIA_DRIVER
Expand Down Expand Up @@ -364,11 +360,10 @@ class StorCLIStrategy(TPRStrategyABC):
origin_path = Path("/opt/MegaRAID/storcli/storcli64")
symlink_bin = TOOLS_DIR / HWTool.STORCLI.value

def install(self, path: Path) -> None:
def install(self) -> None:
"""Install storcli."""
if file_is_empty(path):
logger.info("Skipping StorCLI resource install since empty file was detected.")
raise ResourceFileSizeZeroError(tool=self._name, path=path)
path = self._fetch_tool()

if not validate_checksum(STORCLI_VERSION_INFOS, path):
raise ResourceChecksumError
install_deb(self.name, path)
Expand All @@ -392,11 +387,9 @@ class PercCLIStrategy(TPRStrategyABC):
origin_path = Path("/opt/MegaRAID/perccli/perccli64")
symlink_bin = TOOLS_DIR / HWTool.PERCCLI.value

def install(self, path: Path) -> None:
def install(self) -> None:
"""Install perccli."""
if file_is_empty(path):
logger.info("Skipping PERCCLI resource install since empty file was detected.")
raise ResourceFileSizeZeroError(tool=self._name, path=path)
path = self._fetch_tool()
if not validate_checksum(PERCCLI_VERSION_INFOS, path):
raise ResourceChecksumError
install_deb(self.name, path)
Expand All @@ -419,11 +412,9 @@ class SAS2IRCUStrategy(TPRStrategyABC):
_name = HWTool.SAS2IRCU
symlink_bin = TOOLS_DIR / HWTool.SAS2IRCU.value

def install(self, path: Path) -> None:
def install(self) -> None:
"""Install sas2ircu."""
if file_is_empty(path):
logger.info("Skipping SAS2IRCU resource install since empty file was detected.")
raise ResourceFileSizeZeroError(tool=self._name, path=path)
path = self._fetch_tool()
if not validate_checksum(SAS2IRCU_VERSION_INFOS, path):
raise ResourceChecksumError
make_executable(path)
Expand All @@ -445,18 +436,16 @@ class SAS3IRCUStrategy(SAS2IRCUStrategy):
_name = HWTool.SAS3IRCU
symlink_bin = TOOLS_DIR / HWTool.SAS3IRCU.value

def install(self, path: Path) -> None:
def install(self) -> None:
"""Install sas3ircu."""
if file_is_empty(path):
logger.info("Skipping SAS3IRCU resource install since empty file was detected.")
raise ResourceFileSizeZeroError(tool=self._name, path=path)
path = self._fetch_tool()
if not validate_checksum(SAS3IRCU_VERSION_INFOS, path):
raise ResourceChecksumError
make_executable(path)
symlink(src=path, dst=self.symlink_bin)


class SSACLIStrategy(APTStrategyABC):
class SSACLIStrategy(StrategyABC):
"""Strategy for install ssacli."""

_name = HWTool.SSACLI
Expand Down Expand Up @@ -489,7 +478,7 @@ def check(self) -> bool:
return check_deb_pkg_installed(self.pkg)


class IPMIStrategy(APTStrategyABC):
class IPMIStrategy(StrategyABC):
"""Strategy for installing ipmi."""

freeipmi_pkg = "freeipmi-tools"
Expand Down Expand Up @@ -730,126 +719,3 @@ def remove_legacy_smartctl_exporter() -> None:
smartctl_exporter_config_path.unlink()
if smartctl_exporter.exists():
shutil.rmtree("/opt/SmartCtlExporter/")


class HWToolHelper:
"""Helper to install vendor's or hardware related tools."""

@property
def strategies(self) -> List[StrategyABC]:
"""Define strategies for every tools."""
return [
StorCLIStrategy(),
PercCLIStrategy(),
SAS2IRCUStrategy(),
SAS3IRCUStrategy(),
SSACLIStrategy(),
IPMISELStrategy(),
IPMIDCMIStrategy(),
IPMISENSORStrategy(),
RedFishStrategy(),
]

def fetch_tools( # pylint: disable=W0102
self,
resources: Resources,
hw_available: Set[HWTool] = set(),
) -> Dict[HWTool, Path]:
"""Fetch resource from juju if it's VENDOR_TOOLS."""
fetch_tools: Dict[HWTool, Path] = {}
# Fetch all tools from juju resources
for tool, resource in TPR_RESOURCES.items():
if tool not in hw_available:
logger.info("Skip fetch tool: %s", tool)
continue
try:
path = resources.fetch(resource)
fetch_tools[tool] = path
except ModelError:
logger.warning("Fail to fetch tool: %s", resource)

return fetch_tools

def check_missing_resources(
self, hw_available: Set[HWTool], fetch_tools: Dict[HWTool, Path]
) -> Tuple[bool, str]:
"""Check if required resources are not been uploaded."""
missing_resources = []
for tool in hw_available:
if tool in TPR_RESOURCES:
# Resource hasn't been uploaded
if tool not in fetch_tools:
missing_resources.append(TPR_RESOURCES[tool])
# Uploaded but file size is zero
path = fetch_tools.get(tool)
if path and file_is_empty(path):
logger.warning(
"Empty resource file detected for tool %s at path %s", tool, path
)
missing_resources.append(TPR_RESOURCES[tool])
if missing_resources:
return False, f"Missing resources: {missing_resources}"
return True, ""

def install(self, resources: Resources, hw_available: Set[HWTool]) -> Tuple[bool, str]:
"""Install tools."""
logger.info("hw_available: %s", hw_available)

fetch_tools: Dict[HWTool, Path] = self.fetch_tools(resources, hw_available)

ok, msg = self.check_missing_resources(hw_available, fetch_tools)
if not ok:
return ok, msg

fail_strategies = []

# Iterate over each strategy and execute.
for strategy in self.strategies:
if strategy.name not in hw_available:
continue
try:
if isinstance(strategy, TPRStrategyABC):
path = fetch_tools.get(strategy.name) # pylint: disable=W0212
if path:
strategy.install(path)

elif isinstance(strategy, (APTStrategyABC, SnapStrategy)):
strategy.install() # pylint: disable=E1120
logger.info("Strategy %s install success", strategy)
except (
ResourceFileSizeZeroError,
OSError,
apt.PackageError,
ResourceChecksumError,
) as e:
logger.warning("Strategy %s install fail: %s", strategy, e)
fail_strategies.append(strategy.name)

if fail_strategies:
return False, f"Fail strategies: {fail_strategies}"
return True, ""

# pylint: disable=W0613
def remove(self, resources: Resources, hw_available: Set[HWTool]) -> None:
"""Execute all remove strategies."""
for strategy in self.strategies:
if strategy.name not in hw_available:
continue
if isinstance(strategy, (TPRStrategyABC, APTStrategyABC, SnapStrategy)):
strategy.remove()
logger.info("Strategy %s remove success", strategy)

def check_installed(self, hw_available: Set[HWTool]) -> Tuple[bool, str]:
"""Check tool status."""
failed_checks: Set[HWTool] = set()

for strategy in self.strategies:
if strategy.name not in hw_available:
continue
ok = strategy.check()
if not ok:
failed_checks.add(strategy.name)

if failed_checks:
return False, f"Fail strategy checks: {failed_checks}"
return True, ""
Loading

0 comments on commit 3fee50a

Please sign in to comment.