Skip to content

Commit

Permalink
Merge branch 'main' into metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
Deezzir authored Oct 2, 2024
2 parents 6227f7e + 4885286 commit b9f39e4
Show file tree
Hide file tree
Showing 9 changed files with 172 additions and 328 deletions.
6 changes: 6 additions & 0 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@ options:
description: |
Channel to install the DCGM snap if the hardware has NVIDIA GPU. By default, it will install
from latest/stable
smartctl-exporter-snap-channel:
type: string
default: "latest/stable"
description: |
Channel to install the Smartctl exporter snap if the hardware has a SMART disk. By default, it will install
from latest/stable.
exporter-log-level:
type: string
default: "INFO"
Expand Down
8 changes: 6 additions & 2 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from ops.framework import EventBase, StoredState
from ops.model import ActiveStatus, BlockedStatus, MaintenanceStatus

from hw_tools import HWTool, HWToolHelper, detect_available_tools
from hw_tools import HWTool, HWToolHelper, detect_available_tools, remove_legacy_smartctl_exporter
from service import BaseExporter, DCGMExporter, ExporterError, HardwareExporter, SmartCtlExporter

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -81,7 +81,7 @@ def exporters(self) -> List[BaseExporter]:
)

if stored_tools & SmartCtlExporter.hw_tools():
exporters.append(SmartCtlExporter(self.charm_dir, self.model.config))
exporters.append(SmartCtlExporter(self.model.config))

if stored_tools & DCGMExporter.hw_tools():
exporters.append(DCGMExporter(self.charm_dir, self.model.config))
Expand All @@ -97,6 +97,8 @@ def get_stored_tools(self) -> Set[HWTool]:
if not self._stored.stored_tools: # type: ignore[truthy-function]
available_tools = detect_available_tools() # type: ignore[unreachable]
self._stored.stored_tools = {tool.value for tool in available_tools}
if "smartctl" in self._stored.stored_tools: # type: ignore[operator]
self._stored.stored_tools.remove("smartctl") # type: ignore[attr-defined]
return {HWTool(value) for value in self._stored.stored_tools} # type: ignore[attr-defined]

def _on_redetect_hardware(self, event: ops.ActionEvent) -> None:
Expand Down Expand Up @@ -130,6 +132,8 @@ def _on_install_or_upgrade(self, event: EventBase) -> None:
"""Install or upgrade charm."""
self.model.unit.status = MaintenanceStatus("Installing resources...")

remove_legacy_smartctl_exporter()

stored_tools = self.get_stored_tools()

msg: str
Expand Down
17 changes: 1 addition & 16 deletions src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,20 +36,6 @@ class HardwareExporterSettings(ExporterSettings): # pylint: disable = too-few-p
HARDWARE_EXPORTER_SETTINGS = HardwareExporterSettings()


class SmartCtlExporterSettings(ExporterSettings):  # pylint: disable = too-few-public-methods
    """Constant settings for SmartCtl Exporter.

    Every path and template name below is derived from ``name``.
    """

    # Base name; interpolated into all paths and template names below.
    name: str = "smartctl-exporter"
    # Exporter configuration file under /etc.
    config_path: Path = Path(f"/etc/{name}-config.yaml")
    # systemd unit file for the exporter service.
    service_path: Path = Path(f"/etc/systemd/system/{name}.service")
    # Template file names; presumably Jinja2 given the ".j2" suffix -- confirm against the renderer.
    config_template: str = f"{name}-config.yaml.j2"
    service_template: str = f"{name}.service.j2"
    # Message text for an unexpected exporter crash; where it is surfaced is not visible here.
    crash_msg: str = "SmartCtl exporter crashed unexpectedly, please refer to systemd logs..."


SMARTCTL_EXPORTER_SETTINGS = SmartCtlExporterSettings()


class SystemVendor(str, Enum):
"""Different hardware system vendor."""

Expand Down Expand Up @@ -77,8 +63,7 @@ class HWTool(str, Enum):
IPMI_SEL = "ipmi_sel"
IPMI_SENSOR = "ipmi_sensor"
REDFISH = "redfish"
SMARTCTL = "smartctl"
SMARTCTL_EXPORTER = "smartctl_exporter"
SMARTCTL_EXPORTER = "smartctl-exporter"
DCGM = "dcgm"


Expand Down
113 changes: 31 additions & 82 deletions src/hw_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,19 @@
Define strategy for install, remove and verifier for different hardware.
"""

import io
import logging
import os
import shutil
import stat
import subprocess
import tarfile
from abc import ABCMeta, abstractmethod
from http import HTTPStatus
from pathlib import Path
from typing import Dict, List, Set, Tuple

import requests
import urllib3
from charms.operator_libs_linux.v0 import apt
from charms.operator_libs_linux.v1 import systemd
from charms.operator_libs_linux.v2 import snap
from ops.model import ModelError, Resources

Expand Down Expand Up @@ -63,14 +61,6 @@ def __init__(self, tool: HWTool, path: Path):
self.message = f"Tool: {tool} path: {path} size is zero"


class ResourceInstallationError(Exception):
    """Exception raised when a hardware tool installation fails."""

    def __init__(self, tool: HWTool) -> None:
        """Build the error message from the tool whose installation failed.

        Args:
            tool: The hardware tool that could not be installed; it is
                interpolated into the exception message.
        """
        super().__init__(f"Installation failed for tool: {tool}")


def copy_to_snap_common_bin(source: Path, filename: str) -> None:
"""Copy file to $SNAP_COMMON/bin folder."""
Path(f"{SNAP_COMMON}/bin").mkdir(parents=False, exist_ok=True)
Expand Down Expand Up @@ -239,6 +229,16 @@ def __init__(self, channel: str) -> None:
self.channel = channel


class SmartCtlExporterStrategy(SnapStrategy):
    """Snap-based strategy for the smartctl exporter tool."""

    # Tool identifier this strategy is responsible for.
    _name = HWTool.SMARTCTL_EXPORTER

    def __init__(self, channel: str) -> None:
        """Store the channel to use for this tool.

        Args:
            channel: Snap channel name, e.g. "latest/stable".
        """
        self.channel = channel


class StorCLIStrategy(TPRStrategyABC):
"""Strategy to install storcli."""

Expand Down Expand Up @@ -444,75 +444,6 @@ def check(self) -> bool:
return True


class SmartCtlStrategy(APTStrategyABC):
    """Strategy for installing smartctl through the smartmontools deb package."""

    pkg = "smartmontools"
    _name = HWTool.SMARTCTL

    def install(self) -> None:
        """Install the package via ``apt_helpers``."""
        apt_helpers.add_pkg_with_candidate_version(self.pkg)

    def remove(self) -> None:
        """Log that removal is skipped; the package is left installed."""
        # Skip removing because this may cause dependency error
        # for other services on the same machine.
        logger.info("%s skip removing %s", self._name, self.pkg)

    def check(self) -> bool:
        """Check package status."""
        return check_deb_pkg_installed(self.pkg)


class SmartCtlExporterStrategy(StrategyABC):  # pylint: disable=R0903
    """Install smartctl exporter binary.

    The binary is downloaded from the release tarball at ``_release`` and
    written to ``_exporter_path`` inside ``_resource_dir``.
    """

    _name = HWTool.SMARTCTL_EXPORTER

    # Directory that holds the extracted exporter binary.
    _resource_dir = Path("/opt/SmartCtlExporter/")
    # Pinned upstream release archive (v0.12.0, linux-amd64).
    _release = (
        "https://github.com/prometheus-community/"
        "smartctl_exporter/releases/download/v0.12.0/smartctl_exporter-0.12.0.linux-amd64.tar.gz"
    )
    # Archive members whose name ends with this string are extracted.
    _exporter_name = "smartctl_exporter"
    _exporter_path = Path(_resource_dir / "smartctl_exporter")

    def install(self) -> None:
        """Install exporter binary from internet.

        Raises:
            ResourceInstallationError: If the download does not return
                HTTP 200, or if no matching archive member could be
                extracted.
        """
        logger.debug("Installing SmartCtlExporter")
        self._resource_dir.mkdir(parents=True, exist_ok=True)

        resp = requests.get(self._release, timeout=60)
        if resp.status_code != HTTPStatus.OK:
            logger.error("Failed to download smartctl exporter binary.")
            raise ResourceInstallationError(self._name)

        success = False
        # The archive is unpacked from memory; it is never written to disk.
        fileobj = io.BytesIO(resp.content)
        with tarfile.open(fileobj=fileobj, mode="r:gz") as tar:
            for member in tar.getmembers():
                if member.name.endswith(self._exporter_name):
                    with open(self._exporter_path, "wb") as outfile:
                        # extractfile() may return None, hence the check below.
                        member_file = tar.extractfile(member)
                        if member_file:
                            outfile.write(member_file.read())
                            success = True
                    if success:
                        make_executable(self._exporter_path)
        if not success:
            logger.error("Failed to install SmartCtlExporter binary.")
            raise ResourceInstallationError(self._name)

    def remove(self) -> None:
        """Remove downloaded exporter binary.

        The whole ``_resource_dir`` tree is deleted, not just the binary.
        """
        logger.debug("Remove SmartCtlExporter")
        shutil.rmtree(self._resource_dir)

    def check(self) -> bool:
        """Check package status.

        Returns:
            True if ``_exporter_path`` is an existing regular file.
        """
        logger.debug("Check SmartCtlExporter resources")
        return self._exporter_path.is_file()


def _raid_hw_verifier_hwinfo() -> Set[HWTool]:
"""Verify if a supported RAID card exists on the machine using the hwinfo command."""
hwinfo_output = hwinfo("storage")
Expand Down Expand Up @@ -650,7 +581,7 @@ def bmc_hw_verifier() -> Set[HWTool]:

def disk_hw_verifier() -> Set[HWTool]:
"""Verify if the disk exists on the machine."""
return {HWTool.SMARTCTL} if lshw(class_filter="disk") else set()
return {HWTool.SMARTCTL_EXPORTER} if lshw(class_filter="disk") else set()


def nvidia_gpu_verifier() -> Set[HWTool]:
Expand All @@ -664,6 +595,25 @@ def detect_available_tools() -> Set[HWTool]:
return raid_hw_verifier() | bmc_hw_verifier() | disk_hw_verifier() | nvidia_gpu_verifier()


def remove_legacy_smartctl_exporter() -> None:
    """Remove the legacy smartctl exporter left behind by an older revision.

    Workaround for migrating the legacy smartctl exporter to the snap package.
    Each artefact is handled only if it is present: the systemd service is
    stopped, disabled and its unit file deleted; the config file is deleted;
    and the resource directory holding the binary is removed.
    """
    name = "smartctl-exporter"
    # Must be absolute: a relative path would be resolved against the current
    # working directory, so the existence check and the removal could point at
    # different locations.
    resource_dir = Path("/opt/SmartCtlExporter/")
    config_path = Path(f"/etc/{name}-config.yaml")
    service_path = Path(f"/etc/systemd/system/{name}.service")

    if service_path.exists():
        # Stop and disable before deleting the unit file so systemd can still
        # resolve the service by name.
        systemd.service_stop(name)
        systemd.service_disable(name)
        service_path.unlink()
    if config_path.exists():
        config_path.unlink()
    if resource_dir.exists():
        # Use the same path object that was checked above.
        shutil.rmtree(resource_dir)


class HWToolHelper:
"""Helper to install vendor's or hardware related tools."""

Expand All @@ -680,7 +630,6 @@ def strategies(self) -> List[StrategyABC]:
IPMIDCMIStrategy(),
IPMISENSORStrategy(),
RedFishStrategy(),
SmartCtlStrategy(),
]

def fetch_tools( # pylint: disable=W0102
Expand Down
Loading

0 comments on commit b9f39e4

Please sign in to comment.