diff --git a/config.yaml b/config.yaml index 15f03462..5badf46f 100644 --- a/config.yaml +++ b/config.yaml @@ -3,12 +3,18 @@ # options: - exporter-port: + hardware-exporter-port: type: int default: 10200 description: | - Start the prometheus exporter at "exporter-port". By default, it will - start at port 10200. + Start the prometheus hardware exporter at "hardware-exporter-port". By default, + it will start at port 10200. + smartctl-exporter-port: + type: int + default: 10201 + description: | + Start the prometheus smartctl exporter at "smartctl-exporter-port". By default, + it will start at port 10201. exporter-log-level: type: string default: "INFO" diff --git a/requirements.txt b/requirements.txt index 0be4549b..ae796b14 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,5 @@ distro ops >= 2.2.0 jinja2 redfish # requests is included in this +pydantic < 2 git+https://github.com/canonical/prometheus-hardware-exporter.git diff --git a/src/charm.py b/src/charm.py index bce2f380..647e064d 100755 --- a/src/charm.py +++ b/src/charm.py @@ -5,27 +5,15 @@ """Charm the application.""" import logging -from time import sleep -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, List, Tuple import ops from charms.grafana_agent.v0.cos_agent import COSAgentProvider from ops.framework import EventBase, StoredState from ops.model import ActiveStatus, BlockedStatus, MaintenanceStatus -from redfish import redfish_client -from redfish.rest.v1 import InvalidCredentialsError - -from config import ( - EXPORTER_CRASH_MSG, - EXPORTER_HEALTH_RETRY_COUNT, - EXPORTER_HEALTH_RETRY_TIMEOUT, - REDFISH_MAX_RETRY, - REDFISH_TIMEOUT, - HWTool, -) -from hardware import get_bmc_address -from hw_tools import HWToolHelper, get_hw_tool_enable_list -from service import Exporter, ExporterError + +from hw_tools import HWTool, HWToolHelper, get_hw_tool_enable_list +from service import BaseExporter, ExporterError, HardwareExporter, SmartCtlExporter logger = 
logging.getLogger(__name__) @@ -47,13 +35,20 @@ def __init__(self, *args: Any) -> None: self, refresh_events=[self.on.config_changed, self.on.upgrade_charm], metrics_endpoints=[ - {"path": "/metrics", "port": int(self.model.config["exporter-port"])} + {"path": "/metrics", "port": int(self.model.config["hardware-exporter-port"])}, + {"path": "/metrics", "port": int(self.model.config["smartctl-exporter-port"])}, ], # Setting scrape_timeout as collect_timeout in the `duration` format specified in # https://prometheus.io/docs/prometheus/latest/configuration/configuration/#duration scrape_configs=[{"scrape_timeout": f"{int(self.model.config['collect-timeout'])}s"}], ) - self.exporter = Exporter(self.charm_dir) + + self._stored.set_default( + resource_installed=False, + # Storing only the values from `HWTool` because entire HWTool + # cannot be stored in _stored. Only simple types can be stored. + enabled_hw_tool_list_values=[], + ) self.framework.observe(self.on.config_changed, self._on_config_changed) self.framework.observe(self.on.install, self._on_install_or_upgrade) @@ -68,15 +63,27 @@ def __init__(self, *args: Any) -> None: ) self.framework.observe(self.on.redetect_hardware_action, self._on_redetect_hardware) - self._stored.set_default( - exporter_installed=False, - resource_installed=False, - # Storing only the values from `HWTool` because entire HWTool - # cannot be stored in _stored. Only simple types can be stored. 
- enabled_hw_tool_list_values=[], - ) self.num_cos_agent_relations = self.get_num_cos_agent_relations("cos-agent") + @property + def exporters(self) -> List[BaseExporter]: + """Return list of exporters based on detected hardware.""" + exporters: List[BaseExporter] = [] + enable_hw_tool_list = self.get_enable_hw_tools() + if set(enable_hw_tool_list) & set(HardwareExporter.hw_tools()): + exporters.append( + HardwareExporter( + self.charm_dir, + self.model.config, + self.get_enable_hw_tools(), + ) + ) + + if set(enable_hw_tool_list) & set(SmartCtlExporter.hw_tools()): + exporters.append(SmartCtlExporter(self.charm_dir, self.model.config)) + + return exporters + def get_enabled_hw_tool_list_values(self) -> List[str]: """Get hw tool list from stored or from machine if not in stored.""" if not self._stored.enabled_hw_tool_list_values: # type: ignore[truthy-function] @@ -89,10 +96,14 @@ def get_hw_tools_from_values(self, hw_tool_values: List[str]) -> List[HWTool]: """Get HWTool objects from hw tool values.""" return [HWTool(value) for value in hw_tool_values] + def get_enable_hw_tools(self) -> List[HWTool]: + """Get enable HWTools.""" + enabled_hw_tool_list_values = self.get_enabled_hw_tool_list_values() + return self.get_hw_tools_from_values(enabled_hw_tool_list_values) + def _on_redetect_hardware(self, event: ops.ActionEvent) -> None: """Detect hardware tool list and option to rerun the install hook.""" - current_hw_tools_value_list = self.get_enabled_hw_tool_list_values() - current_hw_tools_str_list = [str(tool) for tool in current_hw_tools_value_list] + current_hw_tools_str_list = self.get_enable_hw_tools() current_hw_tools_str_list.sort() detected_hw_tool_list = get_hw_tool_enable_list() @@ -124,34 +135,33 @@ def _on_install_or_upgrade(self, event: EventBase) -> None: """Install or upgrade charm.""" self.model.unit.status = MaintenanceStatus("Installing resources...") - enabled_hw_tool_list_values = self.get_enabled_hw_tool_list_values() - enabled_hw_tool_list = 
self.get_hw_tools_from_values(enabled_hw_tool_list_values) + enabled_hw_tools = self.get_enable_hw_tools() + msg: str + resource_installed: bool + + # Install hw tools resource_installed, msg = self.hw_tool_helper.install( - self.model.resources, enabled_hw_tool_list + self.model.resources, enabled_hw_tools ) - self._stored.resource_installed = resource_installed + self._stored.resource_installed = resource_installed if not resource_installed: logger.warning(msg) self.model.unit.status = BlockedStatus(msg) return - # Install exporter - self.model.unit.status = MaintenanceStatus("Installing exporter...") - success = self.exporter.install( - int(self.model.config["exporter-port"]), - str(self.model.config["exporter-log-level"]), - self.get_redfish_conn_params(enabled_hw_tool_list), - int(self.model.config["collect-timeout"]), - enabled_hw_tool_list, - ) - self._stored.exporter_installed = success - if not success: - msg = "Failed to install exporter, please refer to `juju debug-log`" - logger.error(msg) - self.model.unit.status = BlockedStatus(msg) - return + # Install exporter services and resources + for exporter in self.exporters: + exporter_install_ok = exporter.install() + if not exporter_install_ok: + resource_installed = False + self._stored.resource_installed = resource_installed + msg = f"Exporter {exporter.exporter_name} install failed" + logger.warning(msg) + self.model.unit.status = BlockedStatus(msg) + return + self._on_update_status(event) def _on_remove(self, _: EventBase) -> None: @@ -160,17 +170,14 @@ def _on_remove(self, _: EventBase) -> None: # Remove binary tool self.hw_tool_helper.remove( self.model.resources, - self.get_hw_tools_from_values(self.get_enabled_hw_tool_list_values()), + self.get_enable_hw_tools(), ) self._stored.resource_installed = False - success = self.exporter.uninstall() - if not success: - msg = "Failed to uninstall exporter, please refer to `juju debug-log`" - # we probably don't need to set any status here because the charm 
- # will go away soon, so only logging is enough - logger.warning(msg) - self._stored.exporter_installed = not success - logger.info("Remove complete") + + # Remove exporters + for exporter in self.exporters: + self.model.unit.status = MaintenanceStatus(f"Removing {exporter.exporter_name}...") + exporter.uninstall() def _on_update_status(self, _: EventBase) -> None: # noqa: C901 """Update the charm's status.""" @@ -178,187 +185,115 @@ def _on_update_status(self, _: EventBase) -> None: # noqa: C901 # The charm should be in BlockedStatus with install failed msg return # type: ignore[unreachable] - if not self.exporter_enabled: + if not self.cos_agent_related: self.model.unit.status = BlockedStatus("Missing relation: [cos-agent]") return - config_valid, config_valid_message = self.validate_exporter_configs() - if not config_valid: - self.model.unit.status = BlockedStatus(config_valid_message) - return + for exporter in self.exporters: + config_valid, config_valid_message = exporter.validate_exporter_configs() + if not config_valid: + self.model.unit.status = BlockedStatus(config_valid_message) + return - hw_tool_ok, error_msg = self.hw_tool_helper.check_installed( - self.get_hw_tools_from_values(self.get_enabled_hw_tool_list_values()) - ) + hw_tool_ok, error_msg = self.hw_tool_helper.check_installed(self.get_enable_hw_tools()) if not hw_tool_ok: self.model.unit.status = BlockedStatus(error_msg) return - if not self.exporter.check_health(): - logger.warning("Exporter health check - failed.") - # if restart isn't successful, an ExporterError exception will be raised here - self.restart_exporter() - - self.model.unit.status = ActiveStatus("Unit is ready") - - def restart_exporter(self) -> None: - """Restart exporter service with retry.""" - try: - for i in range(1, EXPORTER_HEALTH_RETRY_COUNT + 1): - logger.warning("Restarting exporter - %d retry", i) - self.exporter.restart() - sleep(EXPORTER_HEALTH_RETRY_TIMEOUT) - if self.exporter.check_active(): - 
logger.info("Exporter active after restart.") - break - if not self.exporter.check_active(): - logger.error("Failed to restart the exporter.") - raise ExporterError(EXPORTER_CRASH_MSG) - except Exception as err: # pylint: disable=W0718 - logger.error("Exporter crashed unexpectedly: %s", err) - raise ExporterError(EXPORTER_CRASH_MSG) from err + # Check health of all exporters + exporters_health = [self._check_exporter_health(exporter) for exporter in self.exporters] + + if all(exporters_health): + self.model.unit.status = ActiveStatus("Unit is ready") + + def _check_exporter_health(self, exporter: BaseExporter) -> bool: + """Check exporter health.""" + if not exporter.check_health(): + logger.warning("%s - Exporter health check failed.", exporter.exporter_name) + try: + exporter.restart() + except ExporterError as e: + msg = f"Exporter {exporter.exporter_name} crashed unexpectedly: {e}" + logger.error(msg) + # Setting the status as blocked instead of error + # since other exporters may still be healthy. 
+ self.model.unit.status = BlockedStatus(msg) + return False + return True def _on_config_changed(self, event: EventBase) -> None: """Reconfigure charm.""" if not self._stored.resource_installed: # type: ignore[truthy-function] - logging.info( # type: ignore[unreachable] + logger.info( # type: ignore[unreachable] "Config changed called before install complete, deferring event: %s", event.handle, ) event.defer() - if self.exporter_enabled: - success, message = self.validate_exporter_configs() + if self.cos_agent_related: + success, message = self.validate_configs() if not success: self.model.unit.status = BlockedStatus(message) return - - success = self.exporter.template.render_config( - port=int(self.model.config["exporter-port"]), - level=str(self.model.config["exporter-log-level"]), - redfish_conn_params=self.get_redfish_conn_params( - self.get_hw_tools_from_values(self.get_enabled_hw_tool_list_values()) - ), - collect_timeout=int(self.model.config["collect-timeout"]), - hw_tools=self.get_hw_tools_from_values(self.get_enabled_hw_tool_list_values()), - ) - if not success: - message = "Failed to configure exporter, please check if the server is healthy." - self.model.unit.status = BlockedStatus(message) - return - self.exporter.restart() + for exporter in self.exporters: + success = exporter.render_config() + if success: + exporter.restart() + else: + message = ( + f"Failed to configure {exporter.exporter_name}, " + "please check if the server is healthy." 
+ ) + self.model.unit.status = BlockedStatus(message) self._on_update_status(event) def _on_cos_agent_relation_joined(self, event: EventBase) -> None: - """Start the exporter when relation joined.""" - if ( - not self._stored.resource_installed # type: ignore[truthy-function] - or not self._stored.exporter_installed # type: ignore[truthy-function] - ): + """Enable and start the exporters when relation joined.""" + if not self._stored.resource_installed: # type: ignore[truthy-function] logger.info( # type: ignore[unreachable] - "Defer cos-agent relation join because exporter or resources is not ready yet." + "Defer cos-agent relation join because resources are not ready yet." ) event.defer() return - self.exporter.enable() - self.exporter.start() - logger.info("Start and enable exporter service") - self._on_update_status(event) - def _on_cos_agent_relation_departed(self, event: EventBase) -> None: - """Remove the exporter when relation departed.""" - if self._stored.exporter_installed: # type: ignore[truthy-function] - self.exporter.stop() - self.exporter.disable() - logger.info("Stop and disable exporter service") - self._on_update_status(event) - - def get_redfish_conn_params(self, enabled_hw_tool_list: List[HWTool]) -> Dict[str, Any]: - """Get redfish connection parameters if redfish is available.""" - if HWTool.REDFISH not in enabled_hw_tool_list: - logger.warning("Redfish unavailable, disregarding redfish config options...") - return {} - return { - "host": f"https://{get_bmc_address()}", - "username": self.model.config.get("redfish-username", ""), - "password": self.model.config.get("redfish-password", ""), - "timeout": self.model.config.get("collect-timeout"), - } + for exporter in self.exporters: + exporter.enable_and_start() + logger.info("Enabled and started %s service", exporter.exporter_name) - def validate_exporter_configs(self) -> Tuple[bool, str]: - """Validate the static and runtime config options for the exporter.""" - port = 
int(self.model.config["exporter-port"]) - if not 1 <= port <= 65535: - logger.error("Invalid exporter-port: port must be in [1, 65535].") - return False, "Invalid config: 'exporter-port'" - - level = str(self.model.config["exporter-log-level"]) - allowed_choices = {"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"} - if level.upper() not in allowed_choices: - logger.error( - "Invalid exporter-log-level: level must be in %s (case-insensitive).", - allowed_choices, - ) - return False, "Invalid config: 'exporter-log-level'" + self._on_update_status(event) - # Note we need to use `is False` because `None` means redfish is not - # available. - if self.redfish_conn_params_valid is False: - logger.error("Invalid redfish credentials.") - return False, "Invalid config: 'redfish-username' or 'redfish-password'" + def _on_cos_agent_relation_departed(self, event: EventBase) -> None: + """Remove the exporters when relation departed.""" + for exporter in self.exporters: + exporter.disable_and_stop() + logger.info("Disabled and stopped %s service", exporter.exporter_name) - return True, "Exporter config is valid." + self._on_update_status(event) def get_num_cos_agent_relations(self, relation_name: str) -> int: """Get the number of relation given a relation_name.""" relations = self.model.relations.get(relation_name, []) return len(relations) - @property - def exporter_enabled(self) -> bool: - """Return True if cos-agent relation is present.""" - return self.num_cos_agent_relations == 1 + def validate_configs(self) -> Tuple[bool, str]: + """Validate the static and runtime config options for the charm.""" + exporter_ports = [] + for exporter in self.exporters: + exporter_ports.append(exporter.port) + config_valid, config_valid_message = exporter.validate_exporter_configs() + if not config_valid: + return config_valid, config_valid_message + + if len(exporter_ports) > len(set(exporter_ports)): + return False, "Ports must be unique for each exporter." 
+ + return True, "Charm config is valid." @property - def redfish_conn_params_valid(self) -> Optional[bool]: - """Check if redfish connections parameters is valid or not. - - If the redfish connection params is not available this property returns - None. Otherwise, it verifies the connection parameters. If the redfish - connection parameters are valid, it returns True; if not valid, it - returns False. - """ - redfish_conn_params = self.get_redfish_conn_params( - self.get_hw_tools_from_values(self.get_enabled_hw_tool_list_values()) - ) - if not redfish_conn_params: - return None - - redfish_obj = None - try: - redfish_obj = redfish_client( - base_url=redfish_conn_params.get("host", ""), - username=redfish_conn_params.get("username", ""), - password=redfish_conn_params.get("password", ""), - timeout=redfish_conn_params.get("timeout", REDFISH_TIMEOUT), - max_retry=REDFISH_MAX_RETRY, - ) - redfish_obj.login(auth="session") - except InvalidCredentialsError as e: - result = False - logger.error("invalid redfish credential: %s", str(e)) - except Exception as e: # pylint: disable=W0718 - result = False - logger.error("cannot connect to redfish: %s", str(e)) - else: - result = True - finally: - # Make sure to close connection at the end - if redfish_obj: - redfish_obj.logout() - - return result + def cos_agent_related(self) -> bool: + """Return True if cos-agent relation is present.""" + return self.num_cos_agent_relations != 0 if __name__ == "__main__": # pragma: nocover diff --git a/src/config.py b/src/config.py index fe7f6559..abb4c995 100644 --- a/src/config.py +++ b/src/config.py @@ -4,19 +4,50 @@ from enum import Enum from pathlib import Path -# Exporter -EXPORTER_NAME = "hardware-exporter" -EXPORTER_CONFIG_PATH = Path(f"/etc/{EXPORTER_NAME}-config.yaml") -EXPORTER_SERVICE_PATH = Path(f"/etc/systemd/system/{EXPORTER_NAME}.service") -EXPORTER_CONFIG_TEMPLATE = f"{EXPORTER_NAME}-config.yaml.j2" -EXPORTER_SERVICE_TEMPLATE = f"{EXPORTER_NAME}.service.j2" 
-EXPORTER_HEALTH_RETRY_COUNT = 3 -EXPORTER_HEALTH_RETRY_TIMEOUT = 3 -EXPORTER_CRASH_MSG = "Exporter crashed unexpectedly, please refer to systemd logs..." +from pydantic import BaseModel # pylint: disable = no-name-in-module -# Redfish -REDFISH_TIMEOUT = 10 -REDFISH_MAX_RETRY = 2 + +class ExporterSettings(BaseModel): # pylint: disable = too-few-public-methods + """Constant settings common across exporters.""" + + health_retry_count: int = 3 + health_retry_timeout: int = 3 + service_template: str + service_path: Path + name: str + config_template: str + config_path: Path + + +class HardwareExporterSettings(ExporterSettings): # pylint: disable = too-few-public-methods + """Constant settings for Hardware Exporter.""" + + name: str = "hardware-exporter" + config_path: Path = Path(f"/etc/{name}-config.yaml") + service_path: Path = Path(f"/etc/systemd/system/{name}.service") + config_template: str = f"{name}-config.yaml.j2" + service_template: str = f"{name}.service.j2" + crash_msg: str = "Hardware exporter crashed unexpectedly, please refer to systemd logs..." + + redfish_timeout: int = 10 + redfish_max_retry: int = 2 + + +HARDWARE_EXPORTER_SETTINGS = HardwareExporterSettings() + + +class SmartCtlExporterSettings(ExporterSettings): # pylint: disable = too-few-public-methods + """Constant settings for SmartCtl Exporter.""" + + name: str = "smartctl-exporter" + config_path: Path = Path(f"/etc/{name}-config.yaml") + service_path: Path = Path(f"/etc/systemd/system/{name}.service") + config_template: str = f"{name}-config.yaml.j2" + service_template: str = f"{name}.service.j2" + crash_msg: str = "SmartCtl exporter crashed unexpectedly, please refer to systemd logs..." 
+ + +SMARTCTL_EXPORTER_SETTINGS = SmartCtlExporterSettings() class SystemVendor(str, Enum): @@ -46,6 +77,8 @@ class HWTool(str, Enum): IPMI_SEL = "ipmi_sel" IPMI_SENSOR = "ipmi_sensor" REDFISH = "redfish" + SMARTCTL = "smartctl" + SMARTCTL_EXPORTER = "smartctl_exporter" TPR_RESOURCES: t.Dict[HWTool, str] = { @@ -55,7 +88,7 @@ class HWTool(str, Enum): HWTool.SAS3IRCU: "sas3ircu-bin", } -EXPORTER_COLLECTOR_MAPPING = { +HARDWARE_EXPORTER_COLLECTOR_MAPPING = { HWTool.STORCLI: ["collector.mega_raid"], HWTool.PERCCLI: ["collector.poweredge_raid"], HWTool.SAS2IRCU: ["collector.lsi_sas_2"], @@ -70,4 +103,4 @@ class HWTool(str, Enum): TOOLS_DIR = Path("/usr/sbin") # SNAP environment -SNAP_COMMON = Path(f"/var/snap/{EXPORTER_NAME}/common") +SNAP_COMMON = Path(f"/var/snap/{HARDWARE_EXPORTER_SETTINGS.name}/common") diff --git a/src/grafana_dashboards/smart.json b/src/grafana_dashboards/smart.json new file mode 100644 index 00000000..8ea558ee --- /dev/null +++ b/src/grafana_dashboards/smart.json @@ -0,0 +1,3956 @@ +{ + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "links": [], + "panels": [ + { + "type": "row", + "title": "Overview", + "panels": [], + "collapsed": false, + "gridPos": { + "x": 0, + "y": 0, + "w": 24, + "h": 1 + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum(smartctl_devices{instance=~\"$instance\", job=\"$job\"})", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "stat", + "title": "Devices count", + "gridPos": { + "x": 0, + "y": 1, + "w": 3, + "h": 3 + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "options": { + "reduceOptions": { + "values": false, + 
"calcs": [ + "lastNotNull" + ], + "fields": "" + }, + "orientation": "auto", + "textMode": "auto", + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "showPercentChange": false, + "wideLayout": true, + "text": {} + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum(smartctl_devices{instance=~\"$instance\", job=\"$job\"}) - sum(smartctl_device{instance=~\"$instance\", job=\"$job\"})", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "stat", + "title": "Missing devices", + "gridPos": { + "x": 3, + "y": 1, + "w": 3, + "h": 3 + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "options": { + "reduceOptions": { + "values": false, + "calcs": [ + "lastNotNull" + ], + "fields": "" + }, + "orientation": "auto", + "textMode": "auto", + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "showPercentChange": false, + "wideLayout": true, + "text": {} + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "max(smartctl_device_smartctl_exit_status{instance=~\"$instance\", job=\"$job\"})", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "stat", + "title": "Exit status", + "gridPos": { + "x": 6, + "y": 1, + "w": 3, + "h": 3 + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + 
"value": 1 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "options": { + "reduceOptions": { + "values": false, + "calcs": [ + "lastNotNull" + ], + "fields": "" + }, + "orientation": "auto", + "textMode": "auto", + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "showPercentChange": false, + "wideLayout": true, + "text": {} + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum(smartctl_device_num_err_log_entries{instance=~\"$instance\", job=\"$job\"})", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "stat", + "title": "Error log entries", + "gridPos": { + "x": 9, + "y": 1, + "w": 3, + "h": 3 + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "options": { + "reduceOptions": { + "values": false, + "calcs": [ + "lastNotNull" + ], + "fields": "" + }, + "orientation": "auto", + "textMode": "auto", + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "showPercentChange": false, + "wideLayout": true, + "text": {} + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum(smartctl_device_media_errors{instance=~\"$instance\", job=\"$job\"})", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "stat", + "title": "Media errors", + "gridPos": { + "x": 12, + "y": 1, + "w": 3, + "h": 3 + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + 
"steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "options": { + "reduceOptions": { + "values": false, + "calcs": [ + "lastNotNull" + ], + "fields": "" + }, + "orientation": "auto", + "textMode": "auto", + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "showPercentChange": false, + "wideLayout": true, + "text": {} + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum(smartctl_device_error_log_count{instance=~\"$instance\", job=\"$job\"})", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "stat", + "title": "SMART error log count", + "gridPos": { + "x": 15, + "y": 1, + "w": 3, + "h": 3 + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "options": { + "reduceOptions": { + "values": false, + "calcs": [ + "lastNotNull" + ], + "fields": "" + }, + "orientation": "auto", + "textMode": "auto", + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "showPercentChange": false, + "wideLayout": true, + "text": {} + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum(smartctl_device_critical_warning{instance=~\"$instance\", job=\"$job\"})", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "stat", + "title": "Critical warnings", + "gridPos": { + "x": 18, + "y": 1, + "w": 3, + "h": 3 + }, + 
"fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "options": { + "reduceOptions": { + "values": false, + "calcs": [ + "lastNotNull" + ], + "fields": "" + }, + "orientation": "auto", + "textMode": "auto", + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "showPercentChange": false, + "wideLayout": true, + "text": {} + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum(1 - smartctl_device_smart_status{instance=~\"$instance\", job=\"$job\"})", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "stat", + "title": "SMART failed", + "gridPos": { + "x": 21, + "y": 1, + "w": 3, + "h": 3 + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "options": { + "reduceOptions": { + "values": false, + "calcs": [ + "lastNotNull" + ], + "fields": "" + }, + "orientation": "auto", + "textMode": "auto", + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "showPercentChange": false, + "wideLayout": true, + "text": {} + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "smartctl_device{instance=~\"$instance\", job=\"$job\"}", + "range": false, + "instant": true, + "format": "table", + "refId": "A" + } + ], + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "__name__": true, + "Time": true, + "Value": true, + "instance": true, + 
"job": true + }, + "indexByName": { + "device": 0, + "model_name": 1, + "form_factor": 2, + "serial_number": 3, + "protocol": 4, + "ata_version": 5, + "firmware_version": 6, + "ata_additional_product_id": 7, + "interface": 8, + "model_family": 9, + "sata_version": 10 + }, + "renameByName": { + "device": "Device", + "model_name": "Model name", + "form_factor": "Form factor", + "serial_number": "Serial number", + "protocol": "Protocol", + "ata_version": "ATA version", + "firmware_version": "Firmware version", + "ata_additional_product_id": "ATA additional product ID", + "interface": "Interface", + "model_family": "Model family", + "sata_version": "SATA version" + } + } + } + ], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "table", + "title": "Device Info", + "gridPos": { + "x": 0, + "y": 4, + "w": 24, + "h": 8 + }, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "options": { + "cellHeight": "md", + "frameIndex": -1, + "showHeader": true + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum(smartctl_device{instance=~\"$instance\", job=\"$job\"}) by (instance, device, model_name)", + "range": false, + "instant": true, + "format": "table", + "refId": "DEVICE" + }, + { + "editorMode": "code", + "expr": "sum(smartctl_device_temperature{instance=~\"$instance\", job=\"$job\", temperature_type=\"current\"}) by (instance, device)", + "range": false, + "instant": true, + "format": "table", + "refId": "TEMP" + }, + { + "editorMode": "code", + "expr": "sum(smartctl_device_smartctl_exit_status{instance=~\"$instance\", job=\"$job\"}) by (instance, device)", + "range": false, + "instant": true, + "format": "table", + "refId": "EXIT" + }, + { + "editorMode": "code", + "expr": "sum(smartctl_device_smart_status{instance=~\"$instance\", job=\"$job\"}) by (instance, device)", + "range": false, + "instant": true, + "format": "table", + "refId": "PASSED" + }, + { + 
"editorMode": "code", + "expr": "sum(smartctl_device_power_on_seconds{instance=~\"$instance\", job=\"$job\"}) by (instance, device)", + "range": false, + "instant": true, + "format": "table", + "refId": "POWON" + }, + { + "editorMode": "code", + "expr": "sum(smartctl_device_power_cycle_count{instance=~\"$instance\", job=\"$job\"}) by (instance, device)", + "range": false, + "instant": true, + "format": "table", + "refId": "PCC" + }, + { + "editorMode": "code", + "expr": "sum(smartctl_device_interface_speed{instance=~\"$instance\", job=\"$job\", speed_type=\"current\"}) by (instance, device)", + "range": false, + "instant": true, + "format": "table", + "refId": "INTSPEED" + }, + { + "editorMode": "code", + "expr": "sum(smartctl_device_capacity_bytes{instance=~\"$instance\", job=\"$job\"}) by (instance, device)", + "range": false, + "instant": true, + "format": "table", + "refId": "CAPBYTES" + }, + { + "editorMode": "code", + "expr": "sum(smartctl_device_capacity_blocks{instance=~\"$instance\", job=\"$job\"}) by (instance, device)", + "range": false, + "instant": true, + "format": "table", + "refId": "CAPBLOCKS" + }, + { + "editorMode": "code", + "expr": "sum(smartctl_device_block_size{blocks_type=\"logical\", instance=~\"$instance\", job=\"$job\"}) by (instance, device)", + "range": false, + "instant": true, + "format": "table", + "refId": "BLKSIZELOG" + }, + { + "editorMode": "code", + "expr": "sum(smartctl_device_block_size{blocks_type=\"physical\", instance=~\"$instance\", job=\"$job\"}) by (instance, device)", + "range": false, + "instant": true, + "format": "table", + "refId": "BLKSIZEPHY" + }, + { + "editorMode": "code", + "expr": "sum(smartctl_device_error_log_count{instance=~\"$instance\", job=\"$job\"}) by (instance, device)", + "range": false, + "instant": true, + "format": "table", + "refId": "ERRLOGCNT" + } + ], + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "pattern": 
"(device|model_name|instance|Value\\s.*)" + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value #DEVICE": true + }, + "indexByName": {}, + "renameByName": { + "instance": "Instance", + "device": "Device", + "model_name": "Model name", + "Value #TEMP": "Temperature", + "Value #EXIT": "Exit status", + "Value #PASSED": "Passed", + "Value #POWON": "Power on seconds", + "Value #PCC": "Power Cycle Count", + "Value #INTSPEED": "Device interface speed", + "Value #CAPBYTES": "Capacity Bytes", + "Value #CAPBLOCKS": "Capacity Blocks", + "Value #BLKSIZELOG": "Block size (logical)", + "Value #BLKSIZEPHY": "Block size (physical)", + "Value #ERRLOGCNT": "Error log count" + } + } + } + ], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "table", + "title": "SMART overview", + "gridPos": { + "x": 0, + "y": 12, + "w": 24, + "h": 8 + }, + "fieldConfig": { + "defaults": {}, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Device" + }, + "properties": [ + { + "id": "custom.width", + "value": 70 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Instance" + }, + "properties": [ + { + "id": "custom.width", + "value": 320 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Model name" + }, + "properties": [ + { + "id": "custom.width", + "value": 220 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Temperature" + }, + "properties": [ + { + "id": "unit", + "value": "celsius" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Power on seconds" + }, + "properties": [ + { + "id": "unit", + "value": "s" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Device interface speed" + }, + "properties": [ + { + "id": "unit", + "value": "bps" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Capacity Bytes" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + 
"options": "Capacity Blocks" + }, + "properties": [ + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Block size (logical)" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Block size (physical)" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + } + ] + }, + "options": { + "cellHeight": "md", + "frameIndex": -1, + "showHeader": true + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "smartctl_device_attribute{instance=~\"$instance\", job=\"$job\"} * on(device, instance) group_left(model_name) smartctl_device{instance=~\"$instance\", job=\"$job\"}", + "range": false, + "instant": true, + "format": "table", + "refId": "A" + } + ], + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "__name__": true, + "Time": true, + "job": true + }, + "indexByName": { + "device": 0, + "model_name": 1, + "attribute_id": 2, + "attribute_name": 3, + "attribute_value_type": 4, + "attribute_flags_long": 5, + "attribute_flags_short": 6, + "Value": 7 + }, + "renameByName": { + "device": "Device", + "attribute_flags_long": "Attribute flags (long)", + "attribute_flags_short": "Attribute flags (short)", + "attribute_id": "Attribute ID", + "attribute_name": "Attribute name", + "attribute_value_type": "Attribute value type", + "model_name": "Model Name" + } + } + }, + { + "id": "groupBy", + "options": { + "fields": { + "Instance": { + "aggregations": [], + "operation": "groupby" + }, + "Device": { + "aggregations": [], + "operation": "groupby" + }, + "Model Name": { + "aggregations": [], + "operation": "groupby" + }, + "Attribute ID": { + "aggregations": [], + "operation": "groupby" + }, + "Attribute name": { + "aggregations": [], + "operation": "groupby" + }, + "Attribute flags (long)": { + "aggregations": [], + "operation": "groupby" + }, + 
"Attribute flags (short)": { + "aggregations": [], + "operation": "groupby" + }, + "Attribute value type": { + "aggregations": [ + "allValues" + ], + "operation": "aggregate" + }, + "Value": { + "aggregations": [ + "allValues" + ], + "operation": "aggregate" + } + } + } + } + ], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "table", + "title": "SMART attributes", + "gridPos": { + "x": 0, + "y": 20, + "w": 24, + "h": 8 + }, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "options": { + "cellHeight": "md", + "frameIndex": -1, + "showHeader": true + } + }, + { + "type": "row", + "title": "Metrics", + "panels": [], + "collapsed": false, + "gridPos": { + "x": 0, + "y": 28, + "w": 24, + "h": 1 + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum by (model_name) (smartctl_device_temperature{instance=~\"$instance\", job=\"$job\",temperature_type=\"current\"} * on(device, instance) group_left(model_name) smartctl_device{instance=~\"$instance\"})", + "legendFormat": "{{ model_name }}", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "timeseries", + "title": "Temperature", + "maxDataPoints": 100, + "gridPos": { + "x": 0, + "y": 29, + "w": 6, + "h": 8 + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + 
"stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "celsius" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum by (model_name) (smartctl_device_smartctl_exit_status{instance=~\"$instance\", job=\"$job\"} * on(device, instance) group_left(model_name) smartctl_device{instance=~\"$instance\"})", + "legendFormat": "{{ model_name }}", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "timeseries", + "title": "Smartctl exit status", + "maxDataPoints": 100, + "gridPos": { + "x": 6, + "y": 29, + "w": 6, + "h": 8 + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "short" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ 
+ { + "editorMode": "code", + "expr": "sum by (model_name) (smartctl_device_smart_status{instance=~\"$instance\", job=\"$job\"} * on(device, instance) group_left(model_name) smartctl_device{instance=~\"$instance\"})", + "legendFormat": "{{ model_name }}", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "timeseries", + "title": "SMART passed", + "maxDataPoints": 100, + "gridPos": { + "x": 12, + "y": 29, + "w": 6, + "h": 8 + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "short" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum by (model_name) (increase(smartctl_device_power_on_seconds{instance=~\"$instance\", job=\"$job\"}[1h]) * on(device, instance) group_left(model_name) smartctl_device{instance=~\"$instance\"})", + "legendFormat": "{{ model_name }}", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": 
"${prometheusds}" + }, + "type": "timeseries", + "title": "Power on 1 hour", + "interval": "1h", + "maxDataPoints": 100, + "gridPos": { + "x": 18, + "y": 29, + "w": 6, + "h": 8 + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "s" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "sum" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum by (model_name) (smartctl_device_power_cycle_count{instance=~\"$instance\", job=\"$job\"} * on(device, instance) group_left(model_name) smartctl_device{instance=~\"$instance\"})", + "legendFormat": "{{ model_name }}", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "timeseries", + "title": "Power cycle count", + "maxDataPoints": 100, + "gridPos": { + "x": 0, + "y": 37, + "w": 6, + "h": 8 + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": 
"line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "short" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum by (model_name) (smartctl_device_percentage_used{instance=~\"$instance\", job=\"$job\"} * on(device, instance) group_left(model_name) smartctl_device{instance=~\"$instance\"})", + "legendFormat": "{{ model_name }}", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "timeseries", + "title": "Write percentage used", + "maxDataPoints": 100, + "gridPos": { + "x": 6, + "y": 37, + "w": 6, + "h": 8 + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "short" + }, + 
"overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum by (model_name) (smartctl_device_nvme_capacity_bytes{instance=~\"$instance\", job=\"$job\"} * on(device, instance) group_left(model_name) smartctl_device{instance=~\"$instance\"})", + "legendFormat": "{{ model_name }}", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "timeseries", + "title": "NVMe capacity bytes", + "maxDataPoints": 100, + "gridPos": { + "x": 12, + "y": 37, + "w": 6, + "h": 8 + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "bytes" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum by (model_name) (smartctl_device_num_err_log_entries{instance=~\"$instance\", 
job=\"$job\"} * on(device, instance) group_left(model_name) smartctl_device{instance=~\"$instance\"})", + "legendFormat": "{{ model_name }}", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "timeseries", + "title": "Number of error log entries", + "description": "Contains the number of Error Information log entries over the life of the controller", + "maxDataPoints": 100, + "gridPos": { + "x": 18, + "y": 37, + "w": 6, + "h": 8 + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "short" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum by (model_name) (smartctl_device_media_errors{instance=~\"$instance\", job=\"$job\"} * on(device, instance) group_left(model_name) smartctl_device{instance=~\"$instance\"})", + "legendFormat": "{{ model_name }}", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": 
"${prometheusds}" + }, + "type": "timeseries", + "title": "Number of media errors", + "description": "Contains the number of occurrences where the controller detected an unrecovered data integrity error. Errors such as uncorrectable ECC, CRC checksum failure, or LBA tag mismatch are included in this field", + "maxDataPoints": 100, + "gridPos": { + "x": 0, + "y": 45, + "w": 8, + "h": 8 + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "short" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum by (model_name) (smartctl_device_error_log_count{instance=~\"$instance\", job=\"$job\",error_log_type=\"summary\"} * on(device, instance) group_left(model_name) smartctl_device{instance=~\"$instance\"})", + "legendFormat": "{{ model_name }}", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "timeseries", + "title": "SMART error log count", + "maxDataPoints": 100, + "gridPos": { + "x": 8, + "y": 45, + "w": 8, + "h": 8 + }, + 
"fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "short" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum by (model_name) (smartctl_device_critical_warning{instance=~\"$instance\", job=\"$job\"} * on(device, instance) group_left(model_name) smartctl_device{instance=~\"$instance\"})", + "legendFormat": "{{ model_name }}", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "timeseries", + "title": "Critical warnings for state of controller", + "maxDataPoints": 100, + "gridPos": { + "x": 16, + "y": 45, + "w": 8, + "h": 8 + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": 
"linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "short" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum by (model_name) (smartctl_device_interface_speed{instance=~\"$instance\", job=\"$job\",speed_type=\"current\"} * on(device, instance) group_left(model_name) smartctl_device{instance=~\"$instance\"})", + "legendFormat": "{{ model_name }}", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "timeseries", + "title": "Interface speed (current)", + "maxDataPoints": 100, + "gridPos": { + "x": 0, + "y": 53, + "w": 12, + "h": 8 + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "bps" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", 
+ "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum by (model_name) (smartctl_device_interface_speed{instance=~\"$instance\", job=\"$job\",speed_type=\"max\"} * on(device, instance) group_left(model_name) smartctl_device{instance=~\"$instance\"})", + "legendFormat": "{{ model_name }}", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "timeseries", + "title": "Interface speed (max)", + "maxDataPoints": 100, + "gridPos": { + "x": 12, + "y": 53, + "w": 12, + "h": 8 + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "bps" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum by (model_name) (smartctl_device_capacity_bytes{instance=~\"$instance\", job=\"$job\"} * on(device, instance) group_left(model_name) smartctl_device{instance=~\"$instance\"})", + "legendFormat": "{{ 
model_name }}", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "timeseries", + "title": "Capacity Bytes", + "maxDataPoints": 100, + "gridPos": { + "x": 0, + "y": 61, + "w": 6, + "h": 8 + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "bytes" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum by (model_name) (smartctl_device_capacity_blocks{instance=~\"$instance\", job=\"$job\"} * on(device, instance) group_left(model_name) smartctl_device{instance=~\"$instance\"})", + "legendFormat": "{{ model_name }}", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "timeseries", + "title": "Capacity Blocks", + "maxDataPoints": 100, + "gridPos": { + "x": 6, + "y": 61, + "w": 6, + "h": 8 + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + 
"axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "short" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum by (model_name) (smartctl_device_bytes_written{instance=~\"$instance\", job=\"$job\"} * on(device, instance) group_left(model_name) smartctl_device{instance=~\"$instance\"})", + "legendFormat": "{{ model_name }}", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "timeseries", + "title": "Bytes written", + "maxDataPoints": 100, + "gridPos": { + "x": 12, + "y": 61, + "w": 6, + "h": 8 + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + 
"spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "bytes" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum by (model_name) (smartctl_device_bytes_read{instance=~\"$instance\", job=\"$job\"} * on(device, instance) group_left(model_name) smartctl_device{instance=~\"$instance\"})", + "legendFormat": "{{ model_name }}", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "timeseries", + "title": "Bytes read", + "maxDataPoints": 100, + "gridPos": { + "x": 18, + "y": 61, + "w": 6, + "h": 8 + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "bytes" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": 
[ + { + "editorMode": "code", + "expr": "sum by (model_name) (smartctl_device_block_size{instance=~\"$instance\", job=\"$job\",blocks_type=\"logical\"} * on(device, instance) group_left(model_name) smartctl_device{instance=~\"$instance\"})", + "legendFormat": "{{ model_name }}", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "timeseries", + "title": "Block size (logical)", + "maxDataPoints": 100, + "gridPos": { + "x": 0, + "y": 69, + "w": 6, + "h": 8 + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "bytes" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum by (model_name) (smartctl_device_block_size{instance=~\"$instance\", job=\"$job\",blocks_type=\"physical\"} * on(device, instance) group_left(model_name) smartctl_device{instance=~\"$instance\"})", + "legendFormat": "{{ model_name }}", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": 
false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "timeseries", + "title": "Block size (physical)", + "maxDataPoints": 100, + "gridPos": { + "x": 6, + "y": 69, + "w": 6, + "h": 8 + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "bytes" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum by (model_name) (smartctl_device_available_spare_threshold{instance=~\"$instance\", job=\"$job\"} * on(device, instance) group_left(model_name) smartctl_device{instance=~\"$instance\"})", + "legendFormat": "{{ model_name }}", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "timeseries", + "title": "Available spare threshold", + "description": "When the Available Spare falls below the threshold indicated in this field, an asynchronous event completion may occur. 
The value is indicated as a normalized percentage (0 to 100%)", + "maxDataPoints": 100, + "gridPos": { + "x": 12, + "y": 69, + "w": 6, + "h": 8 + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "percent" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum by (model_name) (smartctl_device_available_spare{instance=~\"$instance\", job=\"$job\"} * on(device, instance) group_left(model_name) smartctl_device{instance=~\"$instance\"})", + "legendFormat": "{{ model_name }}", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "timeseries", + "title": "Available spare", + "description": "Normalized percentage (0 to 100%) of the remaining spare capacity available", + "maxDataPoints": 100, + "gridPos": { + "x": 18, + "y": 69, + "w": 6, + "h": 8 + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": 
"", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "percent" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + }, + { + "type": "row", + "title": "Go Runtime Metrics", + "panels": [], + "collapsed": false, + "gridPos": { + "x": 0, + "y": 77, + "w": 24, + "h": 1 + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum by (pod) (process_open_fds{instance=~\"$instance\", job=\"$job\"})", + "legendFormat": "{{ pod }}", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "timeseries", + "title": "File Descriptors", + "maxDataPoints": 100, + "gridPos": { + "x": 0, + "y": 78, + "w": 4, + "h": 8 + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + 
"mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "short" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum by (pod) (rate(process_cpu_seconds_total{instance=~\"$instance\", job=\"$job\"}[$__rate_interval]))", + "legendFormat": "{{ pod }}", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "timeseries", + "title": "CPU Usage", + "maxDataPoints": 100, + "gridPos": { + "x": 4, + "y": 78, + "w": 4, + "h": 8 + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "short" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum by (pod) (increase(process_resident_memory_bytes{instance=~\"$instance\", 
job=\"$job\"}[1h]))", + "legendFormat": "{{ pod }}", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "timeseries", + "title": "Memory Usage", + "maxDataPoints": 100, + "gridPos": { + "x": 8, + "y": 78, + "w": 4, + "h": 8 + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "avg(go_memstats_stack_sys_bytes{instance=~\"$instance\", job=\"$job\"})", + "legendFormat": "sys", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + }, + { + "editorMode": "code", + "expr": "avg(go_memstats_stack_inuse_bytes{instance=~\"$instance\", job=\"$job\"})", + "legendFormat": "inuse", + "range": true, + "instant": false, + "format": "time_series", + "refId": "B" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "timeseries", + "title": "Stack Memory Usage", + "maxDataPoints": 100, + 
"gridPos": { + "x": 12, + "y": 78, + "w": 4, + "h": 8 + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "avg(go_memstats_heap_sys_bytes{instance=~\"$instance\", job=\"$job\"})", + "legendFormat": "sys", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + }, + { + "editorMode": "code", + "expr": "avg(go_memstats_heap_idle_bytes{instance=~\"$instance\", job=\"$job\"})", + "legendFormat": "idle", + "range": true, + "instant": false, + "format": "time_series", + "refId": "B" + }, + { + "editorMode": "code", + "expr": "avg(go_memstats_heap_released_bytes{instance=~\"$instance\", job=\"$job\"})", + "legendFormat": "released", + "range": true, + "instant": false, + "format": "time_series", + "refId": "C" + }, + { + "editorMode": "code", + "expr": "avg(go_memstats_next_gc_bytes{instance=~\"$instance\", job=\"$job\"})", + "legendFormat": "next_gc", + "range": true, + "instant": false, + "format": "time_series", + "refId": "D" + }, + { + "editorMode": "code", + "expr": 
"avg(go_memstats_heap_inuse_bytes{instance=~\"$instance\", job=\"$job\"})", + "legendFormat": "inuse", + "range": true, + "instant": false, + "format": "time_series", + "refId": "E" + }, + { + "editorMode": "code", + "expr": "avg(go_memstats_heap_alloc_bytes{instance=~\"$instance\", job=\"$job\"})", + "legendFormat": "alloc", + "range": true, + "instant": false, + "format": "time_series", + "refId": "F" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "timeseries", + "title": "Heap Memory Usage", + "maxDataPoints": 100, + "gridPos": { + "x": 16, + "y": 78, + "w": 4, + "h": 8 + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "avg by (pod) (go_memstats_heap_objects{instance=~\"$instance\", job=\"$job\"})", + "legendFormat": "{{ pod }}", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "timeseries", 
+ "title": "Heap Objects", + "maxDataPoints": 100, + "gridPos": { + "x": 20, + "y": 78, + "w": 4, + "h": 8 + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "short" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum by (pod) (go_threads{instance=~\"$instance\", job=\"$job\"})", + "legendFormat": "{{ pod }}", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "timeseries", + "title": "Threads", + "maxDataPoints": 100, + "gridPos": { + "x": 0, + "y": 86, + "w": 4, + "h": 8 + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, 
+ "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "short" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum by (pod) (go_goroutines{instance=~\"$instance\", job=\"$job\"})", + "legendFormat": "{{ pod }}", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "timeseries", + "title": "Goroutines", + "maxDataPoints": 100, + "gridPos": { + "x": 4, + "y": 86, + "w": 4, + "h": 8 + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "short" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": 
"sum by (pod) (rate(go_memstats_alloc_bytes_total{instance=~\"$instance\", job=\"$job\"}[$__rate_interval]))", + "legendFormat": "{{ pod }}", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "timeseries", + "title": "Go Alloc Rate", + "maxDataPoints": 100, + "gridPos": { + "x": 8, + "y": 86, + "w": 4, + "h": 8 + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "Bps" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum by (pod) (go_memstats_alloc_bytes{instance=~\"$instance\", job=\"$job\"})", + "legendFormat": "{{ pod }}", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "timeseries", + "title": "Go Alloc Bytes", + "maxDataPoints": 100, + "gridPos": { + "x": 12, + "y": 86, + "w": 4, + "h": 8 + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + 
"custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum by (pod) (rate(go_gc_duration_seconds_count{instance=~\"$instance\", job=\"$job\"}[$__rate_interval]))", + "legendFormat": "{{ pod }}", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "timeseries", + "title": "Go GC Per Second", + "maxDataPoints": 100, + "gridPos": { + "x": 16, + "y": 86, + "w": 4, + "h": 8 + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": 
"none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "short" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + }, + { + "links": [], + "repeatDirection": "h", + "tags": [], + "targets": [ + { + "editorMode": "code", + "expr": "sum by (pod) (rate(go_gc_duration_seconds_sum{instance=~\"$instance\", job=\"$job\"}[$__rate_interval]))", + "legendFormat": "{{ pod }}", + "range": true, + "instant": false, + "format": "time_series", + "refId": "A" + } + ], + "transformations": [], + "transparent": false, + "datasource": { + "uid": "${prometheusds}" + }, + "type": "timeseries", + "title": "Go GC Duration Seconds", + "maxDataPoints": 100, + "gridPos": { + "x": 20, + "y": 86, + "w": 4, + "h": 8 + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "unit": "s" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + } + } + ], + "schemaVersion": 36, + "tags": [ + "smartctl" + ], + "timezone": "browser", + "description": "Dashboard for smartctl", + "time": { + "from": "now-6h", + "to": "now" + }, + "title": "Smartctl", + 
"uid": "smartctl", + "version": 1, + "templating": { + "list": [ + { + "multi": false, + "options": [], + "skipUrlSync": false, + "datasource": null, + "hide": 0, + "type": "datasource", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "query": "prometheus" + }, + { + "options": [], + "skipUrlSync": false, + "datasource": { + "uid": "${prometheusds}" + }, + "hide": 0, + "type": "query", + "label": "Job", + "name": "job", + "query": "label_values(smartctl_version, job)", + "sort": 1, + "refresh": 2 + }, + { + "multi": true, + "options": [], + "skipUrlSync": false, + "datasource": { + "uid": "${prometheusds}" + }, + "hide": 0, + "type": "query", + "label": "Instance", + "name": "instance", + "query": "label_values(smartctl_version{job=\"$job\"}, instance)", + "includeAll": true, + "sort": 1, + "refresh": 2 + } + ] + } +} \ No newline at end of file diff --git a/src/hw_tools.py b/src/hw_tools.py index 30473717..5fde688d 100644 --- a/src/hw_tools.py +++ b/src/hw_tools.py @@ -3,13 +3,16 @@ Define strategy for install, remove and verifier for different hardware. 
""" +import io import logging import os import shutil import stat import subprocess +import tarfile from abc import ABCMeta, abstractmethod from functools import lru_cache +from http import HTTPStatus from pathlib import Path from typing import Dict, List, Set, Tuple @@ -28,7 +31,7 @@ validate_checksum, ) from config import ( - REDFISH_TIMEOUT, + HARDWARE_EXPORTER_SETTINGS, SNAP_COMMON, TOOLS_DIR, TPR_RESOURCES, @@ -60,6 +63,14 @@ def __init__(self, tool: HWTool, path: Path): self.message = f"Tool: {tool} path: {path} size is zero" +class ResourceInstallationError(Exception): + """Exception raised when a hardware tool installation fails.""" + + def __init__(self, tool: HWTool): + """Init.""" + super().__init__(f"Installation failed for tool: {tool}") + + def copy_to_snap_common_bin(source: Path, filename: str) -> None: """Copy file to $SNAP_COMMON/bin folder.""" Path(f"{SNAP_COMMON}/bin").mkdir(parents=False, exist_ok=True) @@ -300,7 +311,7 @@ def install(self) -> None: apt.add_package(self.pkg, update_cache=True) def remove(self) -> None: - # Skip removing because we afriad this cause dependency error + # Skip removing because this may cause dependency error # for other services on the same machine. logger.info("SSACLIStrategy skip removing %s", self.pkg) @@ -318,7 +329,7 @@ def install(self) -> None: apt_helpers.add_pkg_with_candidate_version(self.pkg) def remove(self) -> None: - # Skip removing because we afriad this cause dependency error + # Skip removing because this may cause dependency error # for other services on the same machine. 
logger.info("%s skip removing %s", self._name, self.pkg) @@ -358,6 +369,75 @@ def check(self) -> bool: return True +class SmartCtlStrategy(APTStrategyABC): + """Strategy for installing smartctl.""" + + pkg = "smartmontools" + _name = HWTool.SMARTCTL + + def install(self) -> None: + apt_helpers.add_pkg_with_candidate_version(self.pkg) + + def remove(self) -> None: + # Skip removing because this may cause dependency error + # for other services on the same machine. + logger.info("%s skip removing %s", self._name, self.pkg) + + def check(self) -> bool: + """Check package status.""" + return check_deb_pkg_installed(self.pkg) + + +class SmartCtlExporterStrategy(StrategyABC): # pylint: disable=R0903 + """Install smartctl exporter binary.""" + + _name = HWTool.SMARTCTL_EXPORTER + + _resource_dir = Path("/opt/SmartCtlExporter/") + _release = ( + "https://github.com/prometheus-community/" + "smartctl_exporter/releases/download/v0.12.0/smartctl_exporter-0.12.0.linux-amd64.tar.gz" + ) + _exporter_name = "smartctl_exporter" + _exporter_path = Path(_resource_dir / "smartctl_exporter") + + def install(self) -> None: + """Install exporter binary from internet.""" + logger.debug("Installing SmartCtlExporter") + self._resource_dir.mkdir(parents=True, exist_ok=True) + + resp = requests.get(self._release, timeout=60) + if resp.status_code != HTTPStatus.OK: + logger.error("Failed to download smartctl exporter binary.") + raise ResourceInstallationError(self._name) + + success = False + fileobj = io.BytesIO(resp.content) + with tarfile.open(fileobj=fileobj, mode="r:gz") as tar: + for member in tar.getmembers(): + if member.name.endswith(self._exporter_name): + with open(self._exporter_path, "wb") as outfile: + member_file = tar.extractfile(member) + if member_file: + outfile.write(member_file.read()) + success = True + if success: + make_executable(self._exporter_path) + if not success: + logger.error("Failed to install SmartCtlExporter binary.") + raise
ResourceInstallationError(self._name) + + def remove(self) -> None: + """Remove downloaded exporter binary.""" + logger.debug("Remove SmartCtlExporter") + shutil.rmtree(self._resource_dir) + + def check(self) -> bool: + """Check that the exporter binary exists.""" + logger.debug("Check SmartCtlExporter resources") + return self._exporter_path.is_file() + + def _raid_hw_verifier_hwinfo() -> Set[HWTool]: """Verify if a supported RAID card exists on the machine using the hwinfo command.""" hwinfo_output = hwinfo("storage") @@ -441,7 +521,9 @@ def redfish_available() -> bool: bmc_address = get_bmc_address() health_check_endpoint = f"https://{bmc_address}:443/redfish/v1/" try: - response = requests.get(health_check_endpoint, verify=False, timeout=REDFISH_TIMEOUT) + response = requests.get( + health_check_endpoint, verify=False, timeout=HARDWARE_EXPORTER_SETTINGS.redfish_timeout + ) response.raise_for_status() data = response.json() # only check if the data is empty dict or not @@ -500,6 +582,14 @@ def bmc_hw_verifier() -> List[HWTool]: return tools +def disk_hw_verifier() -> List[HWTool]: + """Verify if the disk exists on the machine.""" + lshw_storage = lshw(class_filter="disk") + if lshw_storage: + return [HWTool.SMARTCTL] + return [] + + # Using cache here to avoid repeat call. # The lru_cache should be cleaned every time the hook been triggered.
@lru_cache @@ -507,7 +597,8 @@ def get_hw_tool_enable_list() -> List[HWTool]: """Return HWTool enable list.""" raid_enable_list = raid_hw_verifier() bmc_enable_list = bmc_hw_verifier() - return raid_enable_list + bmc_enable_list + disk_enable_list = disk_hw_verifier() + return raid_enable_list + bmc_enable_list + disk_enable_list class HWToolHelper: @@ -526,6 +617,7 @@ def strategies(self) -> List[StrategyABC]: IPMIDCMIStrategy(), IPMISENSORStrategy(), RedFishStrategy(), + SmartCtlStrategy(), ] def fetch_tools( # pylint: disable=W0102 @@ -585,8 +677,8 @@ def install(self, resources: Resources, hw_enable_list: List[HWTool]) -> Tuple[b for strategy in self.strategies: if strategy.name not in hw_enable_list: continue - # TPRStrategy try: + # TPRStrategy if isinstance(strategy, TPRStrategyABC): path = fetch_tools.get(strategy.name) # pylint: disable=W0212 if path: diff --git a/src/service.py b/src/service.py index 17e4170f..7c721f6b 100644 --- a/src/service.py +++ b/src/service.py @@ -1,45 +1,31 @@ """Exporter service helper.""" import os -from functools import wraps +from abc import ABC, abstractmethod from logging import getLogger from pathlib import Path -from typing import Any, Callable, Dict, List, Optional, Tuple +from time import sleep +from typing import Any, Dict, List, Optional, Tuple from charms.operator_libs_linux.v1 import systemd from jinja2 import Environment, FileSystemLoader +from ops.model import ConfigData +from redfish import redfish_client +from redfish.rest.v1 import InvalidCredentialsError from config import ( - EXPORTER_COLLECTOR_MAPPING, - EXPORTER_CONFIG_PATH, - EXPORTER_CONFIG_TEMPLATE, - EXPORTER_NAME, - EXPORTER_SERVICE_PATH, - EXPORTER_SERVICE_TEMPLATE, + HARDWARE_EXPORTER_COLLECTOR_MAPPING, + HARDWARE_EXPORTER_SETTINGS, + SMARTCTL_EXPORTER_SETTINGS, + ExporterSettings, HWTool, ) +from hardware import get_bmc_address +from hw_tools import SmartCtlExporterStrategy logger = getLogger(__name__) -def check_installed(func: Callable) -> 
Callable: - """Ensure exporter service and exporter config is installed before running operations.""" - - @wraps(func) - def wrapper(self: Any, *args: Tuple[Any], **kwargs: Dict[str, Any]) -> Any: - """Wrap func.""" - config_path = Path(EXPORTER_CONFIG_PATH) - service_path = Path(EXPORTER_SERVICE_PATH) - if not config_path.exists() or not service_path.exists(): - logger.error("Exporter is not installed properly.") - logger.error("Failed to run '%s'", func.__name__) - return False - return_value = func(self, *args, **kwargs) - return return_value - - return wrapper - - class ExporterError(Exception): """Custom exception for exporter errors. @@ -47,171 +33,407 @@ class ExporterError(Exception): """ -class ExporterTemplate: - """Jinja template helper class for exporter.""" +class BaseExporter(ABC): + """A class representing the exporter and the metric endpoints.""" - def __init__(self, search_path: Path): - """Initialize template class.""" - self.environment = Environment(loader=FileSystemLoader(search_path / "templates")) - self.config_template = self.environment.get_template(EXPORTER_CONFIG_TEMPLATE) - self.service_template = self.environment.get_template(EXPORTER_SERVICE_TEMPLATE) + # pylint: disable=too-many-instance-attributes - def _install(self, path: Path, content: str, mode: Optional[int] = None) -> bool: - """Install file.""" - success = True - try: - logger.info("Writing file to %s.", path) - fileobj = ( - os.fdopen(os.open(path, os.O_CREAT | os.O_WRONLY, mode), "w", encoding="utf-8") - if mode - # create file with default permissions based on default OS umask - else open(path, "w", encoding="utf-8") # pylint: disable=consider-using-with + exporter_config_path: Optional[Path] = None + + def __init__(self, charm_dir: Path, config: ConfigData, settings: ExporterSettings) -> None: + """Initialize the Exporter class.""" + self.charm_dir = charm_dir + + self.port: int + + self.settings = settings + self.environment = Environment(loader=FileSystemLoader(charm_dir 
/ "templates")) + self.service_template = self.environment.get_template(self.settings.service_template) + self.exporter_service_path = self.settings.service_path + self.exporter_name = self.settings.name + + self.log_level = str(config["exporter-log-level"]) + + @staticmethod + @abstractmethod + def hw_tools() -> List[HWTool]: + """Return list hardware tools to watch.""" + + def validate_exporter_configs(self) -> Tuple[bool, str]: + """Validate the static and runtime config options for the exporter.""" + if not 1 <= self.port <= 65535: + logger.error("Invalid exporter port: port must be in [1, 65535].") + return False, "Invalid config: exporter's port" + + allowed_log_level_choices = {"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"} + if self.log_level.upper() not in allowed_log_level_choices: + logger.error( + "Invalid exporter-log-level: log-level must be in %s (case-insensitive).", + allowed_log_level_choices, ) - with fileobj as file: - file.write(content) + return False, "Invalid config: 'exporter-log-level'" + return True, "Exporter config is valid." - except (NotADirectoryError, PermissionError) as err: - logger.error(err) - logger.info("Writing file to %s - Failed.", path) - success = False - else: - logger.info("Writing file to %s - Done.", path) - return success + def resources_exist(self) -> bool: + """Return true if required resources exist. 
- def _uninstall(self, path: Path) -> bool: - """Uninstall file.""" - success = True - try: - logger.info("Removing file '%s'.", path) - if path.exists(): - path.unlink() - except PermissionError as err: - logger.error(err) - logger.info("Removing file '%s' - Failed.", path) - success = False - else: - logger.info("Removing file '%s' - Done.", path) - return success - - # pylint: disable=too-many-arguments - def render_config( - self, - port: int, - level: str, - collect_timeout: int, - redfish_conn_params: dict, - hw_tools: List[HWTool], - ) -> bool: - """Render and install exporter config file.""" - collectors = [] - for tool in hw_tools: - collector = EXPORTER_COLLECTOR_MAPPING.get(tool) - if collector is not None: - collectors += collector - content = self.config_template.render( - PORT=port, - LEVEL=level, - COLLECT_TIMEOUT=collect_timeout, - COLLECTORS=collectors, - REDFISH_ENABLE=redfish_conn_params != {}, - REDFISH_HOST=redfish_conn_params.get("host", ""), - REDFISH_USERNAME=redfish_conn_params.get("username", ""), - REDFISH_PASSWORD=redfish_conn_params.get("password", ""), - REDFISH_CLIENT_TIMEOUT=redfish_conn_params.get("timeout", ""), - ) - return self._install(EXPORTER_CONFIG_PATH, content, mode=0o600) + Overwrite this method if there are resources need to be installed. + """ + return True - def render_service(self, charm_dir: str, config_file: str) -> bool: - """Render and install exporter service file.""" - content = self.service_template.render(CHARMDIR=charm_dir, CONFIG_FILE=config_file) - return self._install(EXPORTER_SERVICE_PATH, content) + def install_resources(self) -> bool: + """Install the necessary resources for the exporter service. + + Overwrite this method if there are resources need to be installed. + """ + logger.debug("No required resources for %s", self.__class__.__name__) + return True + + def remove_resources(self) -> bool: + """Remove exporter resources. + + Overwrite this method if there are resources need to be removed. 
+ """ + return True def remove_config(self) -> bool: - """Remove exporter config file.""" - return self._uninstall(EXPORTER_CONFIG_PATH) + """Remove exporter configuration file.""" + if self.exporter_config_path is not None and self.exporter_config_path.exists(): + return remove_file(self.exporter_config_path) + return True def remove_service(self) -> bool: """Remove exporter service file.""" - return self._uninstall(EXPORTER_SERVICE_PATH) + if self.exporter_service_path.exists(): + return remove_file(self.exporter_service_path) + return True + + def _restart(self) -> None: + """Restart the exporter daemon.""" + systemd.service_restart(self.exporter_name) + def enable_and_start(self) -> None: + """Enable and start the exporter service.""" + systemd.service_enable(self.exporter_name) + systemd.service_start(self.exporter_name) -class Exporter: - """A class representing the exporter and the metric endpoints.""" + def disable_and_stop(self) -> None: + """Disable and stop the exporter service.""" + systemd.service_disable(self.exporter_name) + systemd.service_stop(self.exporter_name) - def __init__(self, charm_dir: Path) -> None: - """Initialize the class.""" - self.charm_dir = charm_dir - self.template = ExporterTemplate(charm_dir) - - # pylint: disable=too-many-arguments - def install( - self, - port: int, - level: str, - redfish_conn_params: dict, - collect_timeout: int, - hw_tool_enable_list: List, - ) -> bool: + def check_active(self) -> bool: + """Check if the exporter is active or not.""" + return systemd.service_running(self.exporter_name) + + def check_health(self) -> bool: + """Check if the exporter daemon is healthy or not.""" + return not systemd.service_failed(self.exporter_name) + + def _render_service(self, params: Dict[str, str]) -> bool: + """Render and install exporter service file.""" + content = self.service_template.render(**params) + return write_to_file(self.exporter_service_path, content) + + def render_config(self) -> bool: + """Render exporter 
config file..""" + if self.exporter_config_path is not None: + content = self._render_config_content() + return write_to_file(self.exporter_config_path, content, mode=0o600) + return True + + def _render_config_content(self) -> str: + """Overwrite this method to render config content.""" + return "" + + def render_service(self) -> bool: + """Render required files for service.""" + return self._render_service({}) + + def verify_render_files_exist(self) -> bool: + """Verify if service installation is done.""" + config_file_exists = True + if self.exporter_config_path is not None: + config_file_exists = self.exporter_config_path.exists() + service_file_exists = self.exporter_service_path.exists() + return service_file_exists and config_file_exists + + def install(self) -> bool: """Install the exporter.""" - logger.info("Installing %s.", EXPORTER_NAME) - success = self.template.render_config( - port=port, - level=level, - redfish_conn_params=redfish_conn_params, - collect_timeout=collect_timeout, - hw_tools=hw_tool_enable_list, - ) - success = self.template.render_service(str(self.charm_dir), str(EXPORTER_CONFIG_PATH)) - if not success: - logger.error("Failed to install %s.", EXPORTER_NAME) - return success + logger.info("Installing %s.", self.exporter_name) + + # Install resources + install_resource_success = self.install_resources() + if not install_resource_success: + logger.error("Failed to install %s resources.", self.exporter_name) + return False + if not self.resources_exist(): + logger.error("%s resources are not installed properly.", self.exporter_name) + # pylint: disable=too-many-instance-attributes + return False + + # Render config + render_config_success = self.render_config() + if not render_config_success: + logger.error("Failed to render config files for %s.", self.exporter_name) + return False + + # Install service + render_service_success = self.render_service() + if not render_service_success: + logger.error("Failed to install %s.", 
self.exporter_name) + return False + + if not self.verify_render_files_exist(): + logger.error("%s is not installed properly.", self.exporter_name) + return False + systemd.daemon_reload() - logger.info("%s installed.", EXPORTER_NAME) - return success + + logger.info("%s installed.", self.exporter_name) + return True def uninstall(self) -> bool: """Uninstall the exporter.""" - logger.info("Uninstalling %s.", EXPORTER_NAME) - success = self.template.remove_config() - success = self.template.remove_service() - if not success: - logger.error("Failed to uninstall %s.", EXPORTER_NAME) - return success + logger.info("Uninstalling %s.", self.exporter_name) + service_removed = self.remove_service() + config_removed = self.remove_config() + resources_removed = self.remove_resources() + if not (service_removed and config_removed and resources_removed): + logger.error("Failed to uninstall %s.", self.exporter_name) + return False systemd.daemon_reload() - logger.info("%s uninstalled.", EXPORTER_NAME) - return success + logger.info("%s uninstalled.", self.exporter_name) + return True - @check_installed - def stop(self) -> None: - """Stop the exporter daemon.""" - systemd.service_stop(EXPORTER_NAME) + def restart(self) -> None: + """Restart exporter service with retry.""" + logger.info("Restarting exporter - %s", self.exporter_name) + try: + for i in range(1, self.settings.health_retry_count + 1): + logger.warning("Restarting exporter - %d retry", i) + self._restart() + sleep(self.settings.health_retry_timeout) + if self.check_active(): + logger.info("Exporter - %s active after restart.", self.exporter_name) + break + if not self.check_active(): + logger.error("Failed to restart exporter - %s.", self.exporter_name) + raise ExporterError() + except Exception as err: # pylint: disable=W0718 + logger.error("Exporter %s crashed unexpectedly: %s", self.exporter_name, err) + raise ExporterError() from err + + +def write_to_file(path: Path, content: str, mode: Optional[int] = None) -> 
bool: + """Write to file with provided content.""" + success = True + try: + logger.info("Writing file to %s.", path) + fileobj = ( + os.fdopen(os.open(path, os.O_CREAT | os.O_WRONLY, mode), "w", encoding="utf-8") + if mode + # create file with default permissions based on default OS umask + else open(path, "w", encoding="utf-8") # pylint: disable=consider-using-with + ) + with fileobj as file: + file.write(content) + except (NotADirectoryError, PermissionError) as err: + logger.error(err) + logger.info("Writing file to %s - Failed.", path) + success = False + else: + logger.info("Writing file to %s - Done.", path) + return success + + +def remove_file(path: Path) -> bool: + """Remove file.""" + success = True + try: + logger.info("Removing file '%s'.", path) + if path.exists(): + path.unlink() + except PermissionError as err: + logger.error(err) + logger.info("Removing file '%s' - Failed.", path) + success = False + else: + logger.info("Removing file '%s' - Done.", path) + return success + + +class SmartCtlExporter(BaseExporter): + """A class representing the smartctl exporter and the metric endpoints.""" + + required_config: bool = False + + def __init__(self, charm_dir: Path, config: ConfigData) -> None: + """Initialize the Hardware Exporter class.""" + super().__init__(charm_dir, config, SMARTCTL_EXPORTER_SETTINGS) + + self.port = int(config["smartctl-exporter-port"]) + self.collect_timeout = int(config["collect-timeout"]) + self.log_level = str(config["exporter-log-level"]) + self.strategy = SmartCtlExporterStrategy() + + def render_service(self) -> bool: + """Render required files for service.""" + service_rendered = self._render_service( + { + "PORT": str(self.port), + "LEVEL": self.log_level, + } + ) + return service_rendered - @check_installed - def start(self) -> None: - """Start the exporter daemon.""" - systemd.service_start(EXPORTER_NAME) + @staticmethod + def hw_tools() -> List[HWTool]: + """Return list hardware tools to watch.""" + return 
[HWTool.SMARTCTL] - @check_installed - def restart(self) -> None: - """Restart the exporter daemon.""" - systemd.service_restart(EXPORTER_NAME) + def install_resources(self) -> bool: + restart = False + if self.check_active(): + systemd.service_stop(self.exporter_name) + restart = True + self.strategy.install() + if restart: + systemd.service_restart(self.exporter_name) + logger.debug("Finish install resources for %s", self.exporter_name) + return True - @check_installed - def enable(self) -> None: - """Enable the exporter service.""" - systemd.service_enable(EXPORTER_NAME) + def resources_exist(self) -> bool: + return self.strategy.check() - @check_installed - def disable(self) -> None: - """Restart the exporter service.""" - systemd.service_disable(EXPORTER_NAME) + def remove_resources(self) -> bool: + self.strategy.remove() + return True - @check_installed - def check_active(self) -> bool: - """Check if the exporter is active or not.""" - return systemd.service_running(EXPORTER_NAME) - @check_installed - def check_health(self) -> bool: - """Check if the exporter daemon is healthy or not.""" - return not systemd.service_failed(EXPORTER_NAME) +class HardwareExporter(BaseExporter): + """A class representing the hardware exporter and the metric endpoints.""" + + required_config: bool = True + + def __init__(self, charm_dir: Path, config: ConfigData, enable_hw_tools: List[HWTool]) -> None: + """Initialize the Hardware Exporter class.""" + super().__init__(charm_dir, config, HARDWARE_EXPORTER_SETTINGS) + + self.config_template = self.environment.get_template(self.settings.config_template) + self.exporter_config_path = self.settings.config_path + self.port = int(config["hardware-exporter-port"]) + + self.enabled_hw_tool_list = enable_hw_tools + + self.redfish_conn_params = self.get_redfish_conn_params(config) + self.collect_timeout = int(config["collect-timeout"]) + + def _render_config_content(self) -> str: + """Render and install exporter config file.""" + collectors 
= [] + for tool in self.enabled_hw_tool_list: + collector = HARDWARE_EXPORTER_COLLECTOR_MAPPING.get(tool) + if collector is not None: + collectors += collector + content = self.config_template.render( + PORT=self.port, + LEVEL=self.log_level, + COLLECT_TIMEOUT=self.collect_timeout, + COLLECTORS=collectors, + REDFISH_ENABLE=self.redfish_conn_params, + REDFISH_HOST=self.redfish_conn_params.get("host", ""), + REDFISH_USERNAME=self.redfish_conn_params.get("username", ""), + REDFISH_PASSWORD=self.redfish_conn_params.get("password", ""), + REDFISH_CLIENT_TIMEOUT=self.redfish_conn_params.get("timeout", ""), + ) + return content + + def render_service(self) -> bool: + """Render required files for service.""" + service_rendered = self._render_service( + { + "CHARMDIR": str(self.charm_dir), + "CONFIG_FILE": str(self.exporter_config_path), + } + ) + return service_rendered + + def validate_exporter_configs(self) -> Tuple[bool, str]: + """Validate the static and runtime config options for the exporter.""" + valid, msg = super().validate_exporter_configs() + if not valid: + return valid, msg + + # Note we need to use `is False` because `None` means redfish is not + # available. + if self.redfish_conn_params_valid(self.redfish_conn_params) is False: + logger.error("Invalid redfish credentials.") + return False, "Invalid config: 'redfish-username' or 'redfish-password'" + + return True, "Exporter config is valid." + + def redfish_conn_params_valid(self, redfish_conn_params: Dict[str, str]) -> Optional[bool]: + """Check if redfish connections parameters is valid or not. + + If the redfish connection params is not available this property returns + None. Otherwise, it verifies the connection parameters. If the redfish + connection parameters are valid, it returns True; if not valid, it + returns False. 
+ """ + if not redfish_conn_params: + return None + + redfish_obj = None + try: + redfish_obj = redfish_client( + base_url=redfish_conn_params.get("host", ""), + username=redfish_conn_params.get("username", ""), + password=redfish_conn_params.get("password", ""), + timeout=redfish_conn_params.get( + "timeout", self.settings.redfish_timeout # type: ignore + ), + max_retry=self.settings.redfish_max_retry, # type: ignore + ) + redfish_obj.login(auth="session") + except InvalidCredentialsError as e: + result = False + logger.error("invalid redfish credential: %s", str(e)) + except Exception as e: # pylint: disable=W0718 + result = False + logger.error("cannot connect to redfish: %s", str(e)) + else: + result = True + finally: + # Make sure to close connection at the end + if redfish_obj: + redfish_obj.logout() + + return result + + def get_redfish_conn_params(self, config: ConfigData) -> Dict[str, Any]: + """Get redfish connection parameters if redfish is available.""" + if HWTool.REDFISH not in self.enabled_hw_tool_list: + logger.warning("Redfish unavailable, disregarding redfish config options...") + return {} + return { + "host": f"https://{get_bmc_address()}", + "username": config["redfish-username"], + "password": config["redfish-password"], + "timeout": config["collect-timeout"], + } + + @staticmethod + def hw_tools() -> List[HWTool]: + """Return list hardware tools to watch.""" + return [ + HWTool.STORCLI, + HWTool.SSACLI, + HWTool.SAS2IRCU, + HWTool.SAS3IRCU, + HWTool.PERCCLI, + HWTool.IPMI_DCMI, + HWTool.IPMI_SEL, + HWTool.IPMI_SENSOR, + HWTool.REDFISH, + ] diff --git a/templates/smartctl-exporter.service.j2 b/templates/smartctl-exporter.service.j2 new file mode 100644 index 00000000..b273cafe --- /dev/null +++ b/templates/smartctl-exporter.service.j2 @@ -0,0 +1,20 @@ +[Unit] +Description=smartctl exporter service +After=network-online.target + +[Service] +Type=simple +PIDFile=/run/smartctl_exporter.pid +ExecStart=/opt/SmartCtlExporter/smartctl_exporter -port 
{{ PORT }} +User=root +Group=root +SyslogIdentifier=smartctl_exporter +SyslogLevel={{ LEVEL }} +Restart=on-failure +RemainAfterExit=no +RestartSec=100ms +StandardOutput=journal +StandardError=journal + +[Install] +WantedBy=multi-user.target diff --git a/tests/functional/conftest.py b/tests/functional/conftest.py index 5773004d..bbbe3e62 100644 --- a/tests/functional/conftest.py +++ b/tests/functional/conftest.py @@ -3,7 +3,7 @@ import pytest from utils import RESOURCES_DIR, Resource -from config import EXPORTER_COLLECTOR_MAPPING, TPR_RESOURCES, HWTool +from config import HARDWARE_EXPORTER_COLLECTOR_MAPPING, TPR_RESOURCES, HWTool log = logging.getLogger(__name__) @@ -91,7 +91,7 @@ def resources() -> list[Resource]: Resource( resource_name=TPR_RESOURCES.get(HWTool.STORCLI), file_name="storcli.deb", - collector_name=EXPORTER_COLLECTOR_MAPPING.get(HWTool.STORCLI)[0].replace( + collector_name=HARDWARE_EXPORTER_COLLECTOR_MAPPING.get(HWTool.STORCLI)[0].replace( "collector.", "" ), bin_name=HWTool.STORCLI.value, @@ -99,7 +99,7 @@ def resources() -> list[Resource]: Resource( resource_name=TPR_RESOURCES.get(HWTool.PERCCLI), file_name="perccli.deb", - collector_name=EXPORTER_COLLECTOR_MAPPING.get(HWTool.PERCCLI)[0].replace( + collector_name=HARDWARE_EXPORTER_COLLECTOR_MAPPING.get(HWTool.PERCCLI)[0].replace( "collector.", "" ), bin_name=HWTool.PERCCLI.value, @@ -107,7 +107,7 @@ def resources() -> list[Resource]: Resource( resource_name=TPR_RESOURCES.get(HWTool.SAS2IRCU), file_name="sas2ircu", - collector_name=EXPORTER_COLLECTOR_MAPPING.get(HWTool.SAS2IRCU)[0].replace( + collector_name=HARDWARE_EXPORTER_COLLECTOR_MAPPING.get(HWTool.SAS2IRCU)[0].replace( "collector.", "" ), bin_name=HWTool.SAS2IRCU.value, @@ -115,7 +115,7 @@ def resources() -> list[Resource]: Resource( resource_name=TPR_RESOURCES.get(HWTool.SAS3IRCU), file_name="sas3ircu", - collector_name=EXPORTER_COLLECTOR_MAPPING.get(HWTool.SAS3IRCU)[0].replace( + 
collector_name=HARDWARE_EXPORTER_COLLECTOR_MAPPING.get(HWTool.SAS3IRCU)[0].replace( "collector.", "" ), bin_name=HWTool.SAS3IRCU.value, diff --git a/tests/functional/requirements.txt b/tests/functional/requirements.txt index c993e030..b8eed527 100644 --- a/tests/functional/requirements.txt +++ b/tests/functional/requirements.txt @@ -5,3 +5,4 @@ pytest-operator # https://github.com/go-macaroon-bakery/py-macaroon-bakery/issues/94 protobuf<4.0 tenacity +pydantic < 2 diff --git a/tests/functional/test_charm.py b/tests/functional/test_charm.py index 3e330352..a66c1c46 100644 --- a/tests/functional/test_charm.py +++ b/tests/functional/test_charm.py @@ -157,6 +157,7 @@ async def test_required_resources(ops_test: OpsTest, provided_collectors, requir assert unit.workload_status_message == AppStatus.MISSING_RELATION +@pytest.mark.realhw @pytest.mark.abort_on_fail async def test_cos_agent_relation(ops_test: OpsTest, provided_collectors): """Test adding relation with grafana-agent.""" @@ -530,8 +531,9 @@ async def test_resource_clean_up(self, ops_test, app, unit, required_resources): ) +@pytest.mark.realhw class TestCharm: - """Perform basic functional testing of the charm without having the actual hardware.""" + """Perform tests that require one or more exporters to be present.""" async def test_config_file_permissions(self, unit, ops_test): """Check config file permissions are set correctly.""" @@ -542,10 +544,10 @@ async def test_config_file_permissions(self, unit, ops_test): assert results.get("stdout").rstrip("\n") == expected_file_mode async def test_config_changed_port(self, app, unit, ops_test): - """Test changing the config option: exporter-port.""" + """Test changing the config option: hardware-exporter-port.""" new_port = "10001" await asyncio.gather( - app.set_config({"exporter-port": new_port}), + app.set_config({"hardware-exporter-port": new_port}), ops_test.model.wait_for_idle(apps=[APP_NAME]), ) @@ -555,7 +557,7 @@ async def test_config_changed_port(self, app, 
unit, ops_test): config = yaml.safe_load(results.get("stdout").strip()) assert config["port"] == int(new_port) - await app.reset_config(["exporter-port"]) + await app.reset_config(["hardware-exporter-port"]) async def test_config_changed_log_level(self, app, unit, ops_test): """Test changing the config option: exporter-log-level.""" diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py index 12958f1e..85168ecf 100644 --- a/tests/unit/test_charm.py +++ b/tests/unit/test_charm.py @@ -11,11 +11,11 @@ import ops.testing from ops.model import ActiveStatus, BlockedStatus from parameterized import parameterized -from redfish.rest.v1 import InvalidCredentialsError import charm from charm import ExporterError, HardwareObserverCharm from config import HWTool +from service import HardwareExporter class TestCharm(unittest.TestCase): @@ -23,11 +23,6 @@ def setUp(self): self.harness = ops.testing.Harness(HardwareObserverCharm) self.addCleanup(self.harness.cleanup) - get_bmc_address_patcher = mock.patch.object(charm, "get_bmc_address") - self.mock_get_bmc_address = get_bmc_address_patcher.start() - self.mock_get_bmc_address.return_value = "127.0.0.1" - self.addCleanup(get_bmc_address_patcher.stop) - get_hw_tool_enable_list_patcher = mock.patch.object(charm, "get_hw_tool_enable_list") self.mock_get_hw_tool_enable_list = get_hw_tool_enable_list_patcher.start() self.mock_get_hw_tool_enable_list.return_value = [ @@ -38,18 +33,10 @@ def setUp(self): ] self.addCleanup(get_hw_tool_enable_list_patcher.stop) - redfish_client_patcher = mock.patch("charm.redfish_client") - redfish_client_patcher.start() - self.addCleanup(redfish_client_patcher.stop) - requests_patcher = mock.patch("hw_tools.requests") requests_patcher.start() self.addCleanup(requests_patcher.stop) - @classmethod - def setUpClass(cls): - pass - def _get_notice_count(self, hook): """Return the notice count for a given charm hook.""" notice_count = 0 @@ -64,417 +51,326 @@ def test_harness(self) -> None: 
self.harness.begin() self.assertFalse(self.harness.charm._stored.resource_installed) - @mock.patch("charm.Exporter", return_value=mock.MagicMock()) - @mock.patch("charm.HWToolHelper", return_value=mock.MagicMock()) - def test_install(self, mock_hw_tool_helper, mock_exporter) -> None: - """Test install event handler.""" - mock_hw_tool_helper.return_value.install.return_value = (True, "") - mock_exporter.return_value.install.return_value = True - self.harness.begin() - self.harness.charm._stored.enabled_hw_tool_list_values = [] - self.harness.charm.on.install.emit() - - self.assertTrue(self.harness.charm._stored.resource_installed) - - self.harness.charm.exporter.install.assert_called_once() - self.harness.charm.hw_tool_helper.install.assert_called_with( - self.harness.charm.model.resources, - self.harness.charm._stored.enabled_hw_tool_list_values, - ) - - @mock.patch("charm.Exporter", return_value=mock.MagicMock()) - @mock.patch("charm.HWToolHelper", return_value=mock.MagicMock()) - def test_upgrade_charm(self, mock_hw_tool_helper, mock_exporter) -> None: - """Test upgrade_charm event handler.""" - mock_hw_tool_helper.return_value.install.return_value = (True, "") - mock_exporter.return_value.install.return_value = True - self.harness.begin() - self.harness.charm._stored.enabled_hw_tool_list_values = [] - self.harness.charm.on.install.emit() - - self.assertTrue(self.harness.charm._stored.resource_installed) - - self.harness.charm.exporter.install.assert_called_once() - self.harness.charm.hw_tool_helper.install.assert_called_with( - self.harness.charm.model.resources, - self.harness.charm._stored.enabled_hw_tool_list_values, - ) - - self.harness.charm.unit.status = ActiveStatus("Install complete") - - @mock.patch("charm.Exporter", return_value=mock.MagicMock()) - @mock.patch("charm.HWToolHelper", return_value=mock.MagicMock()) - def test_install_missing_resources(self, mock_hw_tool_helper, mock_exporter) -> None: - """Test install event handler when resources are 
missing.""" - mock_hw_tool_helper.return_value.install.return_value = ( - False, - "Missing resources: ['storcli-deb']", - ) - self.harness.begin() - self.harness.charm.on.install.emit() - - self.assertEqual( - self.harness.charm.unit.status, BlockedStatus("Missing resources: ['storcli-deb']") - ) - - @mock.patch("charm.Exporter", return_value=mock.MagicMock()) - @mock.patch("charm.HWToolHelper", return_value=mock.MagicMock()) - def test_install_redfish_unavailable(self, mock_hw_tool_helper, mock_exporter) -> None: - """Test install event handler when redfish is unavailable.""" - mock_enabled_hw_tool_list = [ - HWTool.IPMI_SENSOR, - HWTool.IPMI_SEL, - HWTool.IPMI_DCMI, - ] - self.mock_get_hw_tool_enable_list.return_value = mock_enabled_hw_tool_list - mock_hw_tool_helper.return_value.install.return_value = (True, "") - mock_exporter.return_value.install.return_value = True - self.harness.begin() - self.harness.charm.on.install.emit() - - self.assertTrue(self.harness.charm._stored.resource_installed) - - self.harness.charm.exporter.install.assert_called_with( - 10200, # default in config.yaml - "INFO", # default in config.yaml - {}, - 10, # default int config.yaml - mock_enabled_hw_tool_list, - ) - - @mock.patch("charm.Exporter", return_value=mock.MagicMock()) - @mock.patch("charm.HWToolHelper", return_value=mock.MagicMock()) - def test_exporter_install_fail(self, mock_hw_tool_helper, mock_exporter) -> None: - """Test exporter install failure.""" - mock_hw_tool_helper.return_value.install.return_value = (True, "") - mock_exporter.return_value.install.return_value = False - self.harness.begin() - self.harness.charm.validate_exporter_configs = mock.Mock() - self.harness.charm.validate_exporter_configs.return_value = (False, "error") - self.harness.charm.on.install.emit() - - self.assertTrue(self.harness.charm._stored.resource_installed) - - self.assertEqual( - self.harness.charm.unit.status, - BlockedStatus("Failed to install exporter, please refer to `juju 
debug-log`"), - ) - - @mock.patch("charm.Exporter", return_value=mock.MagicMock()) - @mock.patch("charm.HWToolHelper", return_value=mock.MagicMock()) - def test_update_status_all_green(self, mock_hw_tool_helper, mock_exporter): - """Test update_status event handler when everything is okay.""" - self.mock_get_hw_tool_enable_list.return_value = [ - HWTool.IPMI_SENSOR, - HWTool.IPMI_SEL, - HWTool.IPMI_DCMI, + @parameterized.expand( + [ + ( + "Enable two exporters", + [HWTool.IPMI_SEL, HWTool.SMARTCTL], + ["hardware-exporter", "smartctl-exporter"], + ) ] - mock_hw_tool_helper.return_value.install.return_value = (True, "") - mock_hw_tool_helper.return_value.check_installed.return_value = (True, "") - mock_exporter.return_value.install.return_value = True - rid = self.harness.add_relation("cos-agent", "grafana-agent") + ) + @mock.patch("charm.SmartCtlExporter.__init__", return_value=None) + @mock.patch("charm.HardwareExporter.__init__", return_value=None) + def test_exporters(self, _, enable_tools, expect, mock_hw_exporter, mock_smart_exporter): self.harness.begin() - self.harness.charm.on.install.emit() - self.harness.add_relation_unit(rid, "grafana-agent/0") - - self.harness.charm.on.update_status.emit() - self.assertEqual(self.harness.charm.unit.status, ActiveStatus("Unit is ready")) - - @mock.patch("charm.Exporter", return_value=mock.MagicMock()) - @mock.patch("charm.HWToolHelper", return_value=mock.MagicMock()) - def test_update_status_check_installed_false(self, mock_hw_tool_helper, mock_exporter): - """Test update_status event handler when hw tool checks failed.""" - self.mock_get_hw_tool_enable_list.return_value = [ - HWTool.IPMI_SENSOR, - HWTool.IPMI_SEL, - HWTool.IPMI_DCMI, + self.harness.charm.get_enable_hw_tools = mock.MagicMock() + self.harness.charm.get_enable_hw_tools.return_value = enable_tools + self.harness.charm._stored.enabled_hw_tool_list_values = [ + tool.value for tool in enable_tools ] - mock_hw_tool_helper.return_value.install.return_value = 
(True, "") - mock_hw_tool_helper.return_value.check_installed.return_value = (False, "error") - mock_exporter.return_value.install.return_value = True - rid = self.harness.add_relation("cos-agent", "grafana-agent") - self.harness.begin() - self.harness.charm.on.install.emit() - self.harness.add_relation_unit(rid, "grafana-agent/0") - self.harness.charm.on.update_status.emit() - self.assertEqual(self.harness.charm.unit.status, BlockedStatus("error")) + exporters = self.harness.charm.exporters + self.harness.charm.get_enable_hw_tools.assert_called() + + if "hardware-exporter" in expect: + self.assertTrue( + any([isinstance(exporter, HardwareExporter) for exporter in exporters]) + ) + mock_hw_exporter.assert_called_with( + self.harness.charm.charm_dir, + self.harness.charm.model.config, + self.harness.charm._stored.enabled_hw_tool_list_values, + ) + if "smartctl-exporter" in expect: + self.assertTrue( + any([isinstance(exporter, HardwareExporter) for exporter in exporters]) + ) + mock_smart_exporter.assert_called_with( + self.harness.charm.charm_dir, + self.harness.charm.model.config, + ) - @mock.patch("charm.Exporter", return_value=mock.MagicMock()) - @mock.patch("charm.HWToolHelper", return_value=mock.MagicMock()) - def test_update_status_exporter_crashed(self, mock_hw_tool_helper, mock_exporter): - """Test update_status.""" - self.mock_get_hw_tool_enable_list.return_value = [ - HWTool.IPMI_SENSOR, - HWTool.IPMI_SEL, - HWTool.IPMI_DCMI, + @parameterized.expand( + [ + ( + "happy case", + "install", + [HWTool.IPMI_SENSOR, HWTool.IPMI_SEL, HWTool.SMARTCTL], + (True, ""), + [mock.MagicMock(), mock.MagicMock()], + [True, True], + ), + ( + "happy case", + "upgrade", + [HWTool.IPMI_SENSOR, HWTool.IPMI_SEL, HWTool.SMARTCTL], + (True, ""), + [mock.MagicMock(), mock.MagicMock()], + [True, True], + ), + ( + "missing resource", + "install", + [HWTool.IPMI_SENSOR, HWTool.IPMI_SEL, HWTool.SMARTCTL], + (False, "miss something"), + [mock.MagicMock(), mock.MagicMock()], + [True, 
True], + ), + ( + "missing resource", + "upgrade", + [HWTool.IPMI_SENSOR, HWTool.IPMI_SEL, HWTool.SMARTCTL], + (False, "miss something"), + [mock.MagicMock(), mock.MagicMock()], + [True, True], + ), + ( + "Exporter install fail", + "install", + [HWTool.IPMI_SENSOR, HWTool.IPMI_SEL, HWTool.SMARTCTL], + (True, ""), + [mock.MagicMock(), mock.MagicMock()], + [False, True], + ), + ( + "Exporter install fail", + "upgrade", + [HWTool.IPMI_SENSOR, HWTool.IPMI_SEL, HWTool.SMARTCTL], + (True, ""), + [mock.MagicMock(), mock.MagicMock()], + [False, True], + ), ] - mock_hw_tool_helper.return_value.check_installed.return_value = (True, "") - mock_exporter.return_value.check_health.return_value = False - mock_exporter.return_value.restart.side_effect = Exception() - self.harness.add_relation("cos-agent", "grafana-agent") - self.harness.begin() - self.harness.charm._stored.resource_installed = True - with self.assertRaises(ExporterError): - self.harness.charm.on.update_status.emit() - - @mock.patch("charm.HWToolHelper") - @mock.patch("charm.Exporter", return_value=mock.MagicMock()) - def test_config_changed(self, mock_exporter, mock_hw_tool_helper): - """Test config change event renders config file.""" - self.harness.begin() - self.harness.charm._stored.resource_installed = True - self.harness.charm.num_cos_agent_relations = 1 # exporter enabled - self.harness.charm.hw_tool_helper.check_installed.return_value = ( - True, - "", - ) # hw tool install ok - - new_config = {"exporter-port": 80, "exporter-log-level": "DEBUG"} - self.harness.update_config(new_config) - self.harness.charm.on.config_changed.emit() - - self.harness.charm.exporter.template.render_config.assert_called() - - self.assertEqual(self.harness.charm.unit.status, ActiveStatus("Unit is ready")) - - @mock.patch("charm.HWToolHelper") - @mock.patch("charm.Exporter", return_value=mock.MagicMock()) - def test_config_changed_without_cos_agent_relation(self, mock_exporter, mock_hw_tool_helper): - """Test config change event 
don't render config file if cos_agent relation is missing.""" - self.harness.begin() - self.harness.charm._stored.resource_installed = True - self.harness.charm.num_cos_agent_relations = 0 # exporter disabled - self.harness.charm.hw_tool_helper.check_installed.return_value = ( - True, - "", - ) # hw tool install ok - - new_config = {"exporter-port": 80, "exporter-log-level": "DEBUG"} - self.harness.update_config(new_config) - self.harness.charm.on.config_changed.emit() - - self.harness.charm.exporter.template.render_config.assert_not_called() - - self.assertEqual( - self.harness.charm.unit.status, BlockedStatus("Missing relation: [cos-agent]") - ) - - @mock.patch("charm.Exporter", return_value=mock.MagicMock()) - def test_config_changed_before_install_complete(self, mock_exporter): - """Test config change event is deferred if charm not installed.""" - self.harness.begin() - self.harness.charm._stored.resource_installed = False - - self.harness.charm.on.config_changed.emit() - self.assertEqual(self._get_notice_count("config_changed"), 1) - - @mock.patch("charm.Exporter", return_value=mock.MagicMock()) - @mock.patch("charm.HWToolHelper", return_value=mock.MagicMock()) - def test_upgrade_force_reconfig_exporter(self, mock_hw_tool_helper, mock_exporter) -> None: - """Test upgrade event handler will reconfigure exporter.""" - mock_hw_tool_helper.return_value.install.return_value = (True, "") - mock_exporter.return_value.install.return_value = True - self.harness.begin() - self.harness.charm._stored.exporter_installed = True - self.harness.charm._stored.enabled_hw_tool_list_values = [] - self.harness.charm.on.upgrade_charm.emit() - - self.assertTrue(self.harness.charm._stored.resource_installed) - self.assertTrue(self.harness.charm._stored.exporter_installed) + ) + def test_install_or_upgrade( + self, + _, + event, + hw_tools, + hw_tool_helper_install_return, + mock_exporters, + mock_exporter_install_returns, + ) -> None: + with mock.patch( + 
"charm.HardwareObserverCharm.exporters", + new_callable=mock.PropertyMock( + return_value=mock_exporters, + ), + ) as mock_exporters: + self.harness.begin() + self.harness.charm.hw_tool_helper = mock.MagicMock() + self.harness.charm.hw_tool_helper.install.return_value = hw_tool_helper_install_return + self.harness.charm.get_enable_hw_tools = mock.MagicMock() + self.harness.charm.get_enable_hw_tools.return_value = hw_tools + self.harness.charm._on_update_status = mock.MagicMock() + + for mock_exporter, return_val in zip( + self.harness.charm.exporters, mock_exporter_install_returns + ): + mock_exporter.install.return_value = return_val + + if event == "install": + self.harness.charm.on.install.emit() + else: + self.harness.charm.on.upgrade_charm.emit() - self.harness.charm.exporter.install.assert_called_once() self.harness.charm.hw_tool_helper.install.assert_called_with( self.harness.charm.model.resources, - self.harness.charm._stored.enabled_hw_tool_list_values, + hw_tools, ) - @mock.patch("charm.Exporter", return_value=mock.MagicMock()) - @mock.patch("charm.HWToolHelper", return_value=mock.MagicMock()) - def test_update_status_config_invalid(self, mock_hw_tool_helper, mock_exporter): - """Test update_status event handler when config is invalid.""" - self.mock_get_hw_tool_enable_list.return_value = [ - HWTool.IPMI_SENSOR, - HWTool.IPMI_SEL, - HWTool.IPMI_DCMI, - ] - mock_hw_tool_helper.return_value.install.return_value = (True, "") - mock_hw_tool_helper.return_value.check_installed.return_value = (True, "") - mock_exporter.return_value.install.return_value = True - rid = self.harness.add_relation("cos-agent", "grafana-agent") - self.harness.begin() - self.harness.charm.on.install.emit() - self.harness.add_relation_unit(rid, "grafana-agent/0") - - self.harness.charm.validate_exporter_configs = mock.MagicMock() - self.harness.charm.validate_exporter_configs.return_value = (False, "config fail message") - - self.harness.charm.on.update_status.emit() - 
self.assertEqual(self.harness.charm.unit.status, BlockedStatus("config fail message")) - - @mock.patch("charm.Exporter", return_value=mock.MagicMock()) - @mock.patch("charm.HWToolHelper", return_value=mock.MagicMock()) - def test_config_changed_update_alert_rules(self, mock_hw_tool_helper, mock_exporter): - """Test config changed will update alert rule.""" - self.mock_get_hw_tool_enable_list.return_value = [ - HWTool.IPMI_SENSOR, - HWTool.IPMI_SEL, - HWTool.IPMI_DCMI, - ] - mock_hw_tool_helper.return_value.install.return_value = (True, "") - mock_hw_tool_helper.return_value.check_installed.return_value = (True, "") - mock_exporter.return_value.install.return_value = True - rid = self.harness.add_relation("cos-agent", "grafana-agent") - self.harness.begin() - self.harness.charm.on.install.emit() - self.harness.add_relation_unit(rid, "grafana-agent/0") - self.harness.charm.on.update_status.emit() - self.assertEqual(self.harness.charm.unit.status, ActiveStatus("Unit is ready")) + store_resource = False + if hw_tool_helper_install_return[0]: + if all(mock_exporter_install_returns): + for mock_exporter in mock_exporters: + mock_exporter.install.assert_called() + store_resource = True - relation_data = self.harness.get_relation_data(rid, "hardware-observer/0") - metrics_alert_rules = json.loads(relation_data["config"]).get("metrics_alert_rules") + self.assertEqual(self.harness.charm._stored.resource_installed, store_resource) + if store_resource: + self.harness.charm._on_update_status.assert_called() + def test_remove(self): + mock_exporters = [mock.MagicMock(), mock.MagicMock()] with mock.patch( - "charm.COSAgentProvider._metrics_alert_rules", new_callable=mock.PropertyMock - ) as mock_alert_rules: - fake_metrics_alert_rules = {} - mock_alert_rules.return_value = fake_metrics_alert_rules - self.harness.charm.on.config_changed.emit() - - relation_data = self.harness.get_relation_data(rid, "hardware-observer/0") - updated_metrics_alert_rules = 
json.loads(relation_data["config"]).get( - "metrics_alert_rules" - ) - self.assertEqual(updated_metrics_alert_rules, fake_metrics_alert_rules) - self.assertNotEqual(updated_metrics_alert_rules, metrics_alert_rules) + "charm.HardwareObserverCharm.exporters", + new_callable=mock.PropertyMock( + return_value=mock_exporters, + ), + ) as mock_exporters: + self.harness.begin() + self.harness.charm.hw_tool_helper = mock.MagicMock() - @mock.patch("charm.Exporter", return_value=mock.MagicMock()) - @mock.patch("charm.HWToolHelper", return_value=mock.MagicMock()) - def test_upgrade_charm_update_alert_rules(self, mock_hw_tool_helper, mock_exporter): - """Test upgrade charm event updates alert rule.""" - self.mock_get_hw_tool_enable_list.return_value = [ - HWTool.IPMI_SENSOR, - HWTool.IPMI_SEL, - HWTool.IPMI_DCMI, - ] - mock_hw_tool_helper.return_value.install.return_value = (True, "") - mock_hw_tool_helper.return_value.check_installed.return_value = (True, "") - mock_exporter.return_value.install.return_value = True - rid = self.harness.add_relation("cos-agent", "grafana-agent") - self.harness.begin() - self.harness.charm.on.install.emit() - self.harness.add_relation_unit(rid, "grafana-agent/0") - self.harness.charm.on.update_status.emit() - self.assertEqual(self.harness.charm.unit.status, ActiveStatus("Unit is ready")) + self.harness.charm.get_enable_hw_tools = mock.MagicMock() + self.harness.charm.get_enable_hw_tools.return_value = [ + HWTool.IPMI_SENSOR, + HWTool.IPMI_SEL, + HWTool.SMARTCTL, + ] - relation_data = self.harness.get_relation_data(rid, "hardware-observer/0") - metrics_alert_rules = json.loads(relation_data["config"]).get("metrics_alert_rules") + self.harness.charm.on.remove.emit() - with mock.patch( - "charm.COSAgentProvider._metrics_alert_rules", new_callable=mock.PropertyMock - ) as mock_alert_rules: - fake_metrics_alert_rules = {} - mock_alert_rules.return_value = fake_metrics_alert_rules - self.harness.charm.on.upgrade_charm.emit() - - relation_data = 
self.harness.get_relation_data(rid, "hardware-observer/0") - updated_metrics_alert_rules = json.loads(relation_data["config"]).get( - "metrics_alert_rules" + self.harness.charm.hw_tool_helper.remove.assert_called_with( + self.harness.charm.model.resources, + [HWTool.IPMI_SENSOR, HWTool.IPMI_SEL, HWTool.SMARTCTL], ) - self.assertEqual(updated_metrics_alert_rules, fake_metrics_alert_rules) - self.assertNotEqual(updated_metrics_alert_rules, metrics_alert_rules) + for mock_exporter in mock_exporters: + mock_exporter.uninstall.assert_called() + self.assertFalse(self.harness.charm._stored.resource_installed) - @mock.patch("charm.Exporter", return_value=mock.MagicMock()) - @mock.patch("charm.HWToolHelper", return_value=mock.MagicMock()) - def test_install_redfish_enabled_with_correct_credential( - self, mock_hw_tool_helper, mock_exporter - ) -> None: - """Test install event when redfish is available and credential is correct.""" - mock_enabled_hw_tool_list = [ - HWTool.REDFISH, + @parameterized.expand( + [ + ( + "happy case", + True, + True, + [mock.MagicMock(), mock.MagicMock()], + [(True, ""), (True, "")], + (True, ""), + [True, True], + ), + ( + "resouce_install not True", + False, + True, + [mock.MagicMock(), mock.MagicMock()], + [(True, ""), (True, "")], + (True, ""), + [True, True], + ), + ( + "No cos_agent_related", + True, + False, + [mock.MagicMock(), mock.MagicMock()], + [(True, ""), (True, "")], + (True, ""), + [True, True], + ), + ( + "Exporter config invalid", + True, + True, + [mock.MagicMock(), mock.MagicMock()], + [(False, "Some invalid msg"), (True, "")], + (True, ""), + [True, True], + ), + ( + "hw tools install not ok", + True, + True, + [mock.MagicMock(), mock.MagicMock()], + [(True, ""), (True, "")], + (False, "hw tools not installed"), + [True, True], + ), + ( + "Exporter not health", + True, + True, + [mock.MagicMock(), mock.MagicMock()], + [(True, ""), (True, "")], + (True, ""), + [True, False], + ), ] - 
self.mock_get_hw_tool_enable_list.return_value = mock_enabled_hw_tool_list - mock_hw_tool_helper.return_value.install.return_value = (True, "") - mock_exporter.return_value.install.return_value = True - self.harness.begin() - self.harness.charm.on.install.emit() - - self.assertTrue(self.harness.charm._stored.resource_installed) + ) + def test_update_status( # noqa: C901 + self, + _, + resource_installed, + cos_agent_related, + mock_exporters, + mock_exporter_validate_exporter_configs_returns, + hw_tool_check_installed, + mock_exporter_healths, + ): + for mock_exporter, config_valid, health in zip( + mock_exporters, + mock_exporter_validate_exporter_configs_returns, + mock_exporter_healths, + ): + mock_exporter.validate_exporter_configs.return_value = config_valid + mock_exporter.check_health.return_value = health + mock_exporter.restart.side_effect = ExporterError - self.harness.charm.exporter.install.assert_called_with( - 10200, # default in config.yaml - "INFO", # default in config.yaml - self.harness.charm.get_redfish_conn_params(mock_enabled_hw_tool_list), - 10, # default int config.yaml - mock_enabled_hw_tool_list, - ) + with mock.patch( + "charm.HardwareObserverCharm.exporters", + new_callable=mock.PropertyMock( + return_value=mock_exporters, + ), + ): + if cos_agent_related: + self.harness.add_relation("cos-agent", "grafana-agent") + self.harness.begin() + + self.harness.charm.model.unit.status = BlockedStatus("Random status") + self.harness.charm._stored.resource_installed = resource_installed + self.harness.charm.hw_tool_helper = mock.MagicMock() + self.harness.charm.hw_tool_helper.check_installed.return_value = ( + hw_tool_check_installed + ) - @parameterized.expand([(InvalidCredentialsError), (Exception)]) - @mock.patch("charm.Exporter", return_value=mock.MagicMock()) - @mock.patch("charm.HWToolHelper", return_value=mock.MagicMock()) - @mock.patch("charm.redfish_client", return_value=mock.MagicMock()) - def 
test_install_redfish_enabled_with_incorrect_credential( - self, test_exception, mock_redfish_client, mock_hw_tool_helper, mock_exporter - ) -> None: - """Test event install when redfish is available but credential is wrong.""" - self.mock_get_hw_tool_enable_list.return_value = [ - HWTool.REDFISH, - ] - mock_redfish_client.side_effect = test_exception() - mock_hw_tool_helper.return_value.install.return_value = (True, "") - mock_exporter.return_value.install.return_value = True - rid = self.harness.add_relation("cos-agent", "grafana-agent") - self.harness.begin() - self.harness.add_relation_unit(rid, "grafana-agent/0") - self.harness.charm.on.install.emit() - - self.assertTrue(self.harness.charm._stored.resource_installed) - - # ensure exporter is installed (not started/enabled) - # even when redfish credentials are wrong - mock_exporter.return_value.install.assert_called_once() - mock_exporter.reset_mock() - self.assertEqual( - self.harness.charm.unit.status, - BlockedStatus("Invalid config: 'redfish-username' or 'redfish-password'"), - ) + self.harness.charm.on.update_status.emit() - @parameterized.expand([(InvalidCredentialsError), (Exception)]) - @mock.patch("charm.Exporter", return_value=mock.MagicMock()) - @mock.patch("charm.HWToolHelper", return_value=mock.MagicMock()) - @mock.patch("charm.redfish_client", return_value=mock.MagicMock()) - @mock.patch("charm.HardwareObserverCharm._stored") - def test_config_changed_redfish_enabled_with_incorrect_credential( - self, test_exception, mock_stored, mock_redfish_client, mock_hw_tool_helper, mock_exporter - ) -> None: - """Test event config changed when redfish is available but credential is wrong.""" - mock_stored.enabled_hw_tool_list_values = [ - "ipmi_sensor", - "ipmi_sel", - "ipmi_dcmi", - "redfish", - ] - mock_hw_tool_helper.return_value.install.return_value = (True, "") - mock_hw_tool_helper.return_value.check_installed.return_value = (True, "") - mock_exporter.return_value.install.return_value = True - rid = 
self.harness.add_relation("cos-agent", "grafana-agent") - self.harness.begin() - self.harness.add_relation_unit(rid, "grafana-agent/0") - - mock_redfish_client.side_effect = test_exception() - new_config = { - "exporter-port": 80, - "exporter-log-level": "DEBUG", - "collect-timeout": 10, - "redfish-username": "redfish", - "redfish-password": "redfish", - } - self.harness.update_config(new_config) - self.harness.charm.on.config_changed.emit() - self.assertEqual( - self.harness.charm.unit.status, - BlockedStatus("Invalid config: 'redfish-username' or 'redfish-password'"), + if not resource_installed: + self.assertEqual( + self.harness.charm.model.unit.status, + BlockedStatus("Random status"), + ) + return + + if not cos_agent_related: + self.assertEqual( + self.harness.charm.model.unit.status, + BlockedStatus("Missing relation: [cos-agent]"), + ) + return + + if not all([res[0] for res in mock_exporter_validate_exporter_configs_returns]): + for valid_config, mock_exporter in zip( + mock_exporter_validate_exporter_configs_returns, + mock_exporters, + ): + ok = valid_config[0] + msg = valid_config[1] + if ok: + mock_exporter.validate_exporter_configs.assert_called() + else: + self.assertEqual( + self.harness.charm.model.unit.status, + BlockedStatus(msg), + ) + break + return + + self.harness.charm.hw_tool_helper.check_installed.assert_called_with( + self.harness.charm.get_enable_hw_tools() ) + if not hw_tool_check_installed[0]: + self.assertEqual( + self.harness.charm.model.unit.status, + BlockedStatus("hw tools not installed"), + ) + return + + if all(mock_exporter_healths): + self.assertEqual(self.harness.charm.unit.status, ActiveStatus("Unit is ready")) + else: + for mock_exporter, health in zip( + mock_exporters, + mock_exporter_healths, + ): + if health: + mock_exporter.restart.assert_not_called() + else: + msg = ( + f"Exporter {mock_exporter.exporter_name} " + f"crashed unexpectedly: {ExporterError()}" + ) + self.assertEqual(self.harness.charm.unit.status, 
BlockedStatus(msg)) @parameterized.expand( [ @@ -577,51 +473,300 @@ def test_detect_hardware_action( else: self.harness.charm._on_install_or_upgrade.assert_not_called() - def test_get_redfish_conn_params_when_redfish_is_available(self): - """Test get_redfish_conn_params when Redfish is available.""" - self.harness.begin() - result = self.harness.charm.get_redfish_conn_params([HWTool.REDFISH]) - expected_result = { - "host": "https://127.0.0.1", - "username": "", - "password": "", - "timeout": 10, - } - self.assertEqual(result, expected_result) - - # redfish client timeout is also set with the value from collect-timeout - new_config = { - "redfish-username": "redfish", - "redfish-password": "redfish", - "collect-timeout": 20, - } - self.harness.update_config(new_config) - expected_result = { - "host": "https://127.0.0.1", - "username": "redfish", - "password": "redfish", - "timeout": 20, - } - result = self.harness.charm.get_redfish_conn_params([HWTool.REDFISH]) - self.assertEqual(result, expected_result) - - def test_get_redfish_conn_params_when_redfish_is_absent(self): - """Test get_redfish_conn_params when Redfish is absent.""" - # Redfish isn't present - self.mock_get_hw_tool_enable_list.return_value = [ - HWTool.IPMI_SENSOR, - HWTool.IPMI_SEL, - HWTool.IPMI_DCMI, + @parameterized.expand( + [ + ( + "happy case", + True, + True, + (True, ""), + [mock.MagicMock(), mock.MagicMock()], + [(True, ""), (True, "")], + ), + ( + "No resource_installed", + False, + True, + (True, ""), + [mock.MagicMock(), mock.MagicMock()], + [(True, ""), (True, "")], + ), + ( + "No cos_agent_related", + True, + False, + (True, ""), + [mock.MagicMock(), mock.MagicMock()], + [(True, ""), (True, "")], + ), + ( + "invalid config", + True, + True, + (False, "invalid msg"), + [mock.MagicMock(), mock.MagicMock()], + [(True, ""), (True, "")], + ), + ( + "Exporter render_config failed", + True, + True, + (True, ""), + [mock.MagicMock(), mock.MagicMock()], + [True, False], + ), ] - 
self.harness.begin() - result = self.harness.charm.get_redfish_conn_params([]) - self.assertEqual(result, {}) - - new_config = { - "redfish-username": "redfish", - "redfish-password": "redfish", - "collect-timeout": 20, - } - self.harness.update_config(new_config) - result = self.harness.charm.get_redfish_conn_params([]) - self.assertEqual(result, {}) + ) + @mock.patch("charm.logger") + def test_config_changed( + self, + _, + resource_installed, + cos_agent_related, + validate_configs_return, + mock_exporters, + mock_exporters_render_config_returns, + mock_logger, + ): + for mock_exporter, render_config_return in zip( + mock_exporters, + mock_exporters_render_config_returns, + ): + mock_exporter.render_config.return_value = render_config_return + + with mock.patch( + "charm.HardwareObserverCharm.exporters", + new_callable=mock.PropertyMock( + return_value=mock_exporters, + ), + ): + if cos_agent_related: + self.harness.add_relation("cos-agent", "grafana-agent") + self.harness.begin() + self.harness.charm._stored.resource_installed = resource_installed + self.harness.charm.validate_configs = mock.MagicMock() + self.harness.charm.validate_configs.return_value = validate_configs_return + self.harness.charm._on_update_status = mock.MagicMock() + + self.harness.charm.on.config_changed.emit() + + if not resource_installed: + mock_logger.info.assert_called() + if not cos_agent_related: + self.harness.charm.validate_configs.assert_not_called() + self.harness.charm._on_update_status.assert_called() + return + if not validate_configs_return[0]: + self.assertEqual(self.harness.charm.unit.status, BlockedStatus("invalid msg")) + self.harness.charm.exporters[0].render_config.assert_not_called() + return + if not all(mock_exporters_render_config_returns): + for mock_exporter, render_config_return in zip( + mock_exporters, + mock_exporters_render_config_returns, + ): + if render_config_return: + mock_exporter.restart.assert_called() + else: + message = ( + f"Failed to configure 
{mock_exporter.exporter_name}, " + f"please check if the server is healthy." + ) + self.assertEqual(self.harness.charm.unit.status, BlockedStatus(message)) + self.harness.charm._on_update_status.assert_called() + self.harness.charm._on_update_status.assert_called() + + def test_config_changed_update_alert_rules(self): + """Test config changed will update alert rule.""" + mock_exporter = mock.MagicMock() + mock_exporter.install.return_value = True + mock_exporter.validate_exporter_configs.return_value = (True, "") + mock_exporter.check_health.return_value = True + mock_exporters = [mock_exporter] + + with mock.patch( + "charm.HardwareObserverCharm.exporters", + new_callable=mock.PropertyMock( + return_value=mock_exporters, + ), + ): + rid = self.harness.add_relation("cos-agent", "grafana-agent") + + self.harness.begin() + + self.harness.charm.hw_tool_helper = mock.MagicMock() + self.harness.charm.hw_tool_helper.install.return_value = (True, "") + self.harness.charm.hw_tool_helper.check_installed.return_value = (True, "") + + self.harness.charm.get_enable_hw_tools = mock.MagicMock() + self.harness.charm.get_enable_hw_tools.return_value = [ + HWTool.IPMI_SENSOR, + HWTool.IPMI_SEL, + HWTool.IPMI_DCMI, + ] + + self.harness.charm.on.install.emit() + self.harness.add_relation_unit(rid, "grafana-agent/0") + self.harness.charm.on.update_status.emit() + self.assertEqual(self.harness.charm.unit.status, ActiveStatus("Unit is ready")) + + relation_data = self.harness.get_relation_data(rid, "hardware-observer/0") + metrics_alert_rules = json.loads(relation_data["config"]).get("metrics_alert_rules") + + with mock.patch( + "charm.COSAgentProvider._metrics_alert_rules", new_callable=mock.PropertyMock + ) as mock_alert_rules: + fake_metrics_alert_rules = {} + mock_alert_rules.return_value = fake_metrics_alert_rules + self.harness.charm.on.config_changed.emit() + + relation_data = self.harness.get_relation_data(rid, "hardware-observer/0") + updated_metrics_alert_rules = 
json.loads(relation_data["config"]).get( + "metrics_alert_rules" + ) + self.assertEqual(updated_metrics_alert_rules, fake_metrics_alert_rules) + self.assertNotEqual(updated_metrics_alert_rules, metrics_alert_rules) + + def test_upgrade_charm_update_alert_rules(self): + """Test config changed will update alert rule.""" + mock_exporter = mock.MagicMock() + mock_exporter.install.return_value = True + mock_exporter.validate_exporter_configs.return_value = (True, "") + mock_exporter.check_health.return_value = True + mock_exporters = [mock_exporter] + + with mock.patch( + "charm.HardwareObserverCharm.exporters", + new_callable=mock.PropertyMock( + return_value=mock_exporters, + ), + ): + rid = self.harness.add_relation("cos-agent", "grafana-agent") + + self.harness.begin() + + self.harness.charm.hw_tool_helper = mock.MagicMock() + self.harness.charm.hw_tool_helper.install.return_value = (True, "") + self.harness.charm.hw_tool_helper.check_installed.return_value = (True, "") + + self.harness.charm.get_enable_hw_tools = mock.MagicMock() + self.harness.charm.get_enable_hw_tools.return_value = [ + HWTool.IPMI_SENSOR, + HWTool.IPMI_SEL, + HWTool.IPMI_DCMI, + ] + + self.harness.charm.on.install.emit() + self.harness.add_relation_unit(rid, "grafana-agent/0") + self.harness.charm.on.update_status.emit() + self.assertEqual(self.harness.charm.unit.status, ActiveStatus("Unit is ready")) + + relation_data = self.harness.get_relation_data(rid, "hardware-observer/0") + metrics_alert_rules = json.loads(relation_data["config"]).get("metrics_alert_rules") + + with mock.patch( + "charm.COSAgentProvider._metrics_alert_rules", new_callable=mock.PropertyMock + ) as mock_alert_rules: + fake_metrics_alert_rules = {} + mock_alert_rules.return_value = fake_metrics_alert_rules + self.harness.charm.on.upgrade_charm.emit() + + relation_data = self.harness.get_relation_data(rid, "hardware-observer/0") + updated_metrics_alert_rules = json.loads(relation_data["config"]).get( + "metrics_alert_rules" + 
) + self.assertEqual(updated_metrics_alert_rules, fake_metrics_alert_rules) + self.assertNotEqual(updated_metrics_alert_rules, metrics_alert_rules) + + @parameterized.expand( + [ + ("happy case", True), + ("No resource_installed", False), + ] + ) + def test_on_relation_joined(self, _, resource_installed): + mock_exporters = [mock.MagicMock()] + with mock.patch( + "charm.HardwareObserverCharm.exporters", + new_callable=mock.PropertyMock( + return_value=mock_exporters, + ), + ): + self.harness.begin() + self.harness.charm._on_update_status = mock.MagicMock() + self.harness.charm._stored.resource_installed = resource_installed + + rid = self.harness.add_relation("cos-agent", "grafana-agent") + self.harness.add_relation_unit(rid, "grafana-agent/0") + + if not resource_installed: + self.harness.charm._on_update_status.assert_not_called() + return + for mock_exporter in mock_exporters: + mock_exporter.enable_and_start.assert_called() + self.harness.charm._on_update_status.assert_called() + + @parameterized.expand( + [ + ("happy case", True), + ] + ) + def test_relation_departed(self, _, resource_installed): + mock_exporters = [mock.MagicMock()] + with mock.patch( + "charm.HardwareObserverCharm.exporters", + new_callable=mock.PropertyMock( + return_value=mock_exporters, + ), + ): + self.harness.begin() + self.harness.charm._on_update_status = mock.MagicMock() + + rid = self.harness.add_relation("cos-agent", "grafana-agent") + self.harness.add_relation_unit(rid, "grafana-agent/0") + rid = self.harness.remove_relation(rid) + + for mock_exporter in mock_exporters: + mock_exporter.disable_and_stop.assert_called() + self.harness.charm._on_update_status.assert_called() + + @parameterized.expand( + [ + ( + "happy case", + [10000, 10001], + [(True, ""), (True, "")], + (True, "Charm config is valid."), + ), + ( + "exporter invalied", + [10000, 10001], + [(True, ""), (False, "Invalied msg")], + (False, "Invalied msg"), + ), + ( + "happy case", + [10000, 10000], + [(True, ""), 
(True, "")], + (False, "Ports must be unique for each exporter."), + ), + ] + ) + def test_validate_configs( + self, _, mock_exporter_ports, mock_exporter_validate_exporter_configs_returns, expect + ): + mock_exporters = [mock.MagicMock(), mock.MagicMock()] + for mock_exporter, port, return_val in zip( + mock_exporters, mock_exporter_ports, mock_exporter_validate_exporter_configs_returns + ): + mock_exporter.validate_exporter_configs.return_value = return_val + mock_exporter.port = port + with mock.patch( + "charm.HardwareObserverCharm.exporters", + new_callable=mock.PropertyMock( + return_value=mock_exporters, + ), + ): + self.harness.begin() + result = self.harness.charm.validate_configs() + self.assertEqual(result, expect) diff --git a/tests/unit/test_exporter.py b/tests/unit/test_exporter.py deleted file mode 100644 index adbe55cd..00000000 --- a/tests/unit/test_exporter.py +++ /dev/null @@ -1,375 +0,0 @@ -# Copyright 2023 Canotical Ltd. -# See LICENSE file for licensing details. - -import pathlib -import unittest -from unittest import mock - -import ops -from ops.model import ActiveStatus, BlockedStatus -from ops.testing import Harness -from parameterized import parameterized - -import charm -import service -from charm import HardwareObserverCharm -from config import EXPORTER_CONFIG_PATH, HWTool - -ops.testing.SIMULATE_CAN_CONNECT = True - -EXPORTER_RELATION_NAME = "cos-agent" - - -class TestExporter(unittest.TestCase): - """Test Exporter's methods.""" - - def setUp(self): - """Set up harness for each test case.""" - self.harness = Harness(HardwareObserverCharm) - self.addCleanup(self.harness.cleanup) - - systemd_lib_patcher = mock.patch.object(service, "systemd") - self.mock_systemd = systemd_lib_patcher.start() - self.addCleanup(systemd_lib_patcher.stop) - - hw_tool_lib_patcher = mock.patch.object(charm, "HWToolHelper") - mock_hw_tool_helper = hw_tool_lib_patcher.start() - mock_hw_tool_helper.return_value.install.return_value = [True, ""] - 
mock_hw_tool_helper.return_value.check_installed.return_value = [True, ""] - self.addCleanup(hw_tool_lib_patcher.stop) - - get_bmc_address_patcher = mock.patch("charm.get_bmc_address", return_value="127.0.0.1") - get_bmc_address_patcher.start() - self.addCleanup(get_bmc_address_patcher.stop) - - get_charm_hw_tool_enable_list_patcher = mock.patch( - "charm.get_hw_tool_enable_list", - return_value=[HWTool.IPMI_SENSOR, HWTool.IPMI_SEL, HWTool.IPMI_DCMI, HWTool.REDFISH], - ) - get_charm_hw_tool_enable_list_patcher.start() - self.addCleanup(get_charm_hw_tool_enable_list_patcher.stop) - - redfish_client_patcher = mock.patch("charm.redfish_client") - redfish_client_patcher.start() - self.addCleanup(redfish_client_patcher.stop) - - os_patcher = mock.patch.object(service, "os") - os_patcher.start() - self.addCleanup(os_patcher.stop) - - @classmethod - def setUpClass(cls): - exporter_health_retry_count_patcher = mock.patch("charm.EXPORTER_HEALTH_RETRY_COUNT", 1) - exporter_health_retry_count_patcher.start() - cls.addClassCleanup(exporter_health_retry_count_patcher.stop) - - exporter_health_retry_timeout_patcher = mock.patch( - "charm.EXPORTER_HEALTH_RETRY_TIMEOUT", 0 - ) - exporter_health_retry_timeout_patcher.start() - cls.addClassCleanup(exporter_health_retry_timeout_patcher.stop) - - def test_install_okay(self): - """Test exporter service is installed when charm is installed - okay.""" - self.harness.begin() - - with mock.patch("builtins.open", new_callable=mock.mock_open) as mock_open: - self.harness.charm.on.install.emit() - mock_open.assert_called() - self.mock_systemd.daemon_reload.assert_called_once() - - def test_install_failed_rendering(self): - """Test exporter service is failed to installed - failed to render.""" - self.harness.begin() - - with mock.patch("builtins.open", new_callable=mock.mock_open) as mock_open: - mock_open.side_effect = NotADirectoryError() - self.harness.charm.on.install.emit() - mock_open.assert_called() - 
self.mock_systemd.daemon_reload.assert_not_called() - - with mock.patch("builtins.open", new_callable=mock.mock_open) as mock_open: - mock_open.side_effect = PermissionError() - self.harness.charm.on.install.emit() - mock_open.assert_called() - self.mock_systemd.daemon_reload.assert_not_called() - - @mock.patch.object(pathlib.Path, "exists", return_value=True) - def test_uninstall_okay(self, mock_service_exists): - """Test exporter service is uninstalled when charm is removed - okay.""" - self.harness.begin() - - with mock.patch.object(pathlib.Path, "unlink") as mock_unlink: - self.harness.charm.on.remove.emit() - mock_unlink.assert_called() - self.mock_systemd.daemon_reload.assert_called_once() - - @mock.patch.object(pathlib.Path, "exists", return_value=True) - def test_uninstall_failed(self, mock_service_exists): - """Test exporter service is not uninstalled - failed to remove.""" - self.harness.begin() - - with mock.patch.object(pathlib.Path, "unlink") as mock_unlink: - mock_unlink.side_effect = PermissionError() - self.harness.charm.on.remove.emit() - mock_unlink.assert_called() - self.mock_systemd.daemon_reload.assert_not_called() - - @mock.patch.object(pathlib.Path, "exists", return_value=True) - def test_start_okay(self, mock_service_installed): - """Test exporter service started when relation is joined.""" - rid = self.harness.add_relation(EXPORTER_RELATION_NAME, "grafana-agent") - self.harness.begin() - self.harness.charm._stored.resource_installed = True - self.harness.charm._stored.exporter_installed = True - self.harness.add_relation_unit(rid, "grafana-agent/0") - self.mock_systemd.service_start.assert_called_once() - self.mock_systemd.service_enable.assert_called_once() - - @mock.patch.object(pathlib.Path, "exists", return_value=False) - def test_start_failed(self, mock_service_not_installed): - """Test exporter service failed to started when relation is joined.""" - rid = self.harness.add_relation(EXPORTER_RELATION_NAME, "grafana-agent") - 
self.harness.begin() - self.harness.add_relation_unit(rid, "grafana-agent/0") - self.mock_systemd.service_start.assert_not_called() - self.mock_systemd.service_enable.assert_not_called() - - @mock.patch.object(pathlib.Path, "exists", return_value=True) - def test_start_defer_resource_not_ready(self, mock_service_installed): - """Test exporter service started when relation is joined.""" - rid = self.harness.add_relation(EXPORTER_RELATION_NAME, "grafana-agent") - self.harness.begin() - self.harness.charm._stored.resource_installed = False - self.harness.charm._stored.exporter_installed = True - self.harness.add_relation_unit(rid, "grafana-agent/0") - self.mock_systemd.service_start.assert_not_called() - self.mock_systemd.service_enable.assert_not_called() - - @mock.patch.object(pathlib.Path, "exists", return_value=True) - def test_start_defer_exporter_not_ready(self, mock_service_installed): - """Test exporter service started when relation is joined.""" - rid = self.harness.add_relation(EXPORTER_RELATION_NAME, "grafana-agent") - self.harness.begin() - self.harness.charm._stored.resource_installed = True - self.harness.charm._stored.exporter_installed = False - self.harness.add_relation_unit(rid, "grafana-agent/0") - self.mock_systemd.service_start.assert_not_called() - self.mock_systemd.service_enable.assert_not_called() - - @mock.patch.object(pathlib.Path, "exists", return_value=True) - def test_stop_okay(self, mock_service_installed): - """Test exporter service is stopped when service is installed and relation is departed.""" - rid = self.harness.add_relation(EXPORTER_RELATION_NAME, "grafana-agent") - self.harness.begin() - self.harness.charm._stored.resource_installed = True - self.harness.charm._stored.exporter_installed = True - self.harness.add_relation_unit(rid, "grafana-agent/0") - self.harness.remove_relation_unit(rid, "grafana-agent/0") - self.mock_systemd.service_stop.assert_called_once() - self.mock_systemd.service_disable.assert_called_once() - - 
@mock.patch.object(pathlib.Path, "exists", return_value=False) - def test_stop_failed(self, mock_service_not_installed): - """Test exporter service failed to stop when service is not installed.""" - rid = self.harness.add_relation(EXPORTER_RELATION_NAME, "grafana-agent") - self.harness.begin() - self.harness.charm._stored.resource_installed = True - self.harness.charm._stored.exporter_installed = True - with self.assertRaises(service.ExporterError): - self.harness.add_relation_unit(rid, "grafana-agent/0") - with self.assertRaises(service.ExporterError): - self.harness.remove_relation_unit(rid, "grafana-agent/0") - self.mock_systemd.service_stop.assert_not_called() - self.mock_systemd.service_disable.assert_not_called() - - @parameterized.expand( - [ - (False, ActiveStatus("Unit is ready"), True), - (True, ActiveStatus("Unit is ready"), True), - (False, ActiveStatus("Unit is ready"), False), - ] - ) - @mock.patch.object(pathlib.Path, "exists", return_value=True) - def test_check_health( - self, - failed, - expected_status, - restart_okay, - mock_service_installed, - ): - """Test check_health function when service is installed.""" - rid = self.harness.add_relation(EXPORTER_RELATION_NAME, "grafana-agent") - self.harness.begin() - with mock.patch("builtins.open", new_callable=mock.mock_open) as _: - self.harness.charm.on.install.emit() - self.harness.add_relation_unit(rid, "grafana-agent/0") - - self.mock_systemd.service_running.return_value = restart_okay - self.mock_systemd.service_failed.return_value = failed - self.harness.charm.on.update_status.emit() - self.assertEqual(self.harness.charm.unit.status, expected_status) - - @mock.patch.object(pathlib.Path, "exists", return_value=True) - def test_check_health_exporter_crash(self, mock_service_installed): - """Test check_health function when service is installed but exporter crashes.""" - rid = self.harness.add_relation(EXPORTER_RELATION_NAME, "grafana-agent") - self.harness.begin() - with mock.patch("builtins.open", 
new_callable=mock.mock_open) as _: - self.harness.charm.on.install.emit() - self.harness.add_relation_unit(rid, "grafana-agent/0") - - self.mock_systemd.service_running.return_value = False - self.mock_systemd.service_failed.return_value = True - with self.assertRaises(service.ExporterError): - self.harness.charm.on.update_status.emit() - - @mock.patch.object(pathlib.Path, "exists", return_value=True) - def test_check_relation_exists(self, mock_service_installed): - """Test check_relation function when relation exists.""" - rid = self.harness.add_relation(EXPORTER_RELATION_NAME, "grafana-agent") - self.harness.begin() - with mock.patch("builtins.open", new_callable=mock.mock_open) as _: - self.harness.charm.on.install.emit() - self.harness.add_relation_unit(rid, "grafana-agent/0") - self.mock_systemd.service_failed.return_value = False - self.harness.charm.on.update_status.emit() - self.assertEqual(self.harness.charm.unit.status, ActiveStatus("Unit is ready")) - - @mock.patch.object(pathlib.Path, "exists", return_value=True) - def test_check_relation_not_exists(self, mock_service_installed): - """Test check_relation function when relation does not exists.""" - self.harness.begin() - with mock.patch("builtins.open", new_callable=mock.mock_open) as _: - self.harness.charm.on.install.emit() - self.mock_systemd.service_failed.return_value = False - self.harness.charm.on.update_status.emit() - self.assertEqual( - self.harness.charm.unit.status, BlockedStatus("Missing relation: [cos-agent]") - ) - - @mock.patch.object(pathlib.Path, "exists", return_value=True) - def test_config_changed_log_level_okay(self, mock_service_installed): - """Test on_config_change function when exporter-log-level is changed.""" - rid = self.harness.add_relation(EXPORTER_RELATION_NAME, "grafana-agent") - self.harness.begin() - - with mock.patch("builtins.open", new_callable=mock.mock_open): - self.mock_systemd.service_failed.return_value = False - self.harness.charm.on.install.emit() - 
self.harness.add_relation_unit(rid, "grafana-agent/0") - # this will trigger config changed event - self.harness.update_config({"exporter-log-level": "DEBUG"}) - self.mock_systemd.service_restart.assert_called_once() - self.assertEqual( - self.harness.charm.unit.status, - ActiveStatus("Unit is ready"), - ) - - @mock.patch.object(pathlib.Path, "exists", return_value=True) - def test_invalid_exporter_log_level(self, mock_service_installed): - """Test on_config_change function when exporter-log-level is changed.""" - rid = self.harness.add_relation(EXPORTER_RELATION_NAME, "grafana-agent") - self.harness.begin() - - with mock.patch("builtins.open", new_callable=mock.mock_open): - self.mock_systemd.service_failed.return_value = False - self.harness.charm.on.install.emit() - self.harness.add_relation_unit(rid, "grafana-agent/0") - # self.harness.charm.validate_exporter_configs = mock.Mock() - # self.harness.charm.validate_exporter_configs.return_value = (False, "error") - self.harness.update_config({"exporter-port": 102000, "exporter-log-level": "DEBUG"}) - self.harness.charm.on.config_changed.emit() - self.assertEqual( - self.harness.charm.unit.status, BlockedStatus("Invalid config: 'exporter-port'") - ) - self.harness.update_config({"exporter-port": 8080, "exporter-log-level": "xxx"}) - self.harness.charm.on.config_changed.emit() - self.assertEqual( - self.harness.charm.unit.status, - BlockedStatus("Invalid config: 'exporter-log-level'"), - ) - - @mock.patch("charm.Exporter", return_value=mock.MagicMock()) - @mock.patch.object(pathlib.Path, "exists", return_value=True) - def test_render_config_fail(self, mock_service_installed, mock_exporter): - """Test on_config_change function when render config fails.""" - rid = self.harness.add_relation(EXPORTER_RELATION_NAME, "grafana-agent") - self.harness.begin() - - with mock.patch("builtins.open", new_callable=mock.mock_open): - self.mock_systemd.service_failed.return_value = False - 
mock_exporter.return_value.install.return_value = True - self.harness.charm.on.install.emit() - mock_exporter.return_value.template.render_config.return_value = False - self.harness.add_relation_unit(rid, "grafana-agent/0") - self.harness.charm.on.config_changed.emit() - self.mock_systemd.service_restart.assert_not_called() - self.assertEqual( - self.harness.charm.unit.status, - BlockedStatus( - "Failed to configure exporter, please check if the server is healthy." - ), - ) - - -class TestExporterTemplate(unittest.TestCase): - def setUp(self): - """Set up harness for each test case.""" - search_path = pathlib.Path(f"{__file__}/../../..").resolve() - self.template = service.ExporterTemplate(search_path) - - def test_render_config(self): - with mock.patch.object(self.template, "_install") as mock_install: - self.template.render_config( - port="80", - level="info", - redfish_conn_params={}, - collect_timeout=10, - hw_tools=[HWTool.STORCLI, HWTool.SSACLI], - ) - mock_install.assert_called_with( - EXPORTER_CONFIG_PATH, - self.template.config_template.render( - PORT="80", - LEVEL="info", - COLLECT_TIMEOUT=10, - COLLECTORS=["collector.mega_raid", "collector.hpe_ssa"], - REDFISH_ENABLE=False, - REDFISH_HOST="", - REDFISH_PASSWORD="", - REDFISH_USERNAME="", - REDFISH_CLIENT_TIMEOUT=10, - ), - mode=0o600, - ) - - def test_render_config_redfish(self): - with mock.patch.object(self.template, "_install") as mock_install: - self.template.render_config( - port="80", - level="info", - collect_timeout=10, - redfish_conn_params={ - "host": "127.0.0.1", - "username": "default_user", - "password": "default_pwd", - "timeout": 10, - }, - hw_tools=[HWTool.REDFISH], - ) - mock_install.assert_called_with( - EXPORTER_CONFIG_PATH, - self.template.config_template.render( - PORT="80", - LEVEL="info", - COLLECT_TIMEOUT=10, - COLLECTORS=["collector.redfish"], - REDFISH_ENABLE=True, - REDFISH_HOST="127.0.0.1", - REDFISH_PASSWORD="default_pwd", - REDFISH_USERNAME="default_user", - 
REDFISH_CLIENT_TIMEOUT="10", - ), - mode=0o600, - ) diff --git a/tests/unit/test_hw_tools.py b/tests/unit/test_hw_tools.py index 7ada9218..ac451c96 100644 --- a/tests/unit/test_hw_tools.py +++ b/tests/unit/test_hw_tools.py @@ -1,6 +1,8 @@ import stat import subprocess +import tempfile import unittest +from http import HTTPStatus from pathlib import Path from unittest import mock @@ -29,8 +31,11 @@ PercCLIStrategy, ResourceChecksumError, ResourceFileSizeZeroError, + ResourceInstallationError, SAS2IRCUStrategy, SAS3IRCUStrategy, + SmartCtlExporterStrategy, + SmartCtlStrategy, SSACLIStrategy, StorCLIStrategy, StrategyABC, @@ -40,6 +45,7 @@ bmc_hw_verifier, check_deb_pkg_installed, copy_to_snap_common_bin, + disk_hw_verifier, file_is_empty, get_hw_tool_enable_list, install_deb, @@ -728,14 +734,136 @@ def test_remove(self, mock_apt): mock_apt.remove_package.assert_not_called() +class TestSmartCtlStrategy(unittest.TestCase): + @mock.patch("apt_helpers.get_candidate_version") + @mock.patch("apt_helpers.apt") + def test_install(self, mock_apt, mock_candidate_version): + strategy = SmartCtlStrategy() + mock_candidate_version.return_value = "some-candidate-version" + strategy.install() + + mock_apt.add_package.assert_called_with( + "smartmontools", version="some-candidate-version", update_cache=False + ) + + @mock.patch("hw_tools.apt") + def test_remove(self, mock_apt): + strategy = SmartCtlStrategy() + strategy.remove() + + mock_apt.remove_package.assert_not_called() + + @mock.patch("hw_tools.check_deb_pkg_installed") + def test_check(self, mock_check_deb_method): + strategy = SmartCtlStrategy() + strategy.check() + + mock_check_deb_method.assert_called_with("smartmontools") + + +class TestSmartCtlExporterStrategy(unittest.TestCase): + def setUp(self): + self.temp_dir = tempfile.TemporaryDirectory() + self.tmp_path = Path(self.temp_dir.name) + + def tearDown(self): + self.temp_dir.cleanup() + + @mock.patch("hw_tools.requests.get") + @mock.patch("hw_tools.tarfile.open") + 
@mock.patch("hw_tools.make_executable") + def test_install_success( + self, + mock_make_executable, + mock_tar_open, + mock_requests_get, + ): + strategy = SmartCtlExporterStrategy() + strategy._resource_dir = self.tmp_path + strategy._exporter_path = self.tmp_path / "smartctl_exporter" + + mock_response = mock.MagicMock(status_code=HTTPStatus.OK) + mock_response.content = b"dummy content" + mock_requests_get.return_value = mock_response + mock_member = mock.MagicMock(name="member") + mock_member.name = "smartctl_exporter" + mock_member_file = mock.MagicMock() + mock_member_file.read.return_value = b"dummy content" + mock_tar_open.return_value.__enter__.return_value.getmembers.return_value = [mock_member] + mock_tar_open.return_value.__enter__.return_value.extractfile.return_value = ( + mock_member_file # noqa: E501 + ) + + strategy.install() + + mock_requests_get.assert_called_with(strategy._release, timeout=60) + # mock_tar_open.assert_called_with(fileobj=BytesIO(b"dummy content"), mode="r:gz") + mock_make_executable.assert_called_with(strategy._exporter_path) + self.assertTrue(strategy._resource_dir.exists()) + + @mock.patch("hw_tools.requests.get") + def test_install_download_failure(self, mock_requests_get): + strategy = SmartCtlExporterStrategy() + strategy._resource_dir = self.tmp_path + strategy._exporter_path = self.tmp_path / "smartctl_exporter" + + mock_response = mock.MagicMock(status_code=HTTPStatus.NOT_FOUND) + mock_requests_get.return_value = mock_response + + with self.assertRaises(ResourceInstallationError): + strategy.install() + + @mock.patch("hw_tools.requests.get") + @mock.patch("hw_tools.tarfile.open") + def test_install_parse_failure(self, mock_tar_open, mock_requests_get): + strategy = SmartCtlExporterStrategy() + strategy._resource_dir = self.tmp_path + strategy._exporter_path = self.tmp_path / "smartctl_exporter" + + mock_response = mock.MagicMock(status_code=HTTPStatus.OK) + mock_response.content = b"dummy content" + 
mock_requests_get.return_value = mock_response + mock_member = mock.MagicMock(name="member") + mock_member.name = "random name" + mock_member_file = mock.MagicMock() + mock_member_file.read.return_value = b"dummy content" + mock_tar_open.return_value.__enter__.return_value.getmembers.return_value = [mock_member] + mock_tar_open.return_value.__enter__.return_value.extractfile.return_value = ( + mock_member_file # noqa: E501 + ) + + with self.assertRaises(ResourceInstallationError): + strategy.install() + + @mock.patch("hw_tools.shutil.rmtree") + def test_remove(self, mock_shutil_rmtree): + strategy = SmartCtlExporterStrategy() + + strategy.remove() + + mock_shutil_rmtree.assert_called_with(strategy._resource_dir) + + def test_check(self): + strategy = SmartCtlExporterStrategy() + strategy._exporter_path = mock.MagicMock() + strategy._exporter_path.is_file.return_value = True + + result = strategy.check() + self.assertTrue(result) + + strategy._exporter_path.is_file.assert_called() + + +@mock.patch("hw_tools.disk_hw_verifier", return_value=[7, 8, 9]) @mock.patch("hw_tools.bmc_hw_verifier", return_value=[1, 2, 3]) @mock.patch("hw_tools.raid_hw_verifier", return_value=[4, 5, 6]) -def test_get_hw_tool_enable_list(mock_raid_verifier, mock_bmc_hw_verifier): +def test_get_hw_tool_enable_list(mock_raid_verifier, mock_bmc_hw_verifier, mock_disk_hw_verifier): get_hw_tool_enable_list.cache_clear() output = get_hw_tool_enable_list() mock_raid_verifier.assert_called() mock_bmc_hw_verifier.assert_called() - assert output == [4, 5, 6, 1, 2, 3] + mock_disk_hw_verifier.assert_called() + assert output == [4, 5, 6, 1, 2, 3, 7, 8, 9] @mock.patch("hw_tools._raid_hw_verifier_hwinfo", return_value=set([4, 5, 6])) @@ -853,6 +981,18 @@ def test_raid_hw_verifier_hwinfo(mock_hwinfo, hwinfo_output, expect): case.assertCountEqual(output, expect) +class TestDiskHWVerifier(unittest.TestCase): + @mock.patch("hw_tools.lshw", return_value=[True]) + def test_disk_available(self, mock_lshw): + tools = 
disk_hw_verifier() + self.assertEqual(tools, [HWTool.SMARTCTL]) + + @mock.patch("hw_tools.lshw", return_value=[]) + def test_disk_not_available(self, mock_lshw): + tools = disk_hw_verifier() + self.assertEqual(tools, []) + + class TestIPMIHWVerifier(unittest.TestCase): @mock.patch("hw_tools.requests.get") @mock.patch("hw_tools.get_bmc_address", return_value="1.2.3.4") diff --git a/tests/unit/test_service.py b/tests/unit/test_service.py new file mode 100644 index 00000000..216d7566 --- /dev/null +++ b/tests/unit/test_service.py @@ -0,0 +1,732 @@ +# Copyright 2024 Canonical Ltd. +# See LICENSE file for licensing details. + +import pathlib +import tempfile +import unittest +from unittest import mock + +import yaml +from parameterized import parameterized +from redfish.rest.v1 import InvalidCredentialsError + +import service +from config import HARDWARE_EXPORTER_SETTINGS, HWTool + + +class TestBaseExporter(unittest.TestCase): + """Test Hardware Exporter methods.""" + + def setUp(self) -> None: + """Set up harness for each test case.""" + systemd_lib_patcher = mock.patch.object(service, "systemd") + self.mock_systemd = systemd_lib_patcher.start() + self.addCleanup(systemd_lib_patcher.stop) + + get_bmc_address_patcher = mock.patch("service.get_bmc_address", return_value="127.0.0.1") + get_bmc_address_patcher.start() + self.addCleanup(get_bmc_address_patcher.stop) + + search_path = pathlib.Path(f"{__file__}/../../..").resolve() + self.mock_config = { + "hardware-exporter-port": 10200, + "collect-timeout": 10, + "exporter-log-level": "INFO", + "redfish-username": "", + "redfish-password": "", + } + self.mock_stored_hw_tool_list_values = ["storcli", "ssacli"] + service.BaseExporter.__abstractmethods__ = set() + + self.exporter = service.BaseExporter( + search_path, self.mock_config, HARDWARE_EXPORTER_SETTINGS + ) + + @parameterized.expand( + [ + ( + { + "verify_render_files_exist": True, + "install_resources": True, + "render_config": True, + "render_service": True, + }, +
{ + "verify_render_files_exist": True, + "install_resources": True, + "render_config": True, + "render_service": True, + }, + True, + True, + ), + ( + { + "verify_render_files_exist": True, + "install_resources": False, + "render_config": True, + "render_service": True, + }, + { + "verify_render_files_exist": False, + "install_resources": True, + "render_config": False, + "render_service": False, + }, + False, + False, + ), + ( + { + "verify_render_files_exist": True, + "install_resources": True, + "render_config": False, + "render_service": True, + }, + { + "verify_render_files_exist": False, + "install_resources": True, + "render_config": True, + "render_service": False, + }, + False, + False, + ), + ( + { + "verify_render_files_exist": True, + "install_resources": True, + "render_config": True, + "render_service": False, + }, + { + "verify_render_files_exist": False, + "install_resources": True, + "render_config": True, + "render_service": True, + }, + False, + False, + ), + ( + { + "verify_render_files_exist": False, + "install_resources": True, + "resources_exist": True, + "render_config": True, + "render_service": True, + }, + { + "verify_render_files_exist": True, + "verify_render_files_exist": True, + "install_resources": True, + "render_config": True, + "render_service": True, + }, + False, + False, + ), + ] + ) + def test_install(self, mock_methods, method_calls, expected_result, systemd_daemon_called): + """Test exporter install method.""" + for method, return_value in mock_methods.items(): + m = mock.MagicMock() + m.return_value = return_value + setattr(self.exporter, method, m) + + result = self.exporter.install() + self.assertEqual(result, expected_result) + + for method, accept_called in method_calls.items(): + m = getattr(self.exporter, method) + if accept_called: + m.assert_called() + else: + m.assert_not_called() + + if systemd_daemon_called: + self.mock_systemd.daemon_reload.assert_called_once() + else: + 
self.mock_systemd.daemon_reload.assert_not_called() + + def test_install_failed_resources_not_exist(self): + """Test exporter install method when resources do not exist.""" + self.exporter.install_resources = mock.MagicMock() + self.exporter.install_resources.return_value = True + self.exporter.resources_exist = mock.MagicMock() + self.exporter.resources_exist.return_value = False + self.exporter.render_config = mock.MagicMock() + self.exporter.render_config.return_value = True + self.exporter.render_service = mock.MagicMock() + self.exporter.render_service.return_value = True + + result = self.exporter.install() + self.assertFalse(result) + + self.exporter.install_resources.assert_called() + self.exporter.resources_exist.assert_called() + self.exporter.render_config.assert_not_called() + self.exporter.render_service.assert_not_called() + + self.mock_systemd.daemon_reload.assert_not_called() + + @mock.patch.object(pathlib.Path, "exists", return_value=True) + def test_uninstall_okay(self, mock_service_exists): + """Test exporter uninstall method.""" + with mock.patch.object(pathlib.Path, "unlink") as mock_unlink: + self.exporter.uninstall() + mock_unlink.assert_called() + self.mock_systemd.daemon_reload.assert_called_once() + + @mock.patch.object(pathlib.Path, "exists", return_value=True) + def test_uninstall_failed(self, mock_service_exists): + """Test exporter uninstall method with permission error.""" + with mock.patch.object(pathlib.Path, "unlink") as mock_unlink: + mock_unlink.side_effect = PermissionError() + self.exporter.uninstall() + mock_unlink.assert_called() + self.mock_systemd.daemon_reload.assert_not_called() + + def test_enable_and_start(self): + """Test exporter enable and start behavior.""" + self.exporter.enable_and_start() + self.mock_systemd.service_enable.assert_called_once() + self.mock_systemd.service_start.assert_called_once() + + def test_disable_and_stop(self): + """Test exporter disable and stop behavior.""" + self.exporter.disable_and_stop() +
self.mock_systemd.service_disable.assert_called_once() + self.mock_systemd.service_stop.assert_called_once() + + def test_validate_exporter_config_okay(self): + self.exporter.port = 10000 + self.exporter.log_level = "debug" + self.assertEqual( + (True, "Exporter config is valid."), self.exporter.validate_exporter_configs() + ) + + def test_validate_exporter_config_failed_port(self): + self.exporter.port = 70000 + self.assertEqual( + (False, "Invalid config: exporter's port"), + self.exporter.validate_exporter_configs(), + ) + + def test_validate_exporter_config_failed_log_level(self): + self.exporter.port = 10000 + self.exporter.log_level = "not-allowed_level_choices" + self.assertEqual( + (False, "Invalid config: 'exporter-log-level'"), + self.exporter.validate_exporter_configs(), + ) + + @mock.patch("service.remove_file") + def test_remove_service_okay(self, mock_remove_file): + self.exporter.exporter_service_path = mock.MagicMock() + self.exporter.exporter_service_path.exists.return_value = True + mock_remove_file.return_value = "rm-something" + result = self.exporter.remove_service() + self.assertEqual(result, "rm-something") + mock_remove_file.assert_called_with(self.exporter.exporter_service_path) + + @mock.patch("service.remove_file") + def test_remove_service_file_not_exists(self, mock_remove_file): + self.exporter.exporter_service_path = mock.MagicMock() + self.exporter.exporter_service_path.exists.return_value = False + result = self.exporter.remove_service() + self.assertTrue(result) + mock_remove_file.assert_not_called() + + @mock.patch("service.remove_file") + def test_remove_config_okay(self, mock_remove_file): + self.exporter.exporter_config_path = mock.MagicMock() + self.exporter.exporter_config_path.exists.return_value = True + mock_remove_file.return_value = "rm-something" + result = self.exporter.remove_config() + self.assertEqual(result, "rm-something") + mock_remove_file.assert_called_with(self.exporter.exporter_config_path) + + 
@mock.patch("service.remove_file") + def test_remove_config_file_not_exists(self, mock_remove_file): + self.exporter.exporter_config_path = mock.MagicMock() + self.exporter.exporter_config_path.exists.return_value = False + result = self.exporter.remove_config() + self.assertTrue(result) + mock_remove_file.assert_not_called() + + @mock.patch("service.remove_file") + def test_remove_config_skip(self, mock_remove_file): + result = self.exporter.remove_config() + self.assertTrue(result) + mock_remove_file.assert_not_called() + + def test_install_resources(self): + result = self.exporter.install_resources() + self.assertTrue(result) + + def test_remove_resources(self): + result = self.exporter.remove_resources() + self.assertTrue(result) + + def test_resource_exists(self): + result = self.exporter.resources_exist() + self.assertTrue(result) + + @mock.patch("service.systemd") + def test__restart(self, mock_systemd): + self.exporter._restart() + mock_systemd.service_restart.assert_called_with(self.exporter.exporter_name) + + @mock.patch("service.systemd") + def test_check_health_okay(self, mock_systemd): + mock_systemd.service_failed.return_value = True + self.assertFalse(self.exporter.check_health()) + + @mock.patch("service.systemd") + def test_check_health_failed(self, mock_systemd): + mock_systemd.service_failed.return_value = False + self.assertTrue(self.exporter.check_health()) + + @mock.patch("service.systemd") + def test_check_active(self, mock_systemd): + mock_systemd.service_running.return_value = True + self.assertTrue(self.exporter.check_active()) + + @mock.patch("service.systemd") + def test_check_active_failed(self, mock_systemd): + mock_systemd.service_running.return_value = False + self.assertFalse(self.exporter.check_active()) + + def test_render_service(self): + self.exporter._render_service = mock.MagicMock() + self.exporter._render_service.return_value = "some-bool" + result = self.exporter.render_service() + 
self.exporter._render_service.assert_called_with({}) + self.assertEqual(result, "some-bool") + + @mock.patch("service.write_to_file") + def test__render_service(self, mock_write_to_file): + self.exporter.service_template.render = mock.MagicMock() + self.exporter.exporter_service_path = "some-config-path" + self.exporter.service_template.render.return_value = "some-content" + mock_write_to_file.return_value = "some-result" + + params = {"A": "a", "B": "b"} + result = self.exporter._render_service(params) + self.assertEqual(mock_write_to_file.return_value, result) + + self.exporter.service_template.render.assert_called_with(**params) + mock_write_to_file.assert_called_with("some-config-path", "some-content") + + @mock.patch("service.write_to_file") + def test_render_config_okay(self, mock_write_to_file): + self.exporter.exporter_config_path = "some-path" + self.exporter._render_config_content = mock.MagicMock() + self.exporter._render_config_content.return_value = "some-config-content" + mock_write_to_file.return_value = "some-result" + + result = self.exporter.render_config() + + mock_write_to_file.assert_called_with("some-path", "some-config-content", mode=0o600) + self.assertEqual("some-result", result) + + @mock.patch("service.write_to_file") + def test_render_config_skip(self, mock_write_to_file): + self.exporter.exporter_config_path = None + mock_write_to_file.return_value = "some-result" + + result = self.exporter.render_config() + + mock_write_to_file.assert_not_called() + self.assertEqual(True, result) + + def test__render_config_content(self): + result = self.exporter._render_config_content() + self.assertEqual(result, "") + + @parameterized.expand( + [ + (True, True, True, True), + (True, False, True, False), + (True, True, False, False), + (False, True, True, True), + ] + ) + def test_verify_render_files_exist( + self, required_config, config_exists, service_exists, expect + ): + self.exporter.exporter_config_path = None + if required_config: + 
self.exporter.exporter_config_path = mock.MagicMock() + self.exporter.exporter_config_path.exists.return_value = config_exists + self.exporter.exporter_service_path = mock.MagicMock() + self.exporter.exporter_service_path.exists.return_value = service_exists + + result = self.exporter.verify_render_files_exist() + self.assertEqual(result, expect) + + @parameterized.expand( + [ + ("success", [False, False, True, True]), + ("failure", [False, False, False, False]), + ("exception", [Exception("Some error"), Exception("Some error")]), + ] + ) + @mock.patch("service.sleep") + def test_restart(self, _, check_active_results, mock_sleep): + # Mocking necessary methods and attributes + self.exporter.settings.health_retry_count = 3 + self.exporter.settings.health_retry_timeout = 1 + self.exporter._restart = mock.MagicMock() + self.exporter.check_active = mock.MagicMock() + self.exporter.check_active.side_effect = check_active_results + + # Call the restart method + if isinstance(check_active_results[-1], Exception) or check_active_results[-1] is False: + with self.assertRaises(service.ExporterError): + self.exporter.restart() + else: + self.exporter.restart() + + # Assert that the methods are called as expected + if isinstance(check_active_results[-1], Exception): + pass # If an exception occurs, it's caught and raised + else: + self.assertTrue(self.exporter.check_active.called) + + +class TestHardwareExporter(unittest.TestCase): + """Test Hardware Exporter's methods.""" + + def setUp(self) -> None: + """Set up harness for each test case.""" + get_bmc_address_patcher = mock.patch("service.get_bmc_address", return_value="127.0.0.1") + get_bmc_address_patcher.start() + self.addCleanup(get_bmc_address_patcher.stop) + + search_path = pathlib.Path(f"{__file__}/../../..").resolve() + self.mock_config = { + "hardware-exporter-port": 10200, + "collect-timeout": 10, + "exporter-log-level": "INFO", + "redfish-username": "", + "redfish-password": "", + } + 
self.mock_stored_hw_tool_list_values = ["storcli", "ssacli"] + self.exporter = service.HardwareExporter( + search_path, self.mock_config, self.mock_stored_hw_tool_list_values + ) + + def test_render_service(self): + """Test render service.""" + self.exporter._render_service = mock.MagicMock() + self.exporter._render_service.return_value = "some result" + + result = self.exporter.render_service() + self.assertEqual(result, "some result") + + self.exporter._render_service.assert_called_with( + { + "CHARMDIR": str(self.exporter.charm_dir), + "CONFIG_FILE": str(self.exporter.exporter_config_path), + } + ) + + def test_validate_exporter_config_okay(self): + self.exporter.redfish_conn_params_valid = mock.MagicMock() + self.exporter.redfish_conn_params_valid.return_value = True + + self.assertEqual( + (True, "Exporter config is valid."), self.exporter.validate_exporter_configs() + ) + + @mock.patch("builtins.super", return_value=mock.MagicMock()) + def test_validate_exporter_config_super_failed(self, mock_super): + self.exporter.redfish_conn_params_valid = mock.MagicMock() + self.exporter.redfish_conn_params_valid.return_value = True + + mock_super.return_value.validate_exporter_configs.return_value = (False, "something wrong") + self.assertEqual((False, "something wrong"), self.exporter.validate_exporter_configs()) + + mock_super.return_value.validate_exporter_configs.accept_called() + self.exporter.redfish_conn_params_valid.assert_not_called() + + def test_validate_exporter_config_redfish_conn_params_failed(self): + self.exporter.redfish_conn_params_valid = mock.MagicMock() + self.exporter.redfish_conn_params_valid.return_value = False + + self.assertEqual( + (False, "Invalid config: 'redfish-username' or 'redfish-password'"), + self.exporter.validate_exporter_configs(), + ) + + def test_render_config_content(self): + """Test render config content.""" + content = self.exporter._render_config_content() + content_config = yaml.safe_load(content) + 
self.assertEqual(content_config["port"], 10200) + self.assertEqual(content_config["level"], "INFO") + self.assertEqual(content_config["collect_timeout"], 10) + self.assertEqual( + content_config["enable_collectors"], ["collector.mega_raid", "collector.hpe_ssa"] + ) + + def test_get_redfish_conn_params_when_redfish_is_available(self): + """Test get_redfish_conn_params when Redfish is available.""" + self.exporter.enabled_hw_tool_list = ["redfish"] + result = self.exporter.get_redfish_conn_params(self.mock_config) + expected_result = { + "host": "https://127.0.0.1", + "username": "", + "password": "", + "timeout": 10, + } + self.assertEqual(result, expected_result) + + def test_get_redfish_conn_params_when_redfish_is_unavailable(self): + """Test get_redfish_conn_params when Redfish is not available.""" + self.exporter.enabled_hw_tool_list = ["ssacli"] + result = self.exporter.get_redfish_conn_params(self.mock_config) + expected_result = {} + self.assertEqual(result, expected_result) + + @mock.patch("service.redfish_client") + def test_redfish_conn_params_valid_success(self, mock_redfish_client): + redfish_conn_params = { + "host": "hosta", + "username": "usernameb", + "password": "passwordc", + "timeout": "timeoutd", + } + result = self.exporter.redfish_conn_params_valid(redfish_conn_params) + self.assertTrue(result) + + mock_redfish_client.assert_called_with( + base_url="hosta", + username="usernameb", + password="passwordc", + timeout="timeoutd", + max_retry=self.exporter.settings.redfish_max_retry, + ) + mock_redfish_client.return_value.login.assert_called_with(auth="session") + mock_redfish_client.return_value.logout.assert_called() + + @mock.patch("service.redfish_client") + def test_redfish_conn_params_valid_miss_redfish_params(self, mock_redfish_client): + redfish_conn_params = {} + result = self.exporter.redfish_conn_params_valid(redfish_conn_params) + self.assertEqual(result, None) + + mock_redfish_client.assert_not_called() + + 
@mock.patch("service.redfish_client")
def test_redfish_conn_params_valid_failed_invalid_credentials_error(self, mock_redfish_client):
    """Test that InvalidCredentialsError from the client makes the check fail."""
    redfish_conn_params = {
        "host": "hosta",
        "username": "usernameb",
        "password": "passwordc",
        "timeout": "timeoutd",
    }
    mock_redfish_client.side_effect = InvalidCredentialsError
    result = self.exporter.redfish_conn_params_valid(redfish_conn_params)

    mock_redfish_client.assert_called_with(
        base_url="hosta",
        username="usernameb",
        password="passwordc",
        timeout="timeoutd",
        max_retry=self.exporter.settings.redfish_max_retry,
    )
    self.assertFalse(result)
    mock_redfish_client.return_value.login.assert_not_called()

@mock.patch("service.redfish_client")
def test_redfish_conn_params_valid_failed_exception(self, mock_redfish_client):
    """Test that a generic Exception from the client makes the check fail."""
    redfish_conn_params = {
        "host": "hosta",
        "username": "usernameb",
        "password": "passwordc",
        "timeout": "timeoutd",
    }
    mock_redfish_client.side_effect = Exception
    result = self.exporter.redfish_conn_params_valid(redfish_conn_params)

    mock_redfish_client.assert_called_with(
        base_url="hosta",
        username="usernameb",
        password="passwordc",
        timeout="timeoutd",
        max_retry=self.exporter.settings.redfish_max_retry,
    )
    self.assertFalse(result)
    mock_redfish_client.return_value.login.assert_not_called()

def test_hw_tools(self):
    """Test the list of hardware tools reported by the exporter."""
    self.assertEqual(
        self.exporter.hw_tools(),
        [
            HWTool.STORCLI,
            HWTool.SSACLI,
            HWTool.SAS2IRCU,
            HWTool.SAS3IRCU,
            HWTool.PERCCLI,
            HWTool.IPMI_DCMI,
            HWTool.IPMI_SEL,
            HWTool.IPMI_SENSOR,
            HWTool.REDFISH,
        ],
    )


class TestSmartMetricExporter(unittest.TestCase):
    """Test SmartCtlExporter's methods."""

    def setUp(self) -> None:
        """Build a SmartCtlExporter from a minimal mock config."""
        search_path = pathlib.Path(f"{__file__}/../../..").resolve()
        self.mock_config = {
            "smartctl-exporter-port": 10201,
            "collect-timeout": 10,
            "exporter-log-level": "INFO",
        }
        self.exporter = service.SmartCtlExporter(search_path, self.mock_config)

    def test_render_service(self):
        """Test render service."""
        self.exporter._render_service = mock.MagicMock()
        self.exporter._render_service.return_value = "some result"

        result = self.exporter.render_service()
        self.assertEqual(result, "some result")

        self.exporter._render_service.assert_called_with(
            {
                "PORT": str(self.exporter.port),
                "LEVEL": self.exporter.log_level,
            }
        )

    def test_hw_tools(self):
        """Test the list of hardware tools reported by the exporter."""
        self.assertEqual(self.exporter.hw_tools(), [HWTool.SMARTCTL])

    @mock.patch("service.systemd", return_value=mock.MagicMock())
    def test_install_resource_restart(self, mock_systemd):
        """Test that an active service is stopped and restarted around install."""
        self.exporter.strategy = mock.MagicMock()
        self.exporter.check_active = mock.MagicMock()
        self.exporter.check_active.return_value = True

        self.exporter.install_resources()

        # Real `assert_*` methods: the former `accept_*` spellings were plain
        # auto-created mock attributes and could never fail.
        self.exporter.strategy.install.assert_called()
        self.exporter.check_active.assert_called()
        mock_systemd.service_stop.assert_called_with(self.exporter.exporter_name)
        mock_systemd.service_restart.assert_called_with(self.exporter.exporter_name)

    @mock.patch("service.systemd", return_value=mock.MagicMock())
    def test_install_resource_no_restart(self, mock_systemd):
        """Test that an inactive service is neither stopped nor restarted."""
        self.exporter.strategy = mock.MagicMock()
        self.exporter.check_active = mock.MagicMock()
        self.exporter.check_active.return_value = False

        self.exporter.install_resources()

        self.exporter.strategy.install.assert_called()
        self.exporter.check_active.assert_called()
        mock_systemd.service_stop.assert_not_called()
        mock_systemd.service_restart.assert_not_called()

    def test_resource_exists(self):
        """Test that resources_exist consults the strategy's check."""
        self.exporter.strategy = mock.MagicMock()

        self.exporter.resources_exist()
        self.exporter.strategy.check.assert_called()

    def test_resources_exist(self):
        """Test that resources_exist returns the strategy's check result."""
        self.exporter.strategy = mock.MagicMock()
        self.exporter.strategy.check.return_value = "some result"

        result = self.exporter.resources_exist()

        self.assertEqual(result, "some result")
        self.exporter.strategy.check.assert_called()

    def test_resource_remove(self):
        """Test that remove_resources removes via the strategy and returns True."""
        self.exporter.strategy = mock.MagicMock()

        result = self.exporter.remove_resources()
        self.assertIs(result, True)

        self.exporter.strategy.remove.assert_called()


class TestWriteToFile(unittest.TestCase):
    """Test the write_to_file helper."""

    def setUp(self):
        """Create a closed temporary file that persists until tearDown."""
        self.temp_file = tempfile.NamedTemporaryFile(delete=False)
        self.temp_file.close()

    def tearDown(self):
        """Delete the temporary file created in setUp."""
        pathlib.Path(self.temp_file.name).unlink()

    @mock.patch("builtins.open", new_callable=mock.mock_open)
    @mock.patch("service.os")
    def test_write_to_file_success(self, mock_os, mock_open):
        """Test writing without a mode goes through the built-in open."""
        path = pathlib.Path(self.temp_file.name)
        content = "Hello, world!"

        mock_file = mock_open.return_value.__enter__.return_value

        result = service.write_to_file(path, content)
        self.assertTrue(result)

        mock_open.assert_called_with(path, "w", encoding="utf-8")
        mock_file.write.assert_called_with(content)

    @mock.patch("service.os.open", new_callable=mock.mock_open)
    @mock.patch("service.os.fdopen", new_callable=mock.mock_open)
    @mock.patch("service.os")
    def test_write_to_file_with_mode_success(self, mock_os, mock_fdopen, mock_open):
        """Test writing with a mode goes through os.open and os.fdopen."""
        path = pathlib.Path(self.temp_file.name)
        content = "Hello, world!"

        mock_file = mock_fdopen.return_value.__enter__.return_value

        result = service.write_to_file(path, content, mode=0o600)
        self.assertTrue(result)

        mock_open.assert_called_with(path, mock_os.O_CREAT | mock_os.O_WRONLY, 0o600)
        mock_fdopen.assert_called_with(mock_open.return_value, "w", encoding="utf-8")
        mock_file.write.assert_called_with(content)

    @mock.patch("builtins.open", new_callable=mock.mock_open)
    def test_write_to_file_permission_error(self, mock_open):
        """Test that a PermissionError on open makes write_to_file return False."""
        path = pathlib.Path(self.temp_file.name)
        content = "Hello, world!"

        # Make the patched built-in open raise PermissionError
        mock_open.side_effect = PermissionError("Permission denied")

        # Call the function
        result = service.write_to_file(path, content)

        # Assert the failure is reported through the return value
        self.assertFalse(result)

    @mock.patch("builtins.open", new_callable=mock.mock_open)
    def test_write_to_file_not_a_directory_error(self, mock_open):
        """Test that a NotADirectoryError on open makes write_to_file return False."""
        path = pathlib.Path(self.temp_file.name)
        content = "Hello, world!"

        # Make the patched built-in open raise NotADirectoryError
        mock_open.side_effect = NotADirectoryError("Not a directory")

        # Call the function
        result = service.write_to_file(path, content)

        # Assert the failure is reported through the return value
        self.assertFalse(result)


if __name__ == "__main__":
    unittest.main()