Skip to content

Commit

Permalink
Add config option collect_timeout (#67)
Browse files: browse the repository at this point in the history
* Add config option collect_timeout
  • Loading branch information
sudeephb authored Mar 21, 2024
1 parent 5dab74f commit f3e9563
Show file tree
Hide file tree
Showing 16 changed files with 293 additions and 152 deletions.
8 changes: 8 additions & 0 deletions prometheus_hardware_exporter/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
SsaCLICollector,
)
from .config import (
DEFAULT_COLLECT_TIMEOUT,
DEFAULT_CONFIG,
DEFAULT_IPMI_SEL_INTERVAL,
DEFAULT_REDFISH_CLIENT_MAX_RETRY,
Expand Down Expand Up @@ -74,6 +75,12 @@ def parse_command_line() -> argparse.Namespace:
default=DEFAULT_IPMI_SEL_INTERVAL,
type=int,
)
parser.add_argument(
"--collect-timeout",
help="The timeout duration when running the shell commands to get the hardware data",
default=DEFAULT_COLLECT_TIMEOUT,
type=int,
)
parser.add_argument(
"--collector.hpe_ssa",
help="Enable HPE Smart Array Controller collector (default: disabled)",
Expand Down Expand Up @@ -197,6 +204,7 @@ def main() -> None:
redfish_client_timeout=namespace.redfish_client_timeout,
redfish_client_max_retry=namespace.redfish_client_max_retry,
redfish_discover_cache_ttl=namespace.redfish_discover_cache_ttl,
collect_timeout=namespace.collect_timeout,
)

# Start the exporter
Expand Down
38 changes: 28 additions & 10 deletions prometheus_hardware_exporter/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,10 @@
class PowerEdgeRAIDCollector(BlockingCollector):
"""Collector for PowerEdge RAID controller."""

perccli = PercCLI()
def __init__(self, config: Config) -> None:
"""Initialize the collector."""
self.perccli = PercCLI(config)
super().__init__(config)

@property
def specifications(self) -> List[Specification]:
Expand Down Expand Up @@ -177,8 +180,11 @@ def process(self, payloads: List[Payload], datastore: Dict[str, Payload]) -> Lis
class MegaRAIDCollector(BlockingCollector):
"""Collector for MegaRAID controller."""

storcli = StorCLI()
mega_raid_helper = MegaRAIDCollectorHelper()
def __init__(self, config: Config) -> None:
"""Initialize the collector."""
self.storcli = StorCLI(config)
self.mega_raid_helper = MegaRAIDCollectorHelper()
super().__init__(config)

@property
def specifications(self) -> List[Specification]:
Expand Down Expand Up @@ -324,9 +330,12 @@ def process(self, payloads: List[Payload], datastore: Dict[str, Payload]) -> Lis
class IpmiDcmiCollector(BlockingCollector):
"""Collector for ipmi dcmi metrics."""

ipmi_dcmi = IpmiDcmi()
ipmi_tool = IpmiTool()
dmidecode = Dmidecode()
def __init__(self, config: Config) -> None:
"""Initialize the collector."""
self.ipmi_dcmi = IpmiDcmi(config)
self.ipmi_tool = IpmiTool(config)
self.dmidecode = Dmidecode(config)
super().__init__(config)

@property
def specifications(self) -> List[Specification]:
Expand Down Expand Up @@ -410,7 +419,10 @@ def process(self, payloads: List[Payload], datastore: Dict[str, Payload]) -> Lis
class IpmiSensorsCollector(BlockingCollector):
"""Collector for ipmi sensors data."""

ipmimonitoring = IpmiMonitoring()
def __init__(self, config: Config) -> None:
"""Initialize the collector."""
self.ipmimonitoring = IpmiMonitoring(config)
super().__init__(config)

@property
def specifications(self) -> List[Specification]:
Expand Down Expand Up @@ -560,7 +572,10 @@ def _get_sensor_value_from_reading(self, reading: str) -> float:
class IpmiSelCollector(BlockingCollector):
"""Collector for IPMI SEL data."""

ipmi_sel = IpmiSel()
def __init__(self, config: Config) -> None:
"""Initialize the collector."""
self.ipmi_sel = IpmiSel(config)
super().__init__(config)

@property
def specifications(self) -> List[Specification]:
Expand Down Expand Up @@ -626,7 +641,7 @@ class LSISASControllerCollector(BlockingCollector):
def __init__(self, version: int, config: Config) -> None:
"""Initialize the collector."""
self.version = version
self.sasircu = Sasircu(version)
self.sasircu = Sasircu(config, version)
self.lsi_sas_helper = LSISASCollectorHelper()
super().__init__(config)

Expand Down Expand Up @@ -804,7 +819,10 @@ def process(self, payloads: List[Payload], datastore: Dict[str, Payload]) -> Lis
class SsaCLICollector(BlockingCollector):
"""Collector for storage arrays that support ssacli."""

ssacli = SsaCLI()
def __init__(self, config: Config) -> None:
"""Initialize the collector."""
super().__init__(config)
self.ssacli = SsaCLI(self.config)

@property
def specifications(self) -> List[Specification]:
Expand Down
6 changes: 4 additions & 2 deletions prometheus_hardware_exporter/collectors/sasircu.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from logging import getLogger
from typing import Any, Dict, List, Set, Tuple

from prometheus_hardware_exporter.config import Config

from ..utils import Command

logger = getLogger(__name__)
Expand Down Expand Up @@ -53,11 +55,11 @@ class Sasircu(Command):
prefix = ""
command = ""

def __init__(self, version: int) -> None:
def __init__(self, config: Config, version: int) -> None:
"""Initialize the command line tool."""
self.version = version
self.command = f"sas{version}ircu"
super().__init__()
super().__init__(config)

def _parse_key_value(self, text: str) -> Dict[str, Any]:
"""Return a dictionary from a text with the format of "key : value".
Expand Down
2 changes: 2 additions & 0 deletions prometheus_hardware_exporter/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

DEFAULT_CONFIG = os.path.join(os.environ.get("SNAP_DATA", "./"), "config.yaml")

DEFAULT_COLLECT_TIMEOUT = 30
DEFAULT_IPMI_SEL_INTERVAL = 86400
DEFAULT_REDFISH_CLIENT_TIMEOUT = 15
DEFAULT_REDFISH_CLIENT_MAX_RETRY = 1
Expand All @@ -26,6 +27,7 @@ class Config(BaseModel):
level: str = "DEBUG"
enable_collectors: List[str] = []

collect_timeout: int = DEFAULT_COLLECT_TIMEOUT
ipmi_sel_interval: int = DEFAULT_IPMI_SEL_INTERVAL

redfish_host: str = "127.0.0.1"
Expand Down
11 changes: 9 additions & 2 deletions prometheus_hardware_exporter/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from logging import getLogger
from typing import Optional, Union

from .config import Config

logger = getLogger(__name__)


Expand All @@ -23,9 +25,10 @@ class Command:
prefix = ""
command = ""

def __init__(self) -> None:
def __init__(self, config: Config) -> None:
"""Initialize the Command class."""
self.installed = False
self.config = config

def __call__(self, args: Optional[str] = None) -> Result:
"""Run the command, and return the result and error.
Expand Down Expand Up @@ -62,7 +65,11 @@ def check_output(
try:
logger.debug("Running command: %s", full_command)
result.data = (
subprocess.check_output(full_command, shell=True, timeout=30).decode().strip()
subprocess.check_output(
full_command, shell=True, timeout=self.config.collect_timeout
)
.decode()
.strip()
)
except subprocess.CalledProcessError as err:
logger.error(err)
Expand Down
145 changes: 80 additions & 65 deletions tests/unit/test_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,18 @@ class TestCustomCollector(unittest.TestCase):
def test_mega_raid_collector_not_installed(self):
"""Test mega raid collector when storcli is not installed."""
mega_raid_collector = MegaRAIDCollector(Mock())
mega_raid_collector.sasircu = Mock()
mega_raid_collector.sasircu.installed = False
mega_raid_collector.storcli = Mock()
mega_raid_collector.storcli.installed = False
payloads = mega_raid_collector.collect()

self.assertEqual(len(list(payloads)), 1)

def test_mega_raid_collector_no_controller(self):
"""Test mega raid collector when no controllers are present."""
mega_raid_collector = MegaRAIDCollector(Mock())
mega_raid_collector.storcli = Mock()
mega_raid_collector.storcli.installed = True
mega_raid_collector.storcli.get_all_information.return_value = {}
payloads = mega_raid_collector.collect()

self.assertEqual(len(list(payloads)), 1)
Expand Down Expand Up @@ -469,16 +479,17 @@ def test_ssacli_installed_and_okay(self):
for payload in payloads:
self.assertIn(payload.name, available_metrics)

def test_perccli_collector_command_success(self):
with patch.object(PowerEdgeRAIDCollector, "perccli") as mock_cli:
# 1 success, 1 fail
mock_cli.ctrl_exists.return_value = True
mock_cli.ctrl_successes.return_value = {0: False, 1: True}
mock_cli.get_controllers.return_value = {"count": 1}
mock_cli.get_virtual_drives.return_value = {}
@patch("prometheus_hardware_exporter.collector.PercCLI")
def test_perccli_collector_command_success(self, mock_perccli):
perccli = mock_perccli()
perccli.ctrl_exists.return_value = True
perccli.ctrl_successes.return_value = {0: False, 1: True}
perccli.get_controllers.return_value = {"count": 1}
perccli.get_virtual_drives.return_value = {}

power_edge_collector = PowerEdgeRAIDCollector(Mock())
payloads = list(power_edge_collector.collect())

power_edge_collector = PowerEdgeRAIDCollector(Mock())
payloads = list(power_edge_collector.collect())
assert len(payloads) >= 4

assert payloads[0].samples[0].value == 1.0
Expand All @@ -491,17 +502,18 @@ def test_perccli_collector_command_success(self):
assert payloads[2].samples[0].labels["controller_id"] == "1"
assert payloads[2].samples[0].name == "perccli_command_ctrl_success"

def test_perccli_virtual_device_command_success(self):
with patch.object(PowerEdgeRAIDCollector, "perccli") as mock_cli:
mock_cli.success.return_value = True
mock_cli.ctrl_successes.return_value = {0: False, 1: True}
mock_cli.get_controllers.return_value = {"count": 1}
mock_cli.get_virtual_drives.return_value = {
0: [{"DG": "0", "VD": "0", "cache": "NRWTD", "state": "Optl"}]
}
@patch("prometheus_hardware_exporter.collector.PercCLI")
def test_perccli_virtual_device_command_success(self, mock_perccli):
perccli = mock_perccli()
perccli.success.return_value = True
perccli.ctrl_successes.return_value = {0: False, 1: True}
perccli.get_controllers.return_value = {"count": 1}
perccli.get_virtual_drives.return_value = {
0: [{"DG": "0", "VD": "0", "cache": "NRWTD", "state": "Optl"}]
}

power_edge_collector = PowerEdgeRAIDCollector(Mock())
payloads = list(power_edge_collector.collect())
power_edge_collector = PowerEdgeRAIDCollector(Mock())
payloads = list(power_edge_collector.collect())

get_payloads = []

Expand All @@ -526,51 +538,54 @@ def test_perccli_virtual_device_command_success(self):
]:
assert name in get_payloads

def test_perccli_cmd_fail(self):
with patch.object(PowerEdgeRAIDCollector, "perccli") as mock_cli:
mock_cli.success.return_value = False
power_edge_collector = PowerEdgeRAIDCollector(Mock())
payloads = list(power_edge_collector.collect())
assert len(payloads) == 1
assert payloads[0].samples[0].value == 0.0

def test_perccli_no_controller_exists(self):
with patch.object(PowerEdgeRAIDCollector, "perccli") as mock_cli:
mock_cli.success.return_value = True
mock_cli.ctrl_exists.return_value = False
power_edge_collector = PowerEdgeRAIDCollector(Mock())
payloads = list(power_edge_collector.collect())
assert len(payloads) == 2
assert payloads[1].samples[0].value == 0.0

def test_perccli_physical_device_command_success(self):
with patch.object(PowerEdgeRAIDCollector, "perccli") as mock_cli:
mock_cli.success.return_value = True
mock_cli.ctrl_successes.return_value = {0: False, 1: True}
mock_cli.get_controllers.return_value = {"count": 1}
mock_cli.get_physical_devices.return_value = {
0: [
{
"eid": "69",
"slt": "0",
"state": "Onln",
"DG": 0,
"size": "558.375 GB",
"media_type": "HDD",
},
{
"eid": "69",
"slt": "1",
"state": "Onln",
"DG": 0,
"size": "558.375 GB",
"media_type": "HDD",
},
]
}
@patch("prometheus_hardware_exporter.collector.PercCLI")
def test_perccli_cmd_fail(self, mock_perccli):
perccli = mock_perccli()
perccli.success.return_value = False
power_edge_collector = PowerEdgeRAIDCollector(Mock())
payloads = list(power_edge_collector.collect())
assert len(payloads) == 1
assert payloads[0].samples[0].value == 0.0

@patch("prometheus_hardware_exporter.collector.PercCLI")
def test_perccli_no_controller_exists(self, mock_perccli):
perccli = mock_perccli()
perccli.success.return_value = True
perccli.ctrl_exists.return_value = False
power_edge_collector = PowerEdgeRAIDCollector(Mock())
payloads = list(power_edge_collector.collect())
assert len(payloads) == 2
assert payloads[1].samples[0].value == 0.0

@patch("prometheus_hardware_exporter.collector.PercCLI")
def test_perccli_physical_device_command_success(self, mock_perccli):
perccli = mock_perccli()
perccli.success.return_value = True
perccli.ctrl_successes.return_value = {0: False, 1: True}
perccli.get_controllers.return_value = {"count": 1}
perccli.get_physical_devices.return_value = {
0: [
{
"eid": "69",
"slt": "0",
"state": "Onln",
"DG": 0,
"size": "558.375 GB",
"media_type": "HDD",
},
{
"eid": "69",
"slt": "1",
"state": "Onln",
"DG": 0,
"size": "558.375 GB",
"media_type": "HDD",
},
]
}

power_edge_collector = PowerEdgeRAIDCollector(Mock())
payloads = list(power_edge_collector.collect())
power_edge_collector = PowerEdgeRAIDCollector(Mock())
payloads = list(power_edge_collector.collect())

get_payloads = []

Expand Down
7 changes: 5 additions & 2 deletions tests/unit/test_dmidecode.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from unittest.mock import patch

from prometheus_hardware_exporter.collectors.dmidecode import Dmidecode
from prometheus_hardware_exporter.config import Config
from prometheus_hardware_exporter.utils import Command, Result

TYPE_39_OUTPUT = "tests/unit/test_resources/dmidecode/dmidecode_type_39_output.txt"
Expand All @@ -14,14 +15,16 @@ class TestDmidecode(unittest.TestCase):
def test_00_get_power_capacities_success(self, mock_call):
with open(TYPE_39_OUTPUT, "r") as content:
mock_call.return_value = Result(content.read(), None)
dmidecode = Dmidecode()
config = Config()
dmidecode = Dmidecode(config)
power_capacities = dmidecode.get_power_capacities()
self.assertEqual(power_capacities, [1400, 1400])

@patch.object(Command, "__call__")
def test_01_get_power_capacities_error(self, mock_call):
mock_call.return_value = Result("", True)

dmidecode = Dmidecode()
config = Config()
dmidecode = Dmidecode(config)
power_capacities = dmidecode.get_power_capacities()
self.assertEqual(power_capacities, [])
Loading

0 comments on commit f3e9563

Please sign in to comment.