Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add metrics ipmi_dcmi_power_consumption_rate #45

Merged
merged 7 commits into from
Nov 24, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 42 additions & 3 deletions prometheus_hardware_exporter/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
SessionCreationError,
)

from .collectors.ipmi_dcmi import IpmiDcmi
from .collectors.dmidecode import Dmidecode
from .collectors.ipmi_dcmi import IpmiDcmi, IpmiTool
from .collectors.ipmi_sel import IpmiSel
from .collectors.ipmimonitoring import IpmiMonitoring
from .collectors.perccli import PercCLI
Expand Down Expand Up @@ -324,13 +325,15 @@ class IpmiDcmiCollector(BlockingCollector):
"""Collector for ipmi dcmi metrics."""

ipmi_dcmi = IpmiDcmi()
ipmi_tool = IpmiTool()
dmidecode = Dmidecode()

@property
def specifications(self) -> List[Specification]:
"""Define dcmi metric specs."""
return [
Specification(
name="ipmi_dcmi_power_cosumption_watts",
name="ipmi_dcmi_power_consumption_watts",
documentation="Current power consumption in watts",
metric_class=GaugeMetricFamily,
),
Expand All @@ -339,6 +342,14 @@ def specifications(self) -> List[Specification]:
documentation="Indicates if the ipmi dcmi command is successful or not",
metric_class=GaugeMetricFamily,
),
Specification(
name="ipmi_dcmi_power_consumption_percentage",
documentation=(
"Current power capacity usage as a percentage of the overall PSU budget"
),
labels=["ps_redundancy", "get_ps_redundancy_ok", "maximum_power_capacity"],
metric_class=GaugeMetricFamily,
),
]

def fetch(self) -> List[Payload]:
Expand All @@ -349,11 +360,39 @@ def fetch(self) -> List[Payload]:
logger.error("Failed to fetch current power from ipmi dcmi")
return [Payload(name="ipmi_dcmi_command_success", value=0.0)]

get_ps_redundancy_ok, ps_redundancy = self.ipmi_tool.get_ps_redundancy()
# If we fail to get the redundancy config from the server, assume
# redundancy is enabled: this makes the denominator smaller, so the
# alert fires more easily.
if not get_ps_redundancy_ok:
ps_redundancy = True

power_capacities = self.dmidecode.get_power_capacities()
maximum_power_capacity = (
Pjack marked this conversation as resolved.
Show resolved Hide resolved
(ps_redundancy and len(power_capacities) > 0)
and sum(power_capacities) / len(power_capacities)
or sum(power_capacities)
)

power_capacity_percentage = (
maximum_power_capacity
and current_power_payload["current_power"] / maximum_power_capacity
or 0
)

ps_redundancy_str = "1" if ps_redundancy else "0"
get_ps_redundancy_ok_str = "1" if get_ps_redundancy_ok else "0"

payloads = [
Payload(
name="ipmi_dcmi_power_cosumption_watts",
name="ipmi_dcmi_power_consumption_watts",
value=current_power_payload["current_power"],
),
Payload(
name="ipmi_dcmi_power_consumption_percentage",
value=power_capacity_percentage,
labels=[ps_redundancy_str, get_ps_redundancy_ok_str, str(maximum_power_capacity)],
),
Payload(name="ipmi_dcmi_command_success", value=1.0),
]
return payloads
Expand Down
33 changes: 33 additions & 0 deletions prometheus_hardware_exporter/collectors/dmidecode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""Dmidecode metrics collector."""
import re
from functools import lru_cache
from logging import getLogger
from typing import List

from ..utils import Command

logger = getLogger(__name__)


# Matches a "Max Power Capacity: <digits> W" entry; the second group captures the digits.
MAX_POWER_CAPACITY_REGEX = r"(Max Power Capacity: )(\d+)( W)"


class Dmidecode(Command):
    """Command line tool for dmidecode."""

    prefix = ""
    command = "dmidecode"

    def get_power_capacities(self) -> List[int]:
        """Get the list of PSU max power capacities, in watts.

        Runs ``dmidecode -t 39`` and extracts every
        "Max Power Capacity: <n> W" entry from its output.

        PSU ratings won't change over the lifetime of a server, so the first
        successful result is remembered on this instance and reused. A failed
        run is deliberately not remembered, so a transient error is retried on
        the next call instead of pinning the result to an empty list.

        Returns:
            One integer per matched entry, in output order. An empty list when
            the command reports an error. A fresh list is returned on every
            call so callers cannot mutate the cached value.
        """
        cached = getattr(self, "_power_capacities", None)
        if cached is not None:
            return list(cached)

        result = self("-t 39")
        if result.error:
            logger.error(result.error)
            return []

        # Each match is a tuple of the three regex groups; index 1 is the digits.
        powers = [int(match[1]) for match in re.findall(MAX_POWER_CAPACITY_REGEX, result.data)]
        self._power_capacities = powers
        return list(powers)
28 changes: 27 additions & 1 deletion prometheus_hardware_exporter/collectors/ipmi_dcmi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import re
from logging import getLogger
from typing import Dict
from typing import Dict, Tuple

from ..utils import Command

Expand All @@ -11,6 +11,32 @@
# Matches a line starting with "Current Power", then ":" and a (possibly empty)
# run of digits and dots captured as the named group "value", followed by "Watts".
CURRENT_POWER_REGEX = re.compile(r"^Current Power\s*:\s*(?P<value>[0-9.]*)\s*Watts.*")


class IpmiTool(Command):
    """Command line tool for ipmitool."""

    prefix = ""
    command = "ipmitool"

    def get_ps_redundancy(self) -> Tuple[bool, bool]:
        """Get power supply redundancy.

        Runs ``ipmitool sdr type "Power Supply" -c`` and inspects every CSV row
        whose first column contains "Redundancy".

        Returns:
            A ``(ok, redundancy)`` tuple:
            - ok: False when the command reported an error, True otherwise.
            - redundancy: True when every redundancy row reports
              "Fully Redundant" (also True when there is no such row);
              always False when ``ok`` is False.
        """
        result = self("""sdr type "Power Supply" -c""")
        if result.error:
            logger.error(result.error)
            return False, False

        statuses = []
        for line in result.data.splitlines():
            columns = line.split(",")
            if "Redundancy" not in columns[0]:
                continue
            if len(columns) < 5:
                # A truncated row has no status column; skip it instead of
                # letting an IndexError abort the whole collection.
                logger.warning("Unexpected ipmitool sdr output: %s", line)
                continue
            # column 4 is redundancy status
            statuses.append(columns[4])
        return True, all(status == "Fully Redundant" for status in statuses)


class IpmiDcmi(Command):
"""Command line tool for ipmi dcmi."""

Expand Down
27 changes: 27 additions & 0 deletions tests/unit/test_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,9 @@ def test_30_ipmi_dcmi_collector_not_installed(self):
ipmi_dcmi_collector.ipmi_dcmi = Mock()
ipmi_dcmi_collector.ipmi_dcmi.installed = False
ipmi_dcmi_collector.ipmi_dcmi.get_current_power.return_value = {}

ipmi_dcmi_collector.ipmi_tool = Mock()
ipmi_dcmi_collector.dmidecode = Mock()
payloads = ipmi_dcmi_collector.collect()

self.assertEqual(len(list(payloads)), 1)
Expand All @@ -303,10 +306,34 @@ def test_31_ipmi_dcmi_collector_installed_and_okay(self):
"""Test ipmi dcmi collector can fetch correct number of metrics."""
ipmi_dcmi_collector = IpmiDcmiCollector(Mock())
ipmi_dcmi_collector.ipmi_dcmi = Mock()
ipmi_dcmi_collector.ipmi_tool = Mock()
ipmi_dcmi_collector.dmidecode = Mock()

mock_dcmi_payload = {"current_power": 105}

ipmi_dcmi_collector.ipmi_dcmi.get_current_power.return_value = mock_dcmi_payload
ipmi_dcmi_collector.ipmi_tool.get_ps_redundancy.return_value = (True, True)
ipmi_dcmi_collector.dmidecode.get_power_capacities.return_value = [1000, 1000]

payloads = ipmi_dcmi_collector.collect()

available_metrics = [spec.name for spec in ipmi_dcmi_collector.specifications]
self.assertEqual(len(list(payloads)), len(available_metrics))
for payload in payloads:
self.assertIn(payload.name, available_metrics)

def test_32_ipmi_dcmi_collector_get_ps_redundancy_not_ok(self):
"""Test ipmi dcmi collector can fetch correct number of metrics."""
Pjack marked this conversation as resolved.
Show resolved Hide resolved
ipmi_dcmi_collector = IpmiDcmiCollector(Mock())
ipmi_dcmi_collector.ipmi_dcmi = Mock()
ipmi_dcmi_collector.ipmi_tool = Mock()
ipmi_dcmi_collector.dmidecode = Mock()

mock_dcmi_payload = {"current_power": 105}

ipmi_dcmi_collector.ipmi_dcmi.get_current_power.return_value = mock_dcmi_payload
ipmi_dcmi_collector.ipmi_tool.get_ps_redundancy.return_value = (False, True)
Pjack marked this conversation as resolved.
Show resolved Hide resolved
ipmi_dcmi_collector.dmidecode.get_power_capacities.return_value = [1000, 1000]

payloads = ipmi_dcmi_collector.collect()

Expand Down
27 changes: 27 additions & 0 deletions tests/unit/test_dmidecode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import unittest
from unittest.mock import patch

from prometheus_hardware_exporter.collectors.dmidecode import Dmidecode
from prometheus_hardware_exporter.utils import Command, Result

TYPE_39_OUTPUT = "tests/unit/test_resources/dmidecode/dmidecode_type_39_output.txt"


class TestDmidecode(unittest.TestCase):
    """Unit tests for the Dmidecode command wrapper."""

    @patch.object(Command, "__call__")
    def test_00_get_power_capacities_success(self, mock_call):
        """Both 1400 W entries in the sample output are returned."""
        with open(TYPE_39_OUTPUT, "r") as sample:
            mock_call.return_value = Result(sample.read(), None)
        self.assertEqual(Dmidecode().get_power_capacities(), [1400, 1400])

    @patch.object(Command, "__call__")
    def test_01_get_power_capacities_error(self, mock_call):
        """A command error yields an empty list."""
        mock_call.return_value = Result("", True)
        self.assertEqual(Dmidecode().get_power_capacities(), [])
22 changes: 21 additions & 1 deletion tests/unit/test_ipmi_dcmi.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import unittest
from unittest.mock import patch

from prometheus_hardware_exporter.collectors.ipmi_dcmi import IpmiDcmi
from prometheus_hardware_exporter.collectors.ipmi_dcmi import IpmiDcmi, IpmiTool
from prometheus_hardware_exporter.utils import Command, Result

DCMI_SAMPLE_OUTPUT = "tests/unit/test_resources/ipmi/ipmi_dcmi_sample_output.txt"
IPMITOOL_SDR_PS_SAMPLE_OUTPUT = "tests/unit/test_resources/ipmi/ipmitool_sdr_ps_sample_output.txt"


class TestIpmiDcmi(unittest.TestCase):
Expand All @@ -31,3 +32,22 @@ def test_01_get_current_power_parse_failure(self, mock_call):
ipmi_dcmi = IpmiDcmi()
payload = ipmi_dcmi.get_current_power()
self.assertEqual(payload, {})


class TestIpmiTool(unittest.TestCase):
    """Unit tests for the IpmiTool command wrapper."""

    @patch.object(Command, "__call__")
    def test_00_get_ps_redundancy_success(self, mock_call):
        """The sample sdr output is reported as (ok, fully redundant)."""
        with open(IPMITOOL_SDR_PS_SAMPLE_OUTPUT, "r") as sample:
            mock_call.return_value = Result(sample.read(), None)
        self.assertEqual(IpmiTool().get_ps_redundancy(), (True, True))

    @patch.object(Command, "__call__")
    def test_01_get_ps_redundancy_error(self, mock_call):
        """A command error is reported as (not ok, not redundant)."""
        mock_call.return_value = Result("", True)
        self.assertEqual(IpmiTool().get_ps_redundancy(), (False, False))
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# dmidecode 3.3
Getting SMBIOS data from sysfs.
SMBIOS 3.3.0 present.

Handle 0x2700, DMI type 39, 22 bytes
System Power Supply
Location: Not Specified
Name: PWR SPLY,1400W,RDNT,LTON
Manufacturer: DELL
Serial Number: CNLOD0019M3A6D
Asset Tag: Not Specified
Model Part Number: 01CW9GA04
Revision: Not Specified
Max Power Capacity: 1400 W
Status: Present, Unknown
Type: Unknown
Input Voltage Range Switching: Unknown
Plugged: Yes
Hot Replaceable: Yes

Handle 0x2701, DMI type 39, 22 bytes
System Power Supply
Location: Not Specified
Name: PWR SPLY,1400W,RDNT,LTON
Manufacturer: DELL
Serial Number: CNLOD0019M369D
Asset Tag: Not Specified
Model Part Number: 01CW9GA04
Revision: Not Specified
Max Power Capacity: 1400 W
Status: Present, Unknown
Type: Unknown
Input Voltage Range Switching: Unknown
Plugged: Yes
Hot Replaceable: Yes
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
PS Redundancy,77h,ok,7.1,Fully Redundant
Status,85h,ok,10.1,Presence detected
Status,86h,ok,10.2,Presence detected