Skip to content

Commit

Permalink
feat: Add metrics ipmi_dcmi_power_consumption_rate
Browse files Browse the repository at this point in the history
Add new ipmi-dcmi metrics
Rate = power consumption / maximum power consumption
  • Loading branch information
jneo8 committed Nov 15, 2023
1 parent 7efb9ef commit fd680e9
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 4 deletions.
43 changes: 40 additions & 3 deletions prometheus_hardware_exporter/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@
SessionCreationError,
)

from .collectors.ipmi_dcmi import IpmiDcmi
from .collectors.ipmi_dcmi import IpmiDcmi, IpmiTool
from .collectors.ipmi_sel import IpmiSel
from .collectors.ipmimonitoring import IpmiMonitoring
from .collectors.lshw import LSHW
from .collectors.perccli import PercCLI
from .collectors.redfish import RedfishHelper
from .collectors.sasircu import LSISASCollectorHelper, Sasircu
Expand Down Expand Up @@ -324,13 +325,15 @@ class IpmiDcmiCollector(BlockingCollector):
"""Collector for ipmi dcmi metrics."""

ipmi_dcmi = IpmiDcmi()
ipmi_tool = IpmiTool()
lshw = LSHW()

@property
def specifications(self) -> List[Specification]:
"""Define dcmi metric specs."""
return [
Specification(
name="ipmi_dcmi_power_cosumption_watts",
name="ipmi_dcmi_power_consumption_watts",
documentation="Current power consumption in watts",
metric_class=GaugeMetricFamily,
),
Expand All @@ -339,6 +342,12 @@ def specifications(self) -> List[Specification]:
documentation="Indicates if the ipmi dcmi command is successful or not",
metric_class=GaugeMetricFamily,
),
Specification(
name="ipmi_dcmi_power_consumption_rate",
documentation="Current power capacity rate",
labels=["ps_redundancy", "get_ps_redundancy_ok", "maximum_power_capacity"],
metric_class=GaugeMetricFamily,
),
]

def fetch(self) -> List[Payload]:
Expand All @@ -349,11 +358,39 @@ def fetch(self) -> List[Payload]:
logger.error("Failed to fetch current power from ipmi dcmi")
return [Payload(name="ipmi_dcmi_command_success", value=0.0)]

get_ps_redundancy_ok, ps_redundancy = self.ipmi_tool.get_ps_redundancy()
# If we fail to get the redundancy config from the server,
# assume redundancy is enabled: this makes the denominator smaller,
# so the alert fires more easily.
if not get_ps_redundancy_ok:
ps_redundancy = True

power_capacities = self.lshw.get_power_capacities()
maximum_power_capacity = (
sum(power_capacities) / len(power_capacities)
if ps_redundancy
else sum(power_capacities)
)

power_capacity_rate = (
maximum_power_capacity
and current_power_payload["current_power"] / maximum_power_capacity
or 0
)

ps_redundancy_str = "1" if ps_redundancy else "0"
get_ps_redundancy_ok_str = "1" if get_ps_redundancy_ok else "0"

payloads = [
Payload(
name="ipmi_dcmi_power_cosumption_watts",
name="ipmi_dcmi_power_consumption_watts",
value=current_power_payload["current_power"],
),
Payload(
name="ipmi_dcmi_power_consumption_rate",
value=power_capacity_rate,
labels=[ps_redundancy_str, get_ps_redundancy_ok_str, str(maximum_power_capacity)],
),
Payload(name="ipmi_dcmi_command_success", value=1.0),
]
return payloads
Expand Down
28 changes: 27 additions & 1 deletion prometheus_hardware_exporter/collectors/ipmi_dcmi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import re
from logging import getLogger
from typing import Dict
from typing import Dict, Tuple

from ..utils import Command

Expand All @@ -11,6 +11,32 @@
CURRENT_POWER_REGEX = re.compile(r"^Current Power\s*:\s*(?P<value>[0-9.]*)\s*Watts.*")


class IpmiTool(Command):
    """Command line tool for ipmitool."""

    prefix = ""
    command = "ipmitool"

    def get_ps_redundancy(self) -> Tuple[bool, bool]:
        """Get power supply redundancy.

        Runs ``sdr type "Power Supply" -c`` and inspects every row whose
        first comma-separated column contains "Redundancy".

        Returns:
            A tuple ``(ok, redundancy)``:

            - ok: True if the command succeeded and its output could be
              parsed, False otherwise.
            - redundancy: True if every redundancy row reports
              "Fully Redundant". This is also True when the output has no
              redundancy row at all. Always False when ``ok`` is False.
        """
        result = self("""sdr type "Power Supply" -c""")
        if result.error:
            logger.error(result.error)
            return False, False

        statuses = []
        for line in result.data.splitlines():
            columns = line.split(",")
            if "Redundancy" not in columns[0]:
                continue
            # Column 4 (0-based) is the redundancy status. A row that is too
            # short to carry it means the output is not in the expected shape,
            # so report the query as failed instead of raising IndexError.
            if len(columns) <= 4:
                logger.error("Unexpected power supply redundancy row: %s", line)
                return False, False
            statuses.append(columns[4])
        return True, all(status == "Fully Redundant" for status in statuses)


class IpmiDcmi(Command):
"""Command line tool for ipmi dcmi."""

Expand Down
32 changes: 32 additions & 0 deletions prometheus_hardware_exporter/collectors/lshw.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""Lshw metrics collector."""
import json
from logging import getLogger
from typing import Dict, List

from ..utils import Command

logger = getLogger(__name__)


class LSHW(Command):
    """Command line tool for lshw."""

    prefix = ""
    command = "lshw"

    def get_powers(self) -> List[Dict[str, str]]:
        """Get power class data.

        Runs ``lshw -c power -json`` and decodes its output.

        Returns:
            A list with one dict per decoded entry. An empty list is
            returned when the command reports an error or when its output
            cannot be decoded into a JSON object or list.
        """
        result = self("-c power -json")
        if result.error:
            logger.error(result.error)
            return []
        try:
            data = json.loads(result.data)
        except json.JSONDecodeError as err:
            # Treat unparsable output like a failed command: log it and
            # report no power data instead of raising.
            logger.error("Failed to parse lshw output as JSON: %s", err)
            return []
        # json.loads may yield a single object rather than a list; normalize
        # so callers can always iterate over per-device dicts.
        if isinstance(data, dict):
            return [data]
        if not isinstance(data, list):
            logger.error("Unexpected lshw JSON payload type: %s", type(data).__name__)
            return []
        return data

    def get_power_capacities(self) -> List[int]:
        """Get list of power capacities.

        Returns:
            The ``capacity`` value of every entry from ``get_powers``
            converted to int, using 0 for entries without that key.
        """
        return [int(power.get("capacity", 0)) for power in self.get_powers()]

0 comments on commit fd680e9

Please sign in to comment.