Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: General err handling for collector #52

Merged
merged 4 commits into from
Nov 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 45 additions & 19 deletions prometheus_hardware_exporter/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from logging import getLogger
from typing import Any, Dict, Iterable, List, Type

from prometheus_client.metrics_core import Metric
from prometheus_client.metrics_core import GaugeMetricFamily, Metric
from prometheus_client.registry import Collector

from .config import Config
Expand Down Expand Up @@ -89,6 +89,25 @@ def specifications(self) -> List[Specification]:
A list of specification.
"""

@property
def failed_metrics(self) -> Iterable[Metric]:
    """Yield the fallback metric reported when this collector fails.

    A single gauge named ``<lowercased class name>_collector_failed`` is
    built, with one ``collector`` label whose value is the class name and
    a sample value of 1.

    Yields:
        metrics: the gauge metric family flagging the failed collector
    """
    # Resolve the class name once; it drives the metric name, the
    # documentation string and the label value.
    collector_name = self.__class__.__name__
    failure_gauge = GaugeMetricFamily(
        name=f"{collector_name.lower()}_collector_failed",
        documentation=f"{collector_name} Collector failed to fetch metrics",
        labels=["collector"],
    )
    failure_gauge.add_metric(labels=[collector_name], value=1)
    yield failure_gauge

def init_default_datastore(self, payloads: List[Payload]) -> None:
"""Initialize or fill the data store with default values.

Expand All @@ -111,21 +130,28 @@ def collect(self) -> Iterable[Metric]:
Yields:
metrics: the internal metrics
"""
payloads = self.fetch()
self.init_default_datastore(payloads)
processed_payloads = self.process(payloads, self._datastore)

# unpacked and create metrics
for payload in processed_payloads:
spec = self._specs[payload.name]
# We have to ignore the type checking here, since the subclass of
# any metric family from prometheus client adds new attributes and
# methods.
metric = spec.metric_class( # type: ignore[call-arg]
name=spec.name, labels=spec.labels, documentation=spec.documentation
)
metric.add_metric( # type: ignore[attr-defined]
labels=payload.labels, value=payload.value
)
yield metric
self._datastore[payload.uuid] = payload
# The general exception handler tries to make sure that a bug in a single
# collector only changes that collector's metrics output to failed_metrics,
# while the other collectors keep working.
try:
payloads = self.fetch()
self.init_default_datastore(payloads)
processed_payloads = self.process(payloads, self._datastore)

# unpacked and create metrics
for payload in processed_payloads:
spec = self._specs[payload.name]
# We have to ignore the type checking here, since the subclass of
# any metric family from prometheus client adds new attributes and
# methods.
metric = spec.metric_class( # type: ignore[call-arg]
name=spec.name, labels=spec.labels, documentation=spec.documentation
)
metric.add_metric( # type: ignore[attr-defined]
labels=payload.labels, value=payload.value
)
yield metric
self._datastore[payload.uuid] = payload
except Exception as err: # pylint: disable=W0718
logger.error(err)
yield from self.failed_metrics
rgildein marked this conversation as resolved.
Show resolved Hide resolved
28 changes: 28 additions & 0 deletions tests/unit/test_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -1235,3 +1235,31 @@ def test_210_redfish_create_smart_storage_health_metric_payload(self):
)
],
)

def test_1000_collector_fetch_failed(self):
    """A raising ``fetch`` makes ``collect`` yield only the failure metric."""
    # (collector class, expected metric name, expected sample labels)
    cases = [
        (
            MegaRAIDCollector,
            "megaraidcollector_collector_failed",
            {"collector": "MegaRAIDCollector"},
        ),
        (
            RedfishCollector,
            "redfishcollector_collector_failed",
            {"collector": "RedfishCollector"},
        ),
        (
            IpmiSensorsCollector,
            "ipmisensorscollector_collector_failed",
            {"collector": "IpmiSensorsCollector"},
        ),
    ]
    for collector_cls, expected_name, expected_labels in cases:
        collector = collector_cls(Mock())
        # Force the fetch step to blow up so collect() takes its error path.
        collector.fetch = Mock(side_effect=Exception("Unknown error"))

        metrics = list(collector.collect())

        self.assertEqual(len(metrics), 1)
        failed_metric = metrics[0]
        self.assertEqual(failed_metric.name, expected_name)
        first_sample = failed_metric.samples[0]
        self.assertEqual(first_sample.value, 1.0)
        self.assertEqual(first_sample.labels, expected_labels)