Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ Bugfix ] Fix Prometheus Metrics With zeromq Frontend #7279

Merged
merged 42 commits into from
Aug 18, 2024
Merged
Changes from 1 commit
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
0ee81de
fix
robertgshaw2-redhat Aug 7, 2024
2de4dc4
stash
robertgshaw2-redhat Aug 8, 2024
ebd062e
remove __init__
robertgshaw2-redhat Aug 8, 2024
c79d165
scripts fix
robertgshaw2-redhat Aug 8, 2024
6da5189
cleanup
robertgshaw2-redhat Aug 8, 2024
346e5fc
more cleanup
robertgshaw2-redhat Aug 8, 2024
b1d945d
clean
robertgshaw2-redhat Aug 8, 2024
460b621
clean
robertgshaw2-redhat Aug 8, 2024
66fa98b
match nick
robertgshaw2-redhat Aug 8, 2024
db86714
match nick exactly
robertgshaw2-redhat Aug 8, 2024
4029167
grabbed nicks changes
njhill Aug 8, 2024
c2b304a
switch to tempfile
robertgshaw2-redhat Aug 8, 2024
dea6896
add comment
robertgshaw2-redhat Aug 8, 2024
1082e63
format
robertgshaw2-redhat Aug 8, 2024
b26cb53
deprecate Info metrics
robertgshaw2-redhat Aug 8, 2024
64ba139
fixt
robertgshaw2-redhat Aug 8, 2024
2263569
format
robertgshaw2-redhat Aug 8, 2024
ba5c741
add multiprocess mode to gauges
robertgshaw2-redhat Aug 8, 2024
694fc12
fix typo
robertgshaw2-redhat Aug 8, 2024
4032b4d
test that metrics are exported
robertgshaw2-redhat Aug 8, 2024
d1fe504
run both in the ci
robertgshaw2-redhat Aug 8, 2024
c65f8ea
format
robertgshaw2-redhat Aug 8, 2024
e3025f7
fix test
robertgshaw2-redhat Aug 8, 2024
350c66d
adding tests
robertgshaw2-redhat Aug 8, 2024
2da7d13
comments in test
robertgshaw2-redhat Aug 8, 2024
3d6aade
format
robertgshaw2-redhat Aug 8, 2024
a76f38a
fix example
robertgshaw2-redhat Aug 8, 2024
6eea97c
remove unregistering
robertgshaw2-redhat Aug 8, 2024
bccc2d2
Merge branch 'main' into fix-prom-metrics
robertgshaw2-redhat Aug 14, 2024
0745f7d
cleanup for prom multiprocessing
robertgshaw2-redhat Aug 14, 2024
5c253d9
format
robertgshaw2-redhat Aug 14, 2024
af3474a
stash
robertgshaw2-redhat Aug 18, 2024
13c0444
updated
robertgshaw2-redhat Aug 18, 2024
c4477c4
updated
robertgshaw2-redhat Aug 18, 2024
281a26a
fix
robertgshaw2-redhat Aug 18, 2024
e793498
fix naming
robertgshaw2-redhat Aug 18, 2024
53a56d5
comment
robertgshaw2-redhat Aug 18, 2024
59479a6
format
robertgshaw2-redhat Aug 18, 2024
f74d426
fix cache_config_info
robertgshaw2-redhat Aug 18, 2024
03b8895
Merge branch 'main' into fix-prom-metrics
robertgshaw2-redhat Aug 18, 2024
224c987
properly pass multiprocess_mode to RayGaugeCLS
robertgshaw2-redhat Aug 18, 2024
ad26ad7
./format
robertgshaw2-redhat Aug 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
stash
robertgshaw2-redhat committed Aug 18, 2024
commit af3474a37a14332dfd28765efb2429be6392aa56
23 changes: 22 additions & 1 deletion vllm/engine/metrics.py
Original file line number Diff line number Diff line change
@@ -39,6 +39,13 @@ class Metrics:
_histogram_cls = prometheus_client.Histogram

def __init__(self, labelnames: List[str], max_model_len: int):

# Config Stats
self.gauge_cache_info = self._gauge_cls(
name="vllm:cache_config_info",
documentation="Info about the cache configuration for vLLM.",
labelnames=labelnames,
multiprocess_mode="sum")

# System stats
# Scheduler State
@@ -176,9 +183,23 @@ def __init__(self, labelnames: List[str], max_model_len: int):
multiprocess_mode="sum",
)


# end-metrics-definitions

def _create_info_cache_config(self) -> None:
# Config Information
self.info_cache_config = self._gauge_cls(
name='vllm:cache_config',
documentation='Information of the LLMEngine CacheConfig',
labelnames=
multiprocess_mode="sum"
)


def _unregister_vllm_metrics(self) -> None:
    """Unregister vLLM collectors from the default Prometheus registry.

    A collector is treated as a vLLM collector when it has a ``_name``
    attribute containing the substring ``"vllm"``. Collectors without a
    ``_name`` attribute are left registered.
    """
    registry = prometheus_client.REGISTRY
    # Walk a snapshot of the registered collectors rather than the live
    # mapping -- presumably because unregister() modifies that mapping.
    for registered in tuple(registry._collector_to_names):
        if not hasattr(registered, "_name"):
            continue
        if "vllm" in registered._name:
            registry.unregister(registered)


class _RayGaugeWrapper:
"""Wraps around ray.util.metrics.Gauge to provide same API as
8 changes: 7 additions & 1 deletion vllm/engine/metrics_types.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import time
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import List, Optional
from typing import Dict, List, Optional, Protocol

from vllm.spec_decode.metrics import SpecDecodeWorkerMetrics

@@ -60,3 +60,9 @@ def maybe_update_spec_decode_metrics(self, stats: Stats):
to be emitted at same time as log interval)."""
if stats.spec_decode_metrics is not None:
self.spec_decode_metrics = stats.spec_decode_metrics


class SupportsMetricsInfo(Protocol):
    """Structural type for objects that can describe themselves for metrics.

    Any object with a compatible ``metrics_info`` method satisfies this
    protocol; it does not need to inherit from this class.
    """

    def metrics_info(self) -> Dict[str, str]:
        """Return a mapping of string keys to string values."""
        ...