Skip to content

Commit

Permalink
Add more prometheus metrics (#879)
Browse files Browse the repository at this point in the history
  • Loading branch information
BlackYps authored Mar 13, 2022
1 parent 83d4231 commit 745d183
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 12 deletions.
3 changes: 2 additions & 1 deletion server/game_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,9 +240,10 @@ async def publish_game_results(self, game_results: EndedGameInfo):
result_dict,
)

# TODO: Remove when rating service starts listening to message queue
if (
game_results.validity is ValidityState.VALID
and game_results.rating_type is not None
):
metrics.rated_games.labels(game_results.rating_type).inc()
# TODO: Remove when rating service starts listening to message queue
await self._rating_service.enqueue(result_dict)
8 changes: 7 additions & 1 deletion server/ladder_service/ladder_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import humanize
from sqlalchemy import and_, func, select, text, true

from server import metrics
from server.config import config
from server.core import Service
from server.db import FAFDatabase
Expand Down Expand Up @@ -42,14 +43,15 @@
OnMatchedCallback,
Search
)
from server.metrics import MatchLaunch
from server.players import Player, PlayerState
from server.types import GameLaunchOptions, Map, NeroxisGeneratedMap


@with_logger
class LadderService(Service):
"""
Service responsible for managing the 1v1 ladder. Does matchmaking, updates
Service responsible for managing the automatches. Does matchmaking, updates
statistics, and launches the games.
"""

Expand Down Expand Up @@ -532,26 +534,30 @@ def make_game_options(player: Player) -> GameLaunchOptions:

await self.launch_match(game, host, all_guests, make_game_options)
self._logger.debug("Ladder game launched successfully %s", game)
metrics.matches.labels(queue.name, MatchLaunch.SUCCESSFUL).inc()
except Exception as e:
abandoning_players = []
if isinstance(e, NotConnectedError):
self._logger.info(
"Ladder game failed to start! %s setup timed out",
game
)
metrics.matches.labels(queue.name, MatchLaunch.TIMED_OUT).inc()
abandoning_players = e.players
elif isinstance(e, GameClosedError):
self._logger.info(
"Ladder game %s failed to start! "
"Player %s closed their game instance",
game, e.player
)
metrics.matches.labels(queue.name, MatchLaunch.ABORTED_BY_PLAYER).inc()
abandoning_players = [e.player]
else:
# All timeout errors should be transformed by the match starter.
assert not isinstance(e, asyncio.TimeoutError)

self._logger.exception("Ladder game failed to start %s", game)
metrics.matches.labels(queue.name, MatchLaunch.ERRORED).inc()

if game:
await game.on_game_finish()
Expand Down
23 changes: 17 additions & 6 deletions server/matchmaker/matchmaker_queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def __exit__(self, exc_type, exc_value, traceback):
else:
status = "errored"

metric = metrics.matchmaker_searches.labels(self.queue_name, status)
metric = metrics.matchmaker_search_duration.labels(self.queue_name, status)
metric.observe(total_time)


Expand Down Expand Up @@ -184,12 +184,17 @@ async def find_matches(self) -> None:

self._register_unmatched_searches(unmatched_searches)

number_of_matches = len(matches)
metrics.matches.labels(self.name).set(number_of_matches)

for search1, search2 in matches:
# TODO: Move this into algorithm, then don't need to recalculate
# quality_with? Probably not a major bottleneck though.
self._report_party_sizes(search1)
self._report_party_sizes(search2)

rating_imbalance = abs(search1.cumulative_rating - search2.cumulative_rating)
metrics.match_rating_imbalance.labels(self.name).set(rating_imbalance)

ratings = search1.displayed_ratings + search2.displayed_ratings
rating_variety = max(ratings) - min(ratings)
metrics.match_rating_variety.labels(self.name).set(rating_variety)

metrics.match_quality.labels(self.name).observe(
search1.quality_with(search2)
)
Expand All @@ -198,6 +203,12 @@ async def find_matches(self) -> None:
except Exception:
self._logger.exception("Match callback raised an exception!")

def _report_party_sizes(self, team):
    """
    Record the party size of every original search that makes up `team`.

    Increments the `matched_matchmaker_searches` counter once per search
    returned by `team.get_original_searches()`, labelled with this queue's
    name and the number of players in that search.
    """
    matched_counter = metrics.matched_matchmaker_searches
    for party in team.get_original_searches():
        party_size = len(party.players)
        matched_counter.labels(self.name, party_size).inc()

def _register_unmatched_searches(
self,
unmatched_searches: list[Search],
Expand Down
44 changes: 40 additions & 4 deletions server/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,48 @@

from prometheus_client import Counter, Gauge, Histogram, Info


class MatchLaunch:
    """
    Values passed as the `status` label of the `matches` counter by the
    ladder service, describing how an attempt to launch a matched game ended.
    """
    # The match was launched without an exception being raised.
    SUCCESSFUL = "successful"
    # Launching raised NotConnectedError (logged as "setup timed out").
    TIMED_OUT = "timed out"
    # Launching raised GameClosedError: a player closed their game instance.
    ABORTED_BY_PLAYER = "aborted by player"
    # Launching failed with any other exception.
    ERRORED = "errored"


info = Info("build", "Information collected on server start")

# ==========
# Matchmaker
# ==========
matches = Gauge("server_matchmaker_queue_matches", "Number of matches made", ["queue"])
matches = Counter(
"server_matchmaker_queue_matches_total",
"Number of matches made",
["queue", "status"]
)

# Incremented by the matchmaker queue once for every original search party
# that ends up in a match. The "player_size" label receives the number of
# players in that party (len(search.players)).
matched_matchmaker_searches = Counter(
    "server_matchmaker_queue_searches_matched_total",
    "Search parties that got matched",
    ["queue", "player_size"]
)

match_quality = Histogram(
"server_matchmaker_queue_quality",
"Quality of matches made",
["queue"],
buckets=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95],
buckets=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0],
)

# Absolute difference between the cumulative ratings of the two matched
# searches.
# NOTE(review): the matchmaker queue calls `.set()` on this gauge for each
# match it processes, so per queue it appears to retain only the value of the
# most recently processed match -- confirm that this is intended.
match_rating_imbalance = Gauge(
    "server_matchmaker_matches_imbalance",
    "Rating difference between the two teams",
    ["queue"],
)

# Highest minus lowest displayed rating across both matched searches.
# NOTE(review): also written with `.set()` once per match, so the same
# "last match wins" behaviour presumably applies -- confirm.
match_rating_variety = Gauge(
    "server_matchmaker_matches_rating_variety",
    "Maximum rating difference between two players in the game",
    ["queue"],
)

unmatched_searches = Gauge(
Expand All @@ -24,11 +54,11 @@
["queue"],
)

matchmaker_searches = Histogram(
matchmaker_search_duration = Histogram(
"server_matchmaker_queue_search_duration_seconds",
"Time spent searching for matches per search in seconds",
["queue", "status"],
buckets=[30, 60, 120, 180, 240, 300, 600, 1800, 3600],
buckets=[30, 60, 120, 180, 240, 300, 420, 600, 900, 1800, 3600],
)

matchmaker_players = Gauge(
Expand Down Expand Up @@ -113,6 +143,12 @@
["game_mode", "game_state"],
)

# Incremented by the game service when results are published for a game whose
# validity is ValidityState.VALID and whose rating_type is not None. The
# "leaderboard" label receives that rating_type.
rated_games = Counter(
    "server_game_rated_games_total",
    "Number of rated games",
    ["leaderboard"]
)


# ==============
# Rating Service
Expand Down

0 comments on commit 745d183

Please sign in to comment.