Tune matchmaker (FAForever#926)

* Add minority bonus and tune other config values
* Lower queue pop timer
* Fix tests
* WIP Calculate rating peak automatically
* Calculate rating peak from leaderboard rating journal
* Use ladder service for db queries
* Fix errors
* Calculate row count properly
* Fix failing integration test
* Refactor db connection
* Fix indentation
* Add metric for rating peak
1-alex98 · Nov 5, 2022 · 6768256 · 6768256
1 parent 66e3a06
commit 6768256
Show file tree

Hide file tree

Showing 15 changed files with 138 additions and 51 deletions.
diff --git a/server/config.py b/server/config.py
@@ -92,14 +92,15 @@ def __init__(self):
         # Values for the custom (i.e. not trueskill) game quality metric used by the matchmaker
         self.MINIMUM_GAME_QUALITY = 0.4
         # Difference of cumulated rating of the teams
-        self.MAXIMUM_RATING_IMBALANCE = 500
+        self.MAXIMUM_RATING_IMBALANCE = 250
         # stdev of the ratings of all participating players
-        self.MAXIMUM_RATING_DEVIATION = 500
+        self.MAXIMUM_RATING_DEVIATION = 250
         # Quality bonus for each failed matching attempt per full team
-        self.TIME_BONUS = 0.02
+        self.TIME_BONUS = 0.01
         self.MAXIMUM_TIME_BONUS = 0.2
-        self.NEWBIE_TIME_BONUS = 0.9
-        self.MAXIMUM_NEWBIE_TIME_BONUS = 2.7
+        self.NEWBIE_TIME_BONUS = 0.25
+        self.MAXIMUM_NEWBIE_TIME_BONUS = 3.0
+        self.MINORITY_BONUS = 1.0
 
         self.TWILIO_ACCOUNT_SID = ""
         self.TWILIO_TOKEN = ""
@@ -119,7 +120,7 @@ def __init__(self):
         self.LADDER_TOP_PLAYER_SEARCH_EXPANSION_MAX = 0.3
         self.LADDER_TOP_PLAYER_SEARCH_EXPANSION_STEP = 0.15
         # The maximum amount of time in seconds) to wait between pops.
-        self.QUEUE_POP_TIME_MAX = 180
+        self.QUEUE_POP_TIME_MAX = 90
         # The number of possible matches we would like to have when the queue
         # pops. The queue pop time will be adjusted based on the current rate of
         # players queuing to try and hit this number.

diff --git a/server/db/models.py b/server/db/models.py
@@ -217,6 +217,7 @@
 
 leaderboard_rating_journal = Table(
     "leaderboard_rating_journal", metadata,
+    Column("id",                        Integer,    primary_key=True),
     Column("game_player_stats_id",      Integer,    ForeignKey("game_player_stats.id")),
     Column("leaderboard_id",            Integer,    ForeignKey("leaderboard.id")),
     Column("rating_mean_before",        Float,      nullable=False),

diff --git a/server/ladder_service/ladder_service.py b/server/ladder_service/ladder_service.py
@@ -5,6 +5,7 @@
 import json
 import random
 import re
+import statistics
 from collections import defaultdict
 from typing import Awaitable, Callable, Optional
 
@@ -20,7 +21,8 @@
     game_featuredMods,
     game_player_stats,
     game_stats,
-    leaderboard
+    leaderboard,
+    leaderboard_rating_journal
 )
 from server.db.models import map as t_map
 from server.db.models import (
@@ -97,6 +99,7 @@ async def update_data(self) -> None:
                 queue.featured_mod = info["mod"]
                 queue.rating_type = info["rating_type"]
                 queue.team_size = info["team_size"]
+                queue.rating_peak = await self.fetch_rating_peak(info["rating_type"])
             queue.map_pools.clear()
             for map_pool_id, min_rating, max_rating in info["map_pools"]:
                 map_pool_name, map_list = map_pool_maps[map_pool_id]
@@ -221,6 +224,50 @@ async def fetch_matchmaker_queues(self, conn):
                 errored.add(name)
         return matchmaker_queues
 
+    async def fetch_rating_peak(self, rating_type: str) -> float:  # estimated from the highest-id journal rows of one leaderboard
+        async with self._db.acquire() as conn:
+            result = await conn.execute(
+                select([
+                    leaderboard_rating_journal.c.rating_mean_before,
+                    leaderboard_rating_journal.c.rating_deviation_before
+                ])
+                .select_from(leaderboard_rating_journal.join(leaderboard))
+                .where(leaderboard.c.technical_name == rating_type)
+                .order_by(leaderboard_rating_journal.c.id.desc())  # highest journal id first
+                .limit(1000)  # use at most 1000 rows for the estimate
+            )
+            rows = result.fetchall()
+            rowcount = len(rows)
+            # Peak = average of (mean - 3 * deviation) over the fetched rows; the 1000.0 default stays when there are none.
+            rating_peak = 1000.0
+            if rowcount > 0:
+                rating_peak = statistics.mean(
+                    row.rating_mean_before - 3 * row.rating_deviation_before for row in rows
+                )
+            metrics.leaderboard_rating_peak.labels(rating_type).set(rating_peak)
+            # A small sample is only reported; the estimate is used regardless.
+            if rowcount < 100:
+                self._logger.warning(
+                    "Could only fetch %s ratings for %s queue.",
+                    rowcount,
+                    rating_type
+                )
+            # Estimates outside 600..1200 are logged as warnings but still returned unchanged.
+            if rating_peak < 600 or rating_peak > 1200:
+                self._logger.warning(
+                    "Estimated rating peak for %s is %s. This could lead to issues with matchmaking.",
+                    rating_type,
+                    rating_peak
+                )
+            else:
+                self._logger.info(
+                    "Estimated rating peak for %s is %s.",
+                    rating_type,
+                    rating_peak
+                )
+
+            return rating_peak
+
     def start_search(
         self,
         players: list[Player],

diff --git a/server/matchmaker/algorithm/bucket_teams.py b/server/matchmaker/algorithm/bucket_teams.py
@@ -21,12 +21,12 @@ class BucketTeamMatchmaker(Matchmaker):
     """
 
     def find(
-        self, searches: Iterable[Search], team_size: int
+        self, searches: Iterable[Search], team_size: int, rating_peak: float
     ) -> tuple[list[Match], list[Search]]:
         teams, searches_without_team = self._find_teams(searches, team_size)
 
         matchmaker1v1 = StableMarriageMatchmaker()
-        matches, unmatched_searches = matchmaker1v1.find(teams, 1)
+        matches, unmatched_searches = matchmaker1v1.find(teams, 1, rating_peak)
 
         unmatched_searches.extend(searches_without_team)
         return matches, unmatched_searches

diff --git a/server/matchmaker/algorithm/matchmaker.py b/server/matchmaker/algorithm/matchmaker.py
@@ -11,7 +11,8 @@ class Matchmaker(ABC):
     def find(
         self,
         searches: Iterable[Search],
-        team_size: int
+        team_size: int,
+        rating_peak: float
     ) -> tuple[list[Match], list[Search]]:
         pass
 

diff --git a/server/matchmaker/algorithm/stable_marriage.py b/server/matchmaker/algorithm/stable_marriage.py
@@ -81,7 +81,7 @@ class StableMarriageMatchmaker(Matchmaker):
     """
 
     def find(
-        self, searches: Iterable[Search], team_size: int
+        self, searches: Iterable[Search], team_size: int, rating_peak: float
     ) -> tuple[list[Match], list[Search]]:
         if team_size != 1:
             self._logger.error(

diff --git a/server/matchmaker/algorithm/team_matchmaker.py b/server/matchmaker/algorithm/team_matchmaker.py
@@ -57,12 +57,14 @@ class TeamMatchMaker(Matchmaker):
     9. repeat 8. until the list is empty
     """
 
-    def find(self, searches: Iterable[Search], team_size: int) -> tuple[list[Match], list[Search]]:
+    def find(
+        self, searches: Iterable[Search], team_size: int, rating_peak: float
+    ) -> tuple[list[Match], list[Search]]:
         if not searches:
             return [], []
 
         if team_size == 1:
-            return StableMarriageMatchmaker().find(searches, 1)
+            return StableMarriageMatchmaker().find(searches, 1, rating_peak)
 
         searches = SortedList(searches, key=lambda s: s.average_rating)
         possible_games = []
@@ -77,7 +79,7 @@ def find(self, searches: Iterable[Search], team_size: int) -> tuple[list[Match],
             try:
                 participants = self.pick_neighboring_players(searches, index, team_size)
                 match = self.make_teams(participants, team_size)
-                game = self.assign_game_quality(match, team_size)
+                game = self.assign_game_quality(match, team_size, rating_peak)
                 possible_games.append(game)
             except NotEnoughPlayersException:
                 self._logger.warning("Couldn't pick enough players for a full game. Skipping this game...")
@@ -268,33 +270,39 @@ def _find_most_balanced_filler(self, avg: int, search: Search, single_player_sea
         self._logger.debug("used %s as best filler", [candidate])
         return candidate
 
-    def assign_game_quality(self, match: Match, team_size: int) -> GameCandidate:
+    def assign_game_quality(self, match: Match, team_size: int, rating_peak: float) -> GameCandidate:
         newbie_bonus = 0
         time_bonus = 0
+        minority_bonus = 0
         ratings = []
         for team in match:
             for search in team.get_original_searches():
                 ratings.append(search.average_rating)
-                # Time bonus accumulation for a game should not depend on team size or whether the participants are premade or not.
-                search_time_bonus = search.failed_matching_attempts * config.TIME_BONUS * len(search.players) / team_size
-                time_bonus += min(search_time_bonus, config.MAXIMUM_TIME_BONUS * len(search.players) / team_size)
+                # Time bonus accumulation for a game should not depend on
+                # team size or whether the participants are premade or not.
+                normalize_size = len(search.players) / team_size
+                search_time_bonus = search.failed_matching_attempts * config.TIME_BONUS * normalize_size
+                time_bonus += min(search_time_bonus, config.MAXIMUM_TIME_BONUS * normalize_size)
                 num_newbies = search.num_newbies()
                 search_newbie_bonus = search.failed_matching_attempts * config.NEWBIE_TIME_BONUS * num_newbies / team_size
                 newbie_bonus += min(search_newbie_bonus, config.MAXIMUM_NEWBIE_TIME_BONUS * num_newbies / team_size)
 
+                # Accumulated per search like the other bonuses; grows with the 4th power of the distance to the rating peak.
+                minority_bonus += ((search.average_rating - rating_peak) * 0.001) ** 4 * normalize_size * config.MINORITY_BONUS
         rating_disparity = abs(match[0].cumulative_rating - match[1].cumulative_rating)
         unfairness = rating_disparity / config.MAXIMUM_RATING_IMBALANCE
         deviation = statistics.pstdev(ratings)
         rating_variety = deviation / config.MAXIMUM_RATING_DEVIATION
 
         # Visually this creates a cone in the unfairness-rating_variety plane
         # that slowly raises with the time bonuses.
-        quality = 1 - sqrt(unfairness ** 2 + rating_variety ** 2) + time_bonus
+        quality = 1 - sqrt(unfairness ** 2 + rating_variety ** 2) + time_bonus + minority_bonus
         if not any(team.has_high_rated_player() for team in match):
             quality += newbie_bonus
         self._logger.debug(
             "bonuses: %s rating disparity: %s -> unfairness: %f deviation: %f -> variety: %f -> game quality: %f",
-            newbie_bonus + time_bonus, rating_disparity, unfairness, deviation, rating_variety, quality)
+            newbie_bonus + time_bonus + minority_bonus, rating_disparity, unfairness, deviation, rating_variety, quality
+        )
         return GameCandidate(match, quality)
 
     def pick_noncolliding_games(self, games: list[GameCandidate]) -> list[Match]:

diff --git a/server/matchmaker/matchmaker_queue.py b/server/matchmaker/matchmaker_queue.py
@@ -58,6 +58,7 @@ def __init__(
         self.featured_mod = featured_mod
         self.rating_type = rating_type
         self.team_size = team_size
+        self.rating_peak = 1000.0
         self.params = params or {}
         self.map_pools = {info[0].id: info for info in map_pools}
 
@@ -172,6 +173,7 @@ async def find_matches(self) -> None:
             self.matchmaker.find,
             searches,
             self.team_size,
+            self.rating_peak,
         )
 
         # filter out matches that were cancelled

diff --git a/server/metrics.py b/server/metrics.py
@@ -73,6 +73,13 @@ class MatchLaunch:
     ["queue"],
 )
 
+leaderboard_rating_peak = Gauge(
+    "server_leaderboard_rating_peak",
+    "Average rating of the recently active players in this leaderboard, "
+    "i.e. the peak of the bell curve",
+    ["rating_type"]  # labelled per rating type so each leaderboard gets its own series
+)
+
 # =====
 # Users
 # =====

diff --git a/tests/data/test-data.sql b/tests/data/test-data.sql
@@ -132,7 +132,7 @@ insert into leaderboard_rating (login_id, mean, deviation, total_games, leaderbo
   (101, 1500, 500, 0, 2),
   (102, 1500, 500, 0, 1),
   (102, 1500, 500, 0, 2),
-  (105, 1400, 150, 20, 3),
+  (105, 500, 100, 20, 3),
   (106, 900, 75, 20, 3)
 ;
 
@@ -276,10 +276,15 @@ insert into game_player_stats (gameId, playerId, AI, faction, color, team, place
   (41954, 2, 0, 0, 0, 1, 0, 1500, 500, NOW() + interval 2 minute),
   (41955, 2, 0, 0, 0, 1, 0, 1500, 500, NOW() + interval 3 minute);
 
-insert into game_player_stats (gameId, playerId, AI, faction, color, team, place, mean, deviation, scoreTime, after_mean) values
-  (41942, 51, 0, 0, 0, 2, 0, 1500, 500, NOW(), NULL),
-  (41943, 51, 0, 0, 0, 2, 0, 1500, 500, NOW(), 1400),
-  (41944, 51, 0, 0, 0, 2, 0, 1500, 500, NOW(), 1600);
+insert into game_player_stats (id, gameId, playerId, AI, faction, color, team, place, mean, deviation, scoreTime, after_mean) values
+  (1, 41942, 51, 0, 0, 0, 2, 0, 1500, 500, NOW(), NULL),
+  (2, 41943, 51, 0, 0, 0, 2, 0, 1500, 500, NOW(), 1400),
+  (3, 41944, 51, 0, 0, 0, 2, 0, 1500, 500, NOW(), 1600);
+
+insert into leaderboard_rating_journal (game_player_stats_id, leaderboard_id, rating_mean_before, rating_mean_after, rating_deviation_before, rating_deviation_after) values
+  (1, 1, 1200, 1210, 100, 100),
+  (2, 1, 1600, 1500, 500, 400),
+  (3, 3, 1400, 1410, 100, 100);
 
 insert into matchmaker_queue (id, technical_name, featured_mod_id, leaderboard_id, name_key, team_size, params, enabled) values
   (1, "ladder1v1", 6, 2, "matchmaker.ladder1v1", 1, NULL, true),

diff --git a/tests/integration_tests/test_teammatchmaker.py b/tests/integration_tests/test_teammatchmaker.py
@@ -236,6 +236,11 @@ async def test_game_matchmaking_with_parties(lobby_server):
 
 @fast_forward(30)
 async def test_newbie_matchmaking_with_parties(lobby_server):
+    """
+    This tests that newbies get matched even if the resulting
+    game would be too unbalanced normally
+    """
+
     # Two completely new tmm players
     id1, _, proto1 = await connect_and_sign_in(
         ("ladder1", "ladder1"), lobby_server

diff --git a/tests/unit_tests/test_ladder_service.py b/tests/unit_tests/test_ladder_service.py
@@ -54,6 +54,8 @@ async def test_load_from_database(ladder_service, queue_factory):
         queue = ladder_service.queues["ladder1v1"]
         assert queue.name == "ladder1v1"
         assert queue.get_game_options() is None
+        assert queue.rating_type == "ladder_1v1"
+        assert queue.rating_peak == 1000.0
         assert len(queue.map_pools) == 3
         assert list(queue.map_pools[1][0].maps.values()) == [
             Map(id=15, name="SCMP_015", path="maps/scmp_015.zip"),
@@ -91,7 +93,13 @@ async def test_load_from_database(ladder_service, queue_factory):
             }),
         ]
 
+        queue = ladder_service.queues["tmm2v2"]
+        assert queue.rating_type == "tmm_2v2"
+        assert queue.rating_peak == 1100.0
+
         queue = ladder_service.queues["gameoptions"]
+        assert queue.rating_type == "global"
+        assert queue.rating_peak == 500.0
         assert queue.get_game_options() == {
             "Share": "ShareUntilDeath",
             "UnitCap": 500

diff --git a/tests/unit_tests/test_matchmaker_algorithm_bucket_teams.py b/tests/unit_tests/test_matchmaker_algorithm_bucket_teams.py
@@ -254,7 +254,7 @@ def test_BucketTeamMatchmaker_1v1(player_factory):
 
     team_size = 1
     matchmaker = BucketTeamMatchmaker()
-    matches, unmatched_searches = matchmaker.find(searches, team_size)
+    matches, unmatched_searches = matchmaker.find(searches, team_size, 1000)
 
     assert len(matches) == num_players / 2 / team_size
     assert len(unmatched_searches) == num_players - 2 * team_size * len(matches)
@@ -267,7 +267,7 @@ def test_BucketTeamMatchmaker_2v2_single_searches(player_factory):
 
     team_size = 2
     matchmaker = BucketTeamMatchmaker()
-    matches, unmatched_searches = matchmaker.find(searches, team_size)
+    matches, unmatched_searches = matchmaker.find(searches, team_size, 1000)
 
     assert len(matches) == num_players / 2 / team_size
     assert len(unmatched_searches) == num_players - 2 * team_size * len(matches)
@@ -280,7 +280,7 @@ def test_BucketTeamMatchmaker_2v2_full_party_searches(player_factory):
 
     team_size = 2
     matchmaker = BucketTeamMatchmaker()
-    matches, unmatched_searches = matchmaker.find(searches, team_size)
+    matches, unmatched_searches = matchmaker.find(searches, team_size, 1000)
 
     assert len(matches) == num_players / 2 / team_size
     assert len(unmatched_searches) == num_players - 2 * team_size * len(matches)
@@ -296,7 +296,7 @@ def test_BucketTeammatchmaker_2v2_mixed_party_sizes(player_factory):
 
     team_size = 2
     matchmaker = BucketTeamMatchmaker()
-    matches, unmatched_searches = matchmaker.find(searches, team_size)
+    matches, unmatched_searches = matchmaker.find(searches, team_size, 1000)
 
     assert len(matches) == num_players / 2 / team_size
     assert len(unmatched_searches) == num_players - 2 * team_size * len(matches)
@@ -316,7 +316,7 @@ def test_2v2_count_unmatched_searches(player_factory):
 
     team_size = 2
     matchmaker = BucketTeamMatchmaker()
-    matches, unmatched_searches = matchmaker.find(searches, team_size)
+    matches, unmatched_searches = matchmaker.find(searches, team_size, 1000)
 
     assert len(matches) == 1
     number_of_unmatched_players = sum(