Skip to content

Commit

Permalink
Merge pull request #6 from andersonfrailey/midseasontrades
Browse files Browse the repository at this point in the history
Allow mid-season transactions
  • Loading branch information
andersonfrailey authored Jan 30, 2022
2 parents afe764a + f7d4a8b commit 645b060
Showing 1 changed file with 103 additions and 39 deletions.
142 changes: 103 additions & 39 deletions satchel/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from pathlib import Path
from typing import Callable
from tqdm import tqdm
from datetime import datetime


CUR_PATH = Path(__file__).resolve().parent
Expand Down Expand Up @@ -55,6 +56,7 @@ def __init__(
self.talent_measure = talent_measure
self.transactions = transactions
self.schedule = pd.read_csv(SCHEDUEL_PATH)
self.schedule["START DATE"] = pd.to_datetime(self.schedule["START DATE"])
self.teams = constants.DIVS.keys()
self.random = np.random.default_rng(seed)
self.noise = noise
Expand All @@ -64,7 +66,7 @@ def __init__(
self.steamer_b_wt = steamer_b_wt
self.zips_b_wt = zips_b_wt

self.talent = self._calculate_talent(transactions)
self.talent, self.st_data = self._calculate_talent(transactions)

def simulate(
self,
Expand All @@ -85,21 +87,6 @@ def simulate(
SatchelResults
Instance of the SatchelResults class.
"""
# merge schedule and WAR data to get our dataset for simulations
data = pd.merge(
self.schedule,
self.talent[["Team", "talent"]],
left_on="away",
right_on="Team",
)
data = pd.merge(
data, self.talent[["Team", "talent"]], left_on="home", right_on="Team"
)
# clean up data after merge
data.drop(["Team_x", "Team_y"], axis=1, inplace=True)
data.rename(
columns={"talent_x": "away_talent", "talent_y": "home_talent"}, inplace=True
)
# counters to track outcomes
ws_counter = Counter() # world series championships
league_counter = Counter() # league championships
Expand All @@ -120,7 +107,7 @@ def simulate(
noise,
full_season,
) = self.simseason(
data,
self.st_data,
)
ws_counter.update([playoffs["ws"]])
div_counter.update(div_winners["Team"])
Expand All @@ -147,7 +134,7 @@ def simulate(
n,
self.transactions,
self.schedule,
data,
self.st_data,
noise,
full_seasons,
self.seed,
Expand All @@ -169,17 +156,19 @@ def simseason(self, data) -> tuple:
"""
data["h_talent"] = data["home_talent"]
data["a_talent"] = data["away_talent"]
_talent = self.talent[["Team", "talent"]].set_index("Team").to_dict("index")
_talent = (
self.talent[["Team", "final_talent"]].set_index("Team").to_dict("index")
)
# add random noise to team talent for the season
if self.noise:
_noise = self.random.normal(
scale=self.talent["talent"].std(), size=len(self.teams)
scale=self.talent["base_talent"].std(), size=len(self.teams)
)
team_noise = {team: _noise[i] for i, team in enumerate(self.teams)}
data["a_talent"] += np.array([team_noise[team] for team in data["away"]])
data["h_talent"] += np.array([team_noise[team] for team in data["home"]])
for team, value in team_noise.items():
_talent[team]["talent"] += value
_talent[team]["final_talent"] += value
# sim regular season
home_win_prob = np.exp(data["h_talent"]) / (
np.exp(data["a_talent"]) + np.exp(data["h_talent"])
Expand All @@ -193,9 +182,13 @@ def simseason(self, data) -> tuple:
)
data["winner"] = winner
data["loser"] = loser
results = pd.concat(
[winner.value_counts(), loser.value_counts()], axis=1
).reset_index()
wins = winner.value_counts().reset_index()
losses = loser.value_counts().reset_index()
# ensure that teams will always be in the same order
results = pd.merge(wins, losses, on="index")
results = results.sort_values(
["wins", "index"], ascending=False, kind="mergesort"
)
results.rename(columns={"index": "Team"}, inplace=True)
results["league"] = results["Team"].map(constants.LEAGUE)
results["division"] = results["Team"].map(constants.DIV)
Expand Down Expand Up @@ -235,8 +228,9 @@ def sim_round(teams, talent, n_games):
"""
team1 = 0
team2 = 0
team1_win_prob = np.exp(talent[teams[0]]["talent"]) / (
np.exp(talent[teams[0]]["talent"]) + np.exp(talent[teams[1]]["talent"])
team1_win_prob = np.exp(talent[teams[0]]["final_talent"]) / (
np.exp(talent[teams[0]]["final_talent"])
+ np.exp(talent[teams[1]]["final_talent"])
)
for _ in range(n_games):
prob = self.random.random()
Expand Down Expand Up @@ -398,32 +392,60 @@ def _calculate_talent(self, transactions=None):
)
batter_proj.set_index("playerid", inplace=True)

# conduct transactions
if transactions:
self._conduct_transactions(pitch_proj, batter_proj, transactions)

# group all of the WAR projections by team and add them
pwar_proj = pitch_proj.groupby("Team")["WAR_P"].sum()
bwar_proj = batter_proj.groupby("Team")["WAR_B"].sum()

talent = pd.concat([bwar_proj, pwar_proj], axis=1)
talent["total"] = talent.sum(axis=1)
talent.reset_index(inplace=True)
# calculate talent
# calculate baseline talent
if self.talent_measure == "median":
league_base = np.median(talent["total"])
elif self.talent_measure == "mean":
league_base = np.mean(talent["total"])
talent["talent"] = talent["total"] / league_base - 1
# baseline talent before any transactions
talent["base_talent"] = talent["total"] / league_base - 1

# merge on division info
talent["league"] = talent["Team"].map(constants.LEAGUE)
talent["division"] = talent["Team"].map(constants.DIV)
return talent

def _conduct_transactions(self, pitchers, batters, transactions):
# merge together schedule and talent data
st_data = pd.merge(
self.schedule,
talent[["Team", "total"]],
left_on="away",
right_on="Team",
)
st_data = pd.merge(
st_data, talent[["Team", "total"]], left_on="home", right_on="Team"
)
# clean up data after merge
st_data.drop(["Team_x", "Team_y"], axis=1, inplace=True)
st_data.rename(
columns={"total_x": "away_total", "total_y": "home_total"}, inplace=True
)

# conduct transactions
if transactions:
talent = self._conduct_transactions(
pitch_proj, batter_proj, transactions, st_data, talent
)
talent["final_talent"] = talent["final_total"] / league_base - 1
else:
talent["final_talent"] = talent["base_talent"]

# calculate talent levels used for simulations
st_data["away_talent"] = st_data["away_total"] / league_base - 1
st_data["home_talent"] = st_data["home_total"] / league_base - 1

return talent, st_data

def _conduct_transactions(self, pitchers, batters, transactions, st_data, talent):
"""Update the pitcher and hitter projection DFs with the transactions
specified
specified. Format should be:
{player_id: {'date': 'YYYY-MM-DD', 'team': new_team}}
Parameters
----------
Expand All @@ -436,18 +458,60 @@ def _conduct_transactions(self, pitchers, batters, transactions):
"""
assert isinstance(transactions, dict), "Transactions must be dictionary"
# loop through each transaction and update the player's team
for _id, _team in transactions.items():
last_date = datetime.strptime("1900-01-01", "%Y-%m-%d")
for _id, info in transactions.items():
# verify the team exists
team = _team.upper()
team = info["team"].upper()
# check that it's a valid team name, or no team at all
if team not in self.teams and team != "":
close = difflib.get_close_matches(team, self.teams)
msg = f"{team} is not a valid team. Close matches are: {close}"
raise ValueError(msg)
# find the old team and WAR of player being moved
if _id in pitchers.index:
pitchers.at[_id, "Team"] = team
old_team = pitchers.at[_id, "Team"]
war = pitchers.at[_id, "WAR_P"]
elif _id in batters.index:
batters.at[_id, "Team"] = team
old_team = batters.at[_id, "Team"]
war = batters.at[_id, "WAR_B"]
else:
msg = f"{_id} is an unrecognized player ID"
raise ValueError(msg)
# update the talent for each each team involved
st_data["away_total"] = np.where(
(st_data["START DATE"] >= info["date"]) & (st_data["away"] == team),
st_data["away_total"] + war,
st_data["away_total"],
)
st_data["home_total"] = np.where(
(st_data["START DATE"] >= info["date"]) & (st_data["home"] == team),
st_data["home_total"] + war,
st_data["home_total"],
)
# remove the player's WAR from their old team
st_data["away_total"] = np.where(
(st_data["START DATE"] >= info["date"]) & (st_data["away"] == old_team),
st_data["away_total"] - war,
st_data["away_total"],
)
st_data["home_total"] = np.where(
(st_data["START DATE"] >= info["date"]) & (st_data["home"] == old_team),
st_data["home_total"] - war,
st_data["home_total"],
)
# update date of last transaction
if datetime.strptime(info["date"], "%Y-%m-%d") > last_date:
last_date = datetime.strptime(info["date"], "%Y-%m-%d")

# find the final talent for each team after all the transactions. This
# will be used for the playoff simulations
finaldf = (
st_data[st_data["START DATE"] >= last_date]
.groupby("away")["away_total"]
.mean()
.reset_index()
.rename({"away": "Team", "away_total": "final_total"}, axis=1)
)
talent = talent.merge(finaldf, on="Team")

return talent

0 comments on commit 645b060

Please sign in to comment.