-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgameplay.py
205 lines (194 loc) · 9.17 KB
/
gameplay.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
from collections import defaultdict
from random import randint
from farkle import *
_DICE_PER_THROW = 6      # a fresh throw (start of turn, or all dice used up) uses six dice
_OPENING_SCORE = 500     # turn score at which the player becomes "open"
_WINNING_SCORE = 5000    # the game stops once the banked total reaches this


def _play_turn(policy, total_score, is_open, verbose):
    """Play one turn and return ``(turn_score, is_open)``.

    Dice are thrown repeatedly until either no scoring choice is available
    (a farkle, which makes the turn worth 0) or the player is open and the
    policy declines to roll again. While the player is not open, the policy's
    ``roll_again`` answer is ignored and another throw is forced.
    """
    turn_score = 0
    dice_count = _DICE_PER_THROW
    while True:
        dice = [randint(1, 6) for _ in range(dice_count)]
        if verbose:
            print(f"Player rolled {dice}")
        choices = return_all_choices(dice)
        if not choices:
            # Farkle: everything collected this turn is lost.
            # NOTE(review): `is_open` is returned unchanged, so a player who
            # reached the opening score earlier in this turn stays open for
            # the rest of the game even though nothing was banked. This
            # mirrors the existing behavior -- confirm it is the intended rule.
            return 0, is_open

        decision = policy(
            choices=choices,
            total_score=total_score,
            turn_score=turn_score,
            is_open=is_open,
        )
        choice = decision["choice"]
        roll_again = decision["roll_again"]
        turn_score += choice["score"]
        is_open = is_open or turn_score >= _OPENING_SCORE
        if verbose:
            claimed_points = choice["score"]
            dice_left = choice["remaining"]
            print(f"Player claims {claimed_points} points with {dice_left} dice remaining")

        if is_open and not roll_again:
            return turn_score, is_open

        # Re-roll all dice if the player has run out.
        remaining = choice["remaining"]
        dice_count = remaining if remaining > 0 else _DICE_PER_THROW


def play_single_player_game(policy, verbose=False):
    """Play through one single-player game of farkle using some policy.

    Turns are played until the banked total score reaches at least 5000.

    `policy` is any function that takes these keyword arguments:

    * `choices` - a list of choices as returned by `return_all_choices`
      (dicts that look like ``{"score": <int>, "remaining": <int>}``)
    * `total_score` - the total score of the game so far, NOT including the
      current turn
    * `turn_score` - the score of the current turn so far, not including the
      newly thrown dice
    * `is_open` - True if the player has opened with the dice selected so far
      (not including thrown dice not yet picked up); the player becomes open
      once a turn's score reaches 500 and stays open afterwards

    and returns a dict like this::

        {
            "choice": <the choice dict they choose>,
            "roll_again": <whether the player chooses to roll again>
        }

    The policy may set `roll_again` to false even if the player is not open;
    the game forces another roll anyway.

    If `verbose` is true, a running commentary of the game is printed.

    Returns a dict like this::

        {
            "total_score": <int>,   # the total score after the game ends
            "scores": <list<int>>,  # scores from each turn (0 for farkles)
            "turns": <int>          # the number of turns
        }
    """
    scores = []      # score of each turn
    total_score = 0  # == sum(scores)
    turn_count = 0
    is_open = False
    while total_score < _WINNING_SCORE:
        turn_count += 1
        if verbose:
            print("----------------------------------------")
            print(f"Turn {turn_count}, Score {total_score}")
        turn_score, is_open = _play_turn(policy, total_score, is_open, verbose)
        if verbose:
            print(f"Turn has ended. {turn_score} points earned in turn.")
        scores.append(turn_score)
        total_score += turn_score

    if verbose:
        print("----------------------------------------")
        print(f"GAME OVER. Score: {total_score}")
    result = {
        "total_score": total_score,
        "scores": scores,
        "turns": turn_count,
    }
    if verbose:
        print(result)
    return result
def analyze_n_single_player_games(n, policy, printStats=True, verboseStats=True, verboseProgress=True, verboseGames=False):
    """Run multiple games and give statistics.

    `n` - number of games to play
    `policy` - the policy function (see `play_single_player_game` for an
        explanation and `policy_naive.py` for an example)
    `printStats` - print statistic results (mean and standard deviation for
        turns per game and score per turn for the policy)
    `verboseStats` - print more verbose statistics (contains histograms)
    `verboseProgress` - print a period (".") before the first game and then
        once per 1,000 games simulated, followed by a newline at the end
    `verboseGames` - print the verbose output of every game simulated
        (warning: VERY verbose for large n)

    Original author's note: n=10,000 games takes about 5 seconds to complete
    for the demo naive policy.

    The standard deviations are population standard deviations (the squared
    deviations are divided by the full count). When no games are played
    (`n` is 0) every mean and standard deviation is reported as 0.0 instead
    of raising ZeroDivisionError.

    The return value is what `verboseStats` prints out, which looks something
    like this example::

        {
            "games": [
                {"turns": 15, "games": 1040},  # 1,040 of the simulated games took 15 turns
                ...
            ],
            "turns": [
                {"score": 750, "turns": 1605},  # 1,605 of the simulated turns scored 750
                ...
            ],
            "totalTurns": 159373,
            "totalGames": 10000,
            "meanTurnsPerGame": 15.9373,
            "stdvTurnsPerGame": 3.7843,
            "meanScorePerTurn": 331.3830,
            "stdvScorePerTurn": 322.6619
        }
    """
    turns_hist = defaultdict(int)  # turn score -> number of turns with that score
    games_hist = defaultdict(int)  # turns taken -> number of games of that length
    for game_index in range(n):
        if verboseProgress and game_index % 1000 == 0:
            print(".", end="", flush=True)  # status marker every 1,000 games
        trial = play_single_player_game(policy, verboseGames)
        games_hist[trial["turns"]] += 1
        for score in trial["scores"]:
            turns_hist[score] += 1
    if verboseProgress:
        print()

    turns_per_game = sorted(
        [{"turns": k, "games": v} for k, v in games_hist.items()],
        key=lambda bucket: bucket["turns"],
    )
    score_per_turn = sorted(
        [{"score": k, "turns": v} for k, v in turns_hist.items()],
        key=lambda bucket: bucket["score"],
    )
    turns_count = sum(bucket["turns"] * bucket["games"] for bucket in turns_per_game)
    # Number of games actually simulated; equal to n for any n >= 0.
    games_count = sum(games_hist.values())

    # Guard every division so an empty simulation yields zeros, not a crash.
    if games_count:
        mean_turns_per_game = turns_count / games_count
        stdv_turns_per_game = (
            sum(bucket["games"] * (bucket["turns"] - mean_turns_per_game) ** 2 for bucket in turns_per_game)
            / games_count
        ) ** 0.5
    else:
        mean_turns_per_game = 0.0
        stdv_turns_per_game = 0.0
    if turns_count:
        mean_score_per_turn = sum(bucket["turns"] * bucket["score"] for bucket in score_per_turn) / turns_count
        stdv_score_per_turn = (
            sum(bucket["turns"] * (bucket["score"] - mean_score_per_turn) ** 2 for bucket in score_per_turn)
            / turns_count
        ) ** 0.5
    else:
        mean_score_per_turn = 0.0
        stdv_score_per_turn = 0.0

    result = {
        "games": turns_per_game,
        "turns": score_per_turn,
        "totalTurns": turns_count,
        "totalGames": games_count,
        "meanTurnsPerGame": mean_turns_per_game,
        "stdvTurnsPerGame": stdv_turns_per_game,
        "meanScorePerTurn": mean_score_per_turn,
        "stdvScorePerTurn": stdv_score_per_turn,
    }
    if verboseStats:
        print(result)
    if printStats:
        print(f"Turns per game: {mean_turns_per_game:.4f} (\u00b1{stdv_turns_per_game:.4f})")
        print(f"Score per turn: {mean_score_per_turn:.4f} (\u00b1{stdv_score_per_turn:.4f})")
    return result
def compare_policies(n, policy1, policy2, verboseProgress=True):
    """Count how many games each policy wins and print the results.

    Each round plays one independent single-player game per policy. The
    policy that finishes in fewer turns wins the round; if both take the same
    number of turns, the higher final total score wins; otherwise the round
    is a tie.

    Original author's note: presumably this could be deduced from the
    histograms in `analyze_n_single_player_games` (or perhaps by using median
    or mode instead of mean), but this is a brute-force game-by-game
    comparison.

    `n` - play n games for each policy
    `policy1` - the first policy
    `policy2` - the second policy
    `verboseProgress` - print a period (".") before the first pair and then
        once per 10,000 pairs of games (note, 10,000, not 1,000), followed by
        a newline at the end

    Nothing is returned; the summary is printed. When `n` is 0 all
    percentages are printed as 0 instead of raising ZeroDivisionError.
    """
    policy1wins = 0
    policy2wins = 0
    print(f"Running {n} games for each of two policies:")
    for pair_index in range(n):
        if verboseProgress and pair_index % 10000 == 0:
            print(".", end="", flush=True)  # status marker every 10,000 pairs of games
        result1 = play_single_player_game(policy1)
        result2 = play_single_player_game(policy2)
        if result1["turns"] == result2["turns"]:
            # Same game length: the larger final score breaks the tie.
            if result1["total_score"] > result2["total_score"]:
                policy1wins += 1
            elif result1["total_score"] < result2["total_score"]:
                policy2wins += 1
            # Else they tie
        elif result1["turns"] < result2["turns"]:
            policy1wins += 1
        else:
            policy2wins += 1
    if verboseProgress:
        print()

    tie_total = n - policy1wins - policy2wins
    # Guard the divisions so that comparing zero games does not crash.
    if n > 0:
        policy1_percent = 100 * policy1wins / n
        policy2_percent = 100 * policy2wins / n
        tie_percent = tie_total / n
    else:
        policy1_percent = 0.0
        policy2_percent = 0.0
        tie_percent = 0.0
    print(f"Policy 1 wins: {policy1_percent:.4f} % of games ({policy1wins}/{n})")
    print(f"Policy 2 wins: {policy2_percent:.4f} % of games ({policy2wins}/{n})")
    print(f"Policies tie: {100 * tie_percent:.4f} % of games ({tie_total}/{n})")
if __name__ == "__main__":
    # Quick manual smoke check of the helpers pulled in from `farkle`:
    # print each helper's result for one fixed roll.
    farkle_check = is_farkle([2, 3])
    print(farkle_check)
    sample_choices = return_all_choices([1, 2, 2, 2, 4, 5])
    print(sample_choices)