"""Players for Baghchal: a console HumanPlayer and an AlphaZero-style MCTSPlayer."""

import numpy as np

from baghchal.lookup_table import action_space
from monte_carlo_tree_search import MCTS


class HumanPlayer:
    """Console player: prompts for a move until the board accepts it."""

    def get_action(self, board):
        while True:
            move = input("Enter your move: ")
            try:
                board.validate(move)  # raises if the move is illegal
                return move
            except Exception as e:
                print(e)


class MCTSPlayer:
    """AI player based on Monte Carlo Tree Search."""

    def __init__(self, policy_value_fn,
                 cpuct=5, n_playout=2000, is_selfplay=0):
        self.mcts = MCTS(policy_value_fn, cpuct, n_playout)
        self.is_selfplay = is_selfplay

    def reset_player(self):
        self.mcts.update_with_move(-1)

    def get_action(self, board, temp=1e-3, return_prob=0):
        sensible_moves = board.possible_moves()
        # the pi vector returned by MCTS, as in the AlphaGo Zero paper;
        # 217 is the size of the Baghchal action space
        move_probs = np.zeros(217)
        if sensible_moves:
            acts, probs = self.mcts.get_move_probs(board, temp)
            for act, prob in zip(acts, probs):
                move_probs[action_space[act]] = prob
            if self.is_selfplay:
                # add Dirichlet noise for exploration (needed for
                # self-play training)
                move = np.random.choice(
                    acts,
                    p=0.75 * probs
                      + 0.25 * np.random.dirichlet(0.3 * np.ones(len(probs))))
                # update the root node and reuse the search tree
                self.mcts.update_with_move(move)
            else:
                # with the default temp=1e-3, this is almost equivalent
                # to choosing the move with the highest probability
                move = np.random.choice(acts, p=probs)
                # reset the root node
                self.mcts.update_with_move(-1)
            if return_prob:
                return move, move_probs
            return move
        print("WARNING: no legal moves available; the game is over.")