forked from hijkzzz/alpha-zero-gomoku
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathconfig.py
68 lines (56 loc) · 2.7 KB
/
config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# Central hyper-parameter table for the project. The mapping is assembled
# one section at a time so related knobs stay grouped; insertion order is
# the same as the section order below.
config = {}

# --- gomoku ---------------------------------------------------------------
config.update({
    'n': 15,                 # board size
    'n_in_row': 5,           # "n in row" setting
})

# --- mcts -----------------------------------------------------------------
config.update({
    'libtorch_use_gpu': True,   # libtorch runs on CUDA
    'num_mcts_threads': 1,      # number of MCTS threads
    'num_mcts_sims': 1600,      # number of MCTS simulations
    'c_puct': 5,                # PUCT coefficient
    'c_virtual_loss': 3,        # virtual-loss coefficient
})

# --- neural network -------------------------------------------------------
config.update({
    'train_use_gpu': True,      # train the neural network with CUDA
    'lr': 0.001,                # learning rate
    'l2': 0.0001,               # L2 term (presumably weight decay; confirm)
    'num_channels': 256,        # channel count of the convolutional network
    'num_layers': 8,            # number of residual layers
    'epochs': 0.6,              # training epochs (fractional value is as-is)
    'batch_size': 512,          # batch size for training, not for simulation
})

# --- train ----------------------------------------------------------------
config.update({
    'num_iters': 10000,
    'num_eps': 12,              # self-play games per iteration; must be a multiple of 6
    'num_train_threads': 1,     # self-play parallelism
    'num_explore': 5,           # exploration steps in a game
    'temp': 1,                  # temperature
    'dirichlet_alpha': 0.3,     # action noise in self-play games
    'examples_buffer_max_len': 20,  # maximum length of the examples buffer
    'noise_min': 0,
    'noise_max': 0.4,
})

# --- league training: warm-up / checks ------------------------------------
config.update({
    'num_warmup': 40,           # warm-up rounds before league training
    'check_freq': 40,           # how often the model is tested
})

# --- alphazero evaluations ------------------------------------------------
config.update({
    'update_threshold': 0.55,   # threshold for updating the model
    'num_contest': 10,          # new-vs-old model comparisons (alphazero-like rating evaluation)
})

# --- simsiam hyper-parameters ---------------------------------------------
config.update({
    'use_simsiam': True,
    'simsiam_loss_factor': 1,
    'simsiam_move_rate': 0.5,
    'simsiam_turn_rate': 0.5,
    'simsiam_flip_rate': 0.25,
    'apply_turn': False,
})

# --- whr rating -----------------------------------------------------------
config.update({
    'w2': 30,
})

# --- test -----------------------------------------------------------------
config.update({
    'human_color': 1,           # human player's color
})

# --- league training: main agent ------------------------------------------
config.update({
    'main_agent_selfplay_rate': 0.35,
    'main_agent_pfsp_rate': 0.85,   # 0.35 + 0.5
    # The original AlphaStar paper says that main agents are discarded once
    # they are beaten by the main agent at a rate of 1. That is impossible
    # to reach on account of WHR, so an approximate rate is used instead.
    'main_agent_discard_rate': 0.98,
})

# Action size: one action per board cell (n * n).
config['action_size'] = config['n'] * config['n']