###############################################################################
# Network configuration                                                       #
###############################################################################
[network]
network_name = "chesscoach1"
role = "train|play"
[[networks]]
# Generate self-play games using the student network for speed until 800k steps.
name = "selfplay11"
[networks.training]
stages = [
    # Self-play
    { stage = "play" },
    # Train on self-play data: teacher first, then distill to the student
    { stage = "train", target = "teacher" },
    { stage = "save", target = "teacher" },
    { stage = "save_swa", target = "teacher" },
    { stage = "train", target = "student" },
    { stage = "save", target = "student" },
    { stage = "save_swa", target = "student" },
    # Strength test (STS rating)
    { stage = "strength_test", target = "teacher" },
    { stage = "strength_test", target = "student" },
]
games_path_training = "Games/Fresh7"
steps = 800_000 # Equivalent to 100,000 steps of batch size 4096
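# (Arithmetic: with the default batch_size = 512 below, 800_000 * 512 = 100_000 * 4096 training samples.)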
[networks.self_play]
network_type = "student"
[[networks]]
# To prepare for teacher-only prediction, copy selfplay11_000800000/teacher to selfplay11c_000800000/teacher
# and copy 800k steps' worth of chunks from Fresh7 to Fresh7b (6286000 games, 3143 chunks).
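# (3143 chunks follows from games_per_chunk = 2000 under [storage]: 6_286_000 / 2_000 = 3_143.)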
name = "selfplay11c"
[networks.training]
stages = [
    # Self-play
    { stage = "play" },
    # Train self-play, teacher
    { stage = "train", target = "teacher" },
    { stage = "save", target = "teacher" },
    { stage = "save_swa", target = "teacher" },
    # Strength test (STS rating)
    { stage = "strength_test", target = "teacher" },
]
games_path_training = "Games/Fresh7b"
[[networks]]
# To prepare for commentary training, copy selfplay11c_005600000/teacher/swa to selfplay11d_000000000/teacher/model.
name = "selfplay11d"
[networks.training]
stages = [
    # Train commentary, teacher
    { stage = "train_commentary", target = "teacher" },
    { stage = "save", target = "teacher" },
]
steps = 400_000 # Equivalent to 50,000 steps of batch size 4096
[[networks]]
# To finalize training, copy selfplay11c_005600000/teacher/swa and selfplay11d_000400000/teacher/commentary into chesscoach1_005600000.
name = "chesscoach1"
[networks.training]
stages = [
    # Already trained: just signal loading SWA weights.
    { stage = "save_swa", target = "teacher" },
]
[[networks]]
name = "supervised1"
[networks.training]
num_games = 2_000_000
steps = 256_000
value_loss_weight = 0.1
mcts_value_loss_weight = 0.0
stages = [
    # Train supervised, teacher
    { stage = "train", target = "teacher" },
    { stage = "save", target = "teacher" },
    # Strength test (STS rating)
    { stage = "strength_test", target = "teacher" },
]
games_path_training = "Games/Supervised"
[[networks]]
name = "benchmark1"
[networks.training]
stages = [
    # Self-play
    { stage = "play" },
]
games_path_training = "Games/Benchmark"
[networks.self_play]
allow_uniform = false
###############################################################################
# Default training and self-play configuration. Networks can override.       #
###############################################################################
[training]
num_games = 44_000_000
window_size = 1_000_000
batch_size = 512
commentary_batch_size = 512 # Use 64 on GTX 1080, 128 on V100, 512 on v3-8 TPU.
steps = 5_600_000 # Equivalent to 700,000 steps of batch size 4096
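# (The selfplay11 network above covers the first 800_000 of these steps; selfplay11c continues to the full
# 5_600_000, matching the selfplay11c_005600000 checkpoint referenced above.)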
warmup_steps = 8000
pgn_interval = 10_000
validation_interval = 2000
checkpoint_interval = 10_000
strength_test_interval = 40_000
steps_per_execution = 50
value_loss_weight = 1.0
mcts_value_loss_weight = 0.15
policy_loss_weight = 1.0
momentum = 0.9
commentary_learning_rate_min = 1e-5 # Not multiplied by device count: range-test on each environment
commentary_learning_rate_max = 1e-3 # Not multiplied by device count: range-test on each environment
commentary_cyclic_step_size = 20_000 # Half of the cycle length
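# (Sketch of one cycle, assuming a standard triangular cyclic schedule: the rate ramps from
# commentary_learning_rate_min to commentary_learning_rate_max over 20_000 steps, then back down over the
# next 20_000, giving the full 40_000-step cycle.)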
dataset_shuffle_positions_training = 524_288 # 2^19 (~9.5 GiB, 19568 bytes payload per position)
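# (524_288 positions * 19_568 bytes ~= 10.26 GB ~= 9.5 GiB of shuffle buffer.)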
dataset_shuffle_positions_validation = 4096 # 2^12
dataset_keep_game_proportion = 0.2
dataset_keep_position_proportion = 0.1
dataset_parallel_reads = 32
swa_decay = 0.5 # Good in practice for 10k-step checkpoint intervals; adjust geometrically for different intervals.
swa_minimum_contribution = 0.01 # Proportion, determines number of network checkpoints to average on resume.
swa_batchnorm_steps = 4000 # Becomes 500 actual steps on TPU. With default 0.99 batch normalization momentum, tested to be enough.
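# (Illustrative, assuming exponential-moving-average weighting: with swa_decay = 0.5 the k-th-newest
# checkpoint contributes on the order of 0.5^k, so contributions stay above swa_minimum_contribution = 0.01
# for roughly log(0.01) / log(0.5) ~= 6-7 checkpoints.)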
vocabulary_filename = "vocabulary.txt"
games_path_training = "Games/Training"
games_path_validation = "Games/Validation"
commentary_path = "Commentary"
wait_milliseconds = 300_000 # Check on Google Storage every 5 minutes when waiting for other machines.
stages = []
[training.learning_rate_schedule]
steps = [0, 800_000, 2_400_000, 4_000_000] # Equivalent to 100,000, 300,000, 500,000 with batch size 4096
rates = [2.0e-2, 2.5e-3, 2.5e-4, 2.5e-5] # Multiplied by device_count
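# (Worked example: between steps 800_000 and 2_400_000 the base rate is 2.5e-3, so on an 8-device v3-8 TPU
# the effective rate would be 2.5e-3 * 8 = 2.0e-2.)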
[self_play]
network_type = "teacher"
# Instead of using the latest weights found for "network_name", use these specific ones; e.g., selfplay6a_001000000.
network_weights = ""
allow_uniform = true
# Use 2*512 on GTX 1080 (student/teacher), 4*512 on 4x V100 (student/teacher), 8*512 on v3-8 TPU (student/teacher).
num_workers = 8
prediction_batch_size = 512
num_sampling_moves = 30
max_moves = 512
num_simulations = 800
root_dirichlet_alpha = 0.3
root_exploration_fraction = 0.25
exploration_rate_init = 2.35
exploration_rate_base = 185_000.0
linear_exploration_rate = 3590.0
linear_exploration_delay = 1_050_000.0
virtual_loss_coefficient = 0.1
moving_average_build = 0.8
moving_average_cap = 25_000_000.0
backpropagation_puct_threshold = 0.02
elimination_base_exponent = 10 # Start by giving the top 2^10 = 1024 children linear exploration incentive, decaying down to 2^1 = 2.
move_diversity_value_delta_threshold = 0.0089
move_diversity_temperature = 2.52
move_diversity_plies = 4
transposition_progress_threshold = 80
progress_decay_divisor = 151
minimax_material_maximum = 6080
minimax_visits_recurse = 3258
minimax_visits_ignore = 0.14
wait_for_updated_network = false
###############################################################################
# Miscellaneous configuration                                                 #
###############################################################################
[prediction_cache]
Hash = 8192 # Maps to PredictionCache_SizeMebibytes (named to auto-match UCI option).
max_ply = 30
[time_control]
safety_buffer_move_milliseconds = 100
safety_buffer_overall_milliseconds = 1000
fraction_of_remaining = 32
absolute_minimum_milliseconds = 150
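# (Illustrative, assuming a remaining-time / fraction_of_remaining budget: with 64_000 ms on the clock,
# a move gets roughly 64_000 / 32 - 100 = 1_900 ms, and never less than absolute_minimum_milliseconds.)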
[search]
# As a general rule, set threads to the number of logical GPUs/TPUs, but at least 2.
# Use 2*256 on GTX 1080 (teacher), 4*256 on 4x V100 (teacher), 8*256 on v3-8 TPU (teacher).
search_threads = 8
search_parallelism = 256
slowstart_nodes = 1024
slowstart_threads = 1
slowstart_parallelism = 32
gui_update_interval_nodes = 1000
[commentary]
top_p = 0.1
temperature = 1.5
[bot]
commentary_minimum_remaining_milliseconds = 30_000
ponder_buffer_max_milliseconds = 1500
ponder_buffer_min_milliseconds = 250
ponder_buffer_proportion = 0.01
increment_fraction = 0.95
[storage]
games_per_chunk = 2000
[paths]
# With the below config, a network may be saved to "gs://chesscoach-eu/ChessCoach/Networks/network_000010000".
# If the path can be accessed via tf.io.gfile then ChessCoach will run in a "cloud" configuration.
# This is currently only supported on Linux, as tf.io.gfile doesn't handle gs:// on Windows.
cloud_data_root = "gs://chesscoach-eu/ChessCoach"
networks = "Networks"
tensorboard = "TensorBoard"
logs = "Logs"
pgns = "Pgns"
optimization = "Optimization"
alpha_manager = "AlphaManager"
syzygy = "Syzygy"
strength_test_marker_prefix = "StrengthTestComplete"
[optimization]
# Mode can be "epd", using the "nodes required" metric, or "tournament", using a mini-tournament Elo metric.
mode = "epd"
resume_latest = false
log_interval = 10
plot_interval = 10
distributed_zone = "europe-west4-a"
distributed_hosts = [] # Only supported with "tournament" mode and alpha TPU VMs/pods currently.
distributed_vs_stockfish = false
# "Nodes required" metric is the node count when the solution was first hit as the principal variation
# without later switching away, or "epd_failure_nodes" if the wrong answer was given for "bestmove".
# The first limit of "epd_movetime_milliseconds" and "epd_nodes" hit ends the search for each position,
# with 0 meaning no limit.
epd = "Arasan21.epd"
epd_movetime_milliseconds = 10_000
epd_nodes = 0
epd_failure_nodes = 10_000_000
epd_position_limit = 10
# Elo metric is calculated relative to baseline (Stockfish 13 NNUE with 8 threads, 8192 MiB hash, 3-4-5 Syzygy tablebases).
tournament_games = 10
tournament_time_control = "60+0.6" # 60+0.6 fast (1.8 min each for 80 moves), 300+3 slow (9 min each)
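# (Arithmetic: 60 s + 80 moves * 0.6 s increment = 108 s = 1.8 min per player; 300 + 80 * 3 = 540 s = 9 min.)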
[optimization.parameters]
# Consumed by scikit-optimize, evaluated directly in Python. Examples:
# '(1, 5)'
# '(0.0, 1.0)'
# '(1e3, 1e6, "log-uniform")'
# '("small", "medium", "large")'
# NOTE: Updates are only seen by C++. Custom propagation is needed if Python needs to see an update.
# NOTE: Parameters must also be listed under [uci_options] when using mini-tournament-based optimization.
#fraction_of_remaining = '(20, 60)' # Example
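#exploration_rate_init = '(1.0, 4.0)' # Hypothetical example of a float range (exploration_rate_init is listed under [uci_options] below).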
[uci_options]
# NOTE: Updates are only seen by C++. Custom propagation is needed if Python needs to see an update.
# NOTE: Some options can only be set before initialization; e.g., search threads, parallelism, and weights.
network_type = { type = "string" }
network_weights = { type = "string" }
search_threads = { type = "spin", min = 1, max = 256 }
search_parallelism = { type = "spin", min = 1, max = 4096 }
fraction_of_remaining = { type = "spin", min = 5, max = 100 }
safety_buffer_move_milliseconds = { type = "spin", min = 0, max = 5000 }
safety_buffer_overall_milliseconds = { type = "spin", min = 0, max = 30000 }
Hash = { type = "spin", min = 0, max = 262_144 }
exploration_rate_init = { type = "float" }
exploration_rate_base = { type = "float" }
linear_exploration_rate = { type = "float" }
linear_exploration_delay = { type = "float" }
virtual_loss_coefficient = { type = "float" }
moving_average_build = { type = "float" }
moving_average_cap = { type = "float" }
backpropagation_puct_threshold = { type = "float" }
elimination_base_exponent = { type = "spin", min = 2, max = 16 }
move_diversity_value_delta_threshold = { type = "float" }
move_diversity_temperature = { type = "float" }
move_diversity_plies = { type = "spin", min = 0, max = 512 }
syzygy = { type = "string" }
transposition_progress_threshold = { type = "spin", min = 0, max = 100 }
progress_decay_divisor = { type = "spin", min = 100, max = 1000 }
minimax_material_maximum = { type = "spin", min = 0, max = 15258 }
minimax_visits_recurse = { type = "spin", min = 0, max = 10_000 }
minimax_visits_ignore = { type = "float" }
###############################################################################