###############################################################################
# Network configuration                                                       #
###############################################################################
[network]
network_name = "chesscoach1"
role = "train|play"
[[networks]]
# Generate self-play games using the student network for speed until 800k steps.
name = "selfplay11"
[networks.training]
stages = [
    # Self-play
    { stage = "play" },
    # Train on self-play data: teacher first, then distill to the student
    { stage = "train", target = "teacher" },
    { stage = "save", target = "teacher" },
    { stage = "save_swa", target = "teacher" },
    { stage = "train", target = "student" },
    { stage = "save", target = "student" },
    { stage = "save_swa", target = "student" },
    # Strength test (STS rating)
    { stage = "strength_test", target = "teacher" },
    { stage = "strength_test", target = "student" },
]
games_path_training = "Games/Fresh7"
steps = 800_000 # Equivalent to 100,000 steps of batch size 4096
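# (Arithmetic: with the default batch_size = 512 below, 800_000 * 512 = 100_000 * 4096 training samples.)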
[networks.self_play]
network_type = "student"
[[networks]]
# To prepare for teacher-only prediction, copy selfplay11_000800000/teacher to selfplay11c_000800000/teacher
# and copy 800k steps' worth of chunks from Fresh7 to Fresh7b (6286000 games, 3143 chunks).
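# (3143 chunks follows from games_per_chunk = 2000 under [storage]: 6_286_000 / 2_000 = 3_143.)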
name = "selfplay11c"
[networks.training]
stages = [
    # Self-play
    { stage = "play" },
    # Train self-play, teacher
    { stage = "train", target = "teacher" },
    { stage = "save", target = "teacher" },
    { stage = "save_swa", target = "teacher" },
    # Strength test (STS rating)
    { stage = "strength_test", target = "teacher" },
]
games_path_training = "Games/Fresh7b"
[[networks]]
# To prepare for commentary training, copy selfplay11c_005600000/teacher/swa to selfplay11d_000000000/teacher/model.
name = "selfplay11d"
[networks.training]
stages = [
    # Train commentary, teacher
    { stage = "train_commentary", target = "teacher" },
    { stage = "save", target = "teacher" },
]
steps = 400_000 # Equivalent to 50,000 steps of batch size 4096
[[networks]]
# To finalize training, copy selfplay11c_005600000/teacher/swa and selfplay11d_000400000/teacher/commentary into chesscoach1_005600000.
name = "chesscoach1"
[networks.training]
stages = [
    # Already trained: just signal loading SWA weights.
    { stage = "save_swa", target = "teacher" },
]
[[networks]]
name = "supervised1"
[networks.training]
num_games = 2_000_000
steps = 256_000
value_loss_weight = 0.1
mcts_value_loss_weight = 0.0
stages = [
    # Train supervised, teacher
    { stage = "train", target = "teacher" },
    { stage = "save", target = "teacher" },
    # Strength test (STS rating)
    { stage = "strength_test", target = "teacher" },
]
games_path_training = "Games/Supervised"
[[networks]]
name = "benchmark1"
[networks.training]
stages = [
    # Self-play
    { stage = "play" },
]
games_path_training = "Games/Benchmark"
[networks.self_play]
allow_uniform = false
###############################################################################
# Default training and self-play configuration. Networks can override.       #
###############################################################################
[training]
num_games = 44_000_000
window_size = 1_000_000
batch_size = 512
commentary_batch_size = 512 # Use 64 on GTX 1080, 128 on V100, 512 on v3-8 TPU.
steps = 5_600_000 # Equivalent to 700,000 steps of batch size 4096
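# (The selfplay11 network above covers the first 800_000 of these steps; selfplay11c continues to the full
# 5_600_000, matching the selfplay11c_005600000 checkpoint referenced above.)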
warmup_steps = 8000
pgn_interval = 10_000
validation_interval = 2000
checkpoint_interval = 10_000
strength_test_interval = 40_000
steps_per_execution = 50
value_loss_weight = 1.0
mcts_value_loss_weight = 0.15
policy_loss_weight = 1.0
momentum = 0.9
commentary_learning_rate_min = 1e-5 # Not multiplied by device count: range-test on each environment
commentary_learning_rate_max = 1e-3 # Not multiplied by device count: range-test on each environment
commentary_cyclic_step_size = 20_000 # Half of the cycle length
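# (Sketch of one cycle, assuming a standard triangular cyclic schedule: the rate ramps from
# commentary_learning_rate_min to commentary_learning_rate_max over 20_000 steps, then back down over the
# next 20_000, giving the full 40_000-step cycle.)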
dataset_shuffle_positions_training = 524_288 # 2^19 (~9.5 GiB, 19568 bytes payload per position)
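# (524_288 positions * 19_568 bytes ~= 10.26 GB ~= 9.5 GiB of shuffle buffer.)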
dataset_shuffle_positions_validation = 4096 # 2^12
dataset_keep_game_proportion = 0.2
dataset_keep_position_proportion = 0.1
dataset_parallel_reads = 32
swa_decay = 0.5 # Good in practice for 10k-step checkpoint intervals; adjust geometrically for different intervals.
swa_minimum_contribution = 0.01 # Proportion, determines number of network checkpoints to average on resume.
swa_batchnorm_steps = 4000 # Becomes 500 actual steps on TPU. With default 0.99 batch normalization momentum, tested to be enough.
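# (Illustrative, assuming exponential-moving-average weighting: with swa_decay = 0.5 the k-th-newest
# checkpoint contributes on the order of 0.5^k, so contributions stay above swa_minimum_contribution = 0.01
# for roughly log(0.01) / log(0.5) ~= 6-7 checkpoints.)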
vocabulary_filename = "vocabulary.txt"
games_path_training = "Games/Training"
games_path_validation = "Games/Validation"
commentary_path = "Commentary"
wait_milliseconds = 300_000 # Check on Google Storage every 5 minutes when waiting for other machines.
stages = []
[training.learning_rate_schedule]
steps = [0, 800_000, 2_400_000, 4_000_000] # Equivalent to 100,000, 300,000, 500,000 with batch size 4096
rates = [2.0e-2, 2.5e-3, 2.5e-4, 2.5e-5] # Multiplied by device_count
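# (Worked example: between steps 800_000 and 2_400_000 the base rate is 2.5e-3, so on an 8-device v3-8 TPU
# the effective rate would be 2.5e-3 * 8 = 2.0e-2.)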
[self_play]
network_type = "teacher"
# Instead of using the latest weights found for "network_name", use these specific ones; e.g., selfplay6a_001000000.
network_weights = ""
allow_uniform = true
# Use 2*512 on GTX 1080 (student/teacher), 4*512 on 4x V100 (student/teacher), 8*512 on v3-8 TPU (student/teacher).
num_workers = 8
prediction_batch_size = 512
num_sampling_moves = 30
max_moves = 512
num_simulations = 800
root_dirichlet_alpha = 0.3
root_exploration_fraction = 0.25
exploration_rate_init = 2.35
exploration_rate_base = 185_000.0
linear_exploration_rate = 3590.0
linear_exploration_delay = 1_050_000.0
virtual_loss_coefficient = 0.1
moving_average_build = 0.8
moving_average_cap = 25_000_000.0
backpropagation_puct_threshold = 0.02
elimination_base_exponent = 10 # Start by giving the top 2^10 = 1024 children linear exploration incentive, decaying down to 2^1 = 2.
move_diversity_value_delta_threshold = 0.0089
move_diversity_temperature = 2.52
move_diversity_plies = 4
transposition_progress_threshold = 80
progress_decay_divisor = 151
minimax_material_maximum = 6080
minimax_visits_recurse = 3258
minimax_visits_ignore = 0.14
wait_for_updated_network = false
###############################################################################
# Miscellaneous configuration                                                 #
###############################################################################
[prediction_cache]
Hash = 8192 # Maps to PredictionCache_SizeMebibytes (named to auto-match UCI option).
max_ply = 30
[time_control]
safety_buffer_move_milliseconds = 100
safety_buffer_overall_milliseconds = 1000
fraction_of_remaining = 32
absolute_minimum_milliseconds = 150
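# (Illustrative, assuming a remaining-time / fraction_of_remaining budget: with 64_000 ms on the clock,
# a move gets roughly 64_000 / 32 - 100 = 1_900 ms, and never less than absolute_minimum_milliseconds.)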
[search]
# As a general rule, set threads to the number of logical GPUs/TPUs, but at least 2.
# Use 2*256 on GTX 1080 (teacher), 4*256 on 4x V100 (teacher), 8*256 on v3-8 TPU (teacher).
search_threads = 8
search_parallelism = 256
slowstart_nodes = 1024
slowstart_threads = 1
slowstart_parallelism = 32
gui_update_interval_nodes = 1000
[commentary]
top_p = 0.1
temperature = 1.5
[bot]
commentary_minimum_remaining_milliseconds = 30_000
ponder_buffer_max_milliseconds = 1500
ponder_buffer_min_milliseconds = 250
ponder_buffer_proportion = 0.01
increment_fraction = 0.95
[storage]
games_per_chunk = 2000
[paths]
# With the below config, a network may be saved to "gs://chesscoach-eu/ChessCoach/Networks/network_000010000".
# If the path can be accessed via tf.io.gfile then ChessCoach will run in a "cloud" configuration.
# This is currently only supported on Linux, as tf.io.gfile doesn't handle gs:// on Windows.
cloud_data_root = "gs://chesscoach-eu/ChessCoach"
networks = "Networks"
tensorboard = "TensorBoard"
logs = "Logs"
pgns = "Pgns"
optimization = "Optimization"
alpha_manager = "AlphaManager"
syzygy = "Syzygy"
strength_test_marker_prefix = "StrengthTestComplete"
[optimization]
# Mode can be "epd", using the "nodes required" metric, or "tournament", using a mini-tournament Elo metric.
mode = "epd"
resume_latest = false
log_interval = 10
plot_interval = 10
distributed_zone = "europe-west4-a"
distributed_hosts = [] # Only supported with "tournament" mode and alpha TPU VMs/pods currently.
distributed_vs_stockfish = false
# "Nodes required" metric is the node count when the solution was first hit as the principal variation
# without later switching away, or "epd_failure_nodes" if the wrong answer was given for "bestmove".
# The first limit of "epd_movetime_milliseconds" and "epd_nodes" hit ends the search for each position,
# with 0 meaning no limit.
epd = "Arasan21.epd"
epd_movetime_milliseconds = 10_000
epd_nodes = 0
epd_failure_nodes = 10_000_000
epd_position_limit = 10
# Elo metric is calculated relative to baseline (Stockfish 13 NNUE with 8 threads, 8192 MiB hash, 3-4-5 Syzygy tablebases).
tournament_games = 10
tournament_time_control = "60+0.6" # 60+0.6 fast (1.8 min each for 80 moves), 300+3 slow (9 min each)
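# (Arithmetic: 60 s + 80 moves * 0.6 s increment = 108 s = 1.8 min per player; 300 + 80 * 3 = 540 s = 9 min.)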
[optimization.parameters]
# Consumed by scikit-optimize, evaluated directly in Python. Examples:
# '(1, 5)'
# '(0.0, 1.0)'
# '(1e3, 1e6, "log-uniform")'
# '("small", "medium", "large")'
# NOTE: Updates are only seen by C++. Custom propagation is needed if Python needs to see an update.
# NOTE: Parameters must also be listed under [uci_options] when using mini-tournament-based optimization.
#fraction_of_remaining = '(20, 60)' # Example
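#exploration_rate_init = '(1.0, 4.0)' # Hypothetical example of a float range (exploration_rate_init is listed under [uci_options] below).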
[uci_options]
# NOTE: Updates are only seen by C++. Custom propagation is needed if Python needs to see an update.
# NOTE: Some options can only be set before initialization; e.g., search threads, parallelism, and weights.
network_type = { type = "string" }
network_weights = { type = "string" }
search_threads = { type = "spin", min = 1, max = 256 }
search_parallelism = { type = "spin", min = 1, max = 4096 }
fraction_of_remaining = { type = "spin", min = 5, max = 100 }
safety_buffer_move_milliseconds = { type = "spin", min = 0, max = 5000 }
safety_buffer_overall_milliseconds = { type = "spin", min = 0, max = 30000 }
Hash = { type = "spin", min = 0, max = 262_144 }
exploration_rate_init = { type = "float" }
exploration_rate_base = { type = "float" }
linear_exploration_rate = { type = "float" }
linear_exploration_delay = { type = "float" }
virtual_loss_coefficient = { type = "float" }
moving_average_build = { type = "float" }
moving_average_cap = { type = "float" }
backpropagation_puct_threshold = { type = "float" }
elimination_base_exponent = { type = "spin", min = 2, max = 16 }
move_diversity_value_delta_threshold = { type = "float" }
move_diversity_temperature = { type = "float" }
move_diversity_plies = { type = "spin", min = 0, max = 512 }
syzygy = { type = "string" }
transposition_progress_threshold = { type = "spin", min = 0, max = 100 }
progress_decay_divisor = { type = "spin", min = 100, max = 1000 }
minimax_material_maximum = { type = "spin", min = 0, max = 15258 }
minimax_visits_recurse = { type = "spin", min = 0, max = 10_000 }
minimax_visits_ignore = { type = "float" }
###############################################################################