From 648b230494c72a6ab39f85c9ddb73a703b184f43 Mon Sep 17 00:00:00 2001
From: Mankit Pong <2994070164@qq.com>
Date: Mon, 14 May 2018 17:36:49 +0800
Subject: [PATCH 01/18] Add multi GPU training support.
Pull request #1386.
---
training/tf/tfprocess.py | 213 +++++++++++++++++++++++++++------------
1 file changed, 146 insertions(+), 67 deletions(-)
diff --git a/training/tf/tfprocess.py b/training/tf/tfprocess.py
index 7542ce3bc..0dba54c7a 100644
--- a/training/tf/tfprocess.py
+++ b/training/tf/tfprocess.py
@@ -16,27 +16,30 @@
# You should have received a copy of the GNU General Public License
# along with Leela Zero. If not, see <http://www.gnu.org/licenses/>.
+import math
import numpy as np
import os
-import random
import tensorflow as tf
import time
import unittest
-def weight_variable(shape):
+
+def weight_variable(name, shape):
"""Xavier initialization"""
stddev = np.sqrt(2.0 / (sum(shape)))
initial = tf.truncated_normal(shape, stddev=stddev)
- weights = tf.Variable(initial)
+ weights = tf.get_variable(name, initializer=initial)
tf.add_to_collection(tf.GraphKeys.WEIGHTS, weights)
return weights
# Bias weights for layers not followed by BatchNorm
# We do not regularlize biases, so they are not
# added to the regularlizer collection
-def bias_variable(shape):
+def bias_variable(name, shape):
initial = tf.constant(0.0, shape=shape)
- return tf.Variable(initial)
+ bias = tf.get_variable(name, initializer=initial)
+ return bias
+
def conv2d(x, W):
return tf.nn.conv2d(x, W, data_format='NCHW',
@@ -108,6 +111,9 @@ def __init__(self):
self.RESIDUAL_FILTERS = 128
self.RESIDUAL_BLOCKS = 6
+ # Set number of GPUs for training
+ self.gpus_num = 1
+
# For exporting
self.weights = []
@@ -156,13 +162,55 @@ def init(self, batch_size, macrobatch=1, logbase='leelalogs'):
self.init_net(planes, probs, winner)
def init_net(self, planes, probs, winner):
- self.x = planes # (tf.float32, [None, 18, 19 * 19])
- self.y_ = probs # (tf.float32, [None, 362])
- self.z_ = winner # (tf.float32, [None, 1])
+ self.y_ = probs # (tf.float32, [None, 362])
+ self.sx = tf.split(planes, self.gpus_num)
+ self.sy_ = tf.split(probs, self.gpus_num)
+ self.sz_ = tf.split(winner, self.gpus_num)
self.batch_norm_count = 0
- self.y_conv, self.z_conv = self.construct_net(self.x)
+ self.reuse_var = None
- if self.swa_enabled == True:
+ # You need to change the learning rate here if you are training
+ # from a self-play training set, for example start with 0.005 instead.
+ opt = tf.train.MomentumOptimizer(
+ learning_rate=0.05, momentum=0.9, use_nesterov=True)
+
+ # Construct net here.
+ tower_grads = []
+ tower_loss = []
+ tower_policy_loss = []
+ tower_mse_loss = []
+ tower_reg_term = []
+ tower_y_conv = []
+ with tf.variable_scope(tf.get_variable_scope()):
+ for i in range(self.gpus_num):
+ with tf.device("/gpu:%d" % i):
+ with tf.name_scope("tower_%d" % i):
+ loss, policy_loss, mse_loss, reg_term, y_conv = self.tower_loss(
+ self.sx[i], self.sy_[i], self.sz_[i])
+
+ # Reset batchnorm key to 0.
+ self.reset_batchnorm_key()
+
+ tf.get_variable_scope().reuse_variables()
+ grads = opt.compute_gradients(loss)
+
+ tower_grads.append(grads)
+ tower_loss.append(loss)
+ tower_policy_loss.append(policy_loss)
+ tower_mse_loss.append(mse_loss)
+ tower_reg_term.append(reg_term)
+ tower_y_conv.append(y_conv)
+
+ # Average gradients from different GPUs
+ self.loss = tf.reduce_mean(tower_loss)
+ self.policy_loss = tf.reduce_mean(tower_policy_loss)
+ self.mse_loss = tf.reduce_mean(tower_mse_loss)
+ self.reg_term = tf.reduce_mean(tower_reg_term)
+ self.y_conv = tf.concat(tower_y_conv, axis=0)
+ self.mean_grads = self.average_gradients(tower_grads)
+
+ # Do swa after we contruct the net
+ if self.swa_enabled is True:
# Count of networks accumulated into SWA
self.swa_count = tf.Variable(0., name='swa_count', trainable=False)
# Count of networks to skip
@@ -183,38 +231,13 @@ def init_net(self, planes, probs, winner):
self.swa_accum_op = tf.assign_add(n, 1.)
self.swa_load_op = tf.group(*load)
- # Calculate loss on policy head
- cross_entropy = \
- tf.nn.softmax_cross_entropy_with_logits(labels=self.y_,
- logits=self.y_conv)
- self.policy_loss = tf.reduce_mean(cross_entropy)
-
- # Loss on value head
- self.mse_loss = \
- tf.reduce_mean(tf.squared_difference(self.z_, self.z_conv))
-
- # Regularizer
- regularizer = tf.contrib.layers.l2_regularizer(scale=0.0001)
- reg_variables = tf.get_collection(tf.GraphKeys.WEIGHTS)
- self.reg_term = \
- tf.contrib.layers.apply_regularization(regularizer, reg_variables)
-
- # For training from a (smaller) dataset of strong players, you will
- # want to reduce the factor in front of self.mse_loss here.
- self.loss = 1.0 * self.policy_loss + 1.0 * self.mse_loss + self.reg_term
-
- # You need to change the learning rate here if you are training
- # from a self-play training set, for example start with 0.005 instead.
- opt = tf.train.MomentumOptimizer(
- learning_rate=0.05, momentum=0.9, use_nesterov=True)
-
- # Compute and accumulate gradients
+ # Accumulate gradients
self.update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
total_grad=[]
grad_ops=[]
clear_var=[]
with tf.control_dependencies(self.update_ops):
- self.grad_op_real = opt.compute_gradients(self.loss)
+ self.grad_op_real = self.mean_grads
for (g, v) in self.grad_op_real:
if g is None:
total_grad.append((g,v))
@@ -261,6 +284,47 @@ def init_net(self, planes, probs, winner):
# Initialize all variables
self.session.run(tf.global_variables_initializer())
+ def average_gradients(self, tower_grads):
+ # Average gradients from different GPUs
+ average_grads = []
+ for grad_and_vars in zip(*tower_grads):
+ grads = []
+ for g, _ in grad_and_vars:
+ expanded_g = tf.expand_dims(g, dim=0)
+ grads.append(expanded_g)
+
+ grad = tf.concat(grads, axis=0)
+ grad = tf.reduce_mean(grad, reduction_indices=0)
+
+ v = grad_and_vars[0][1]
+ grad_and_var = (grad, v)
+ average_grads.append(grad_and_var)
+ return average_grads
+
+ def tower_loss(self, x, y_, z_):
+ y_conv, z_conv = self.construct_net(x)
+ # Calculate loss on policy head
+ cross_entropy = \
+ tf.nn.softmax_cross_entropy_with_logits(labels=y_,
+ logits=y_conv)
+ policy_loss = tf.reduce_mean(cross_entropy)
+
+ # Loss on value head
+ mse_loss = \
+ tf.reduce_mean(tf.squared_difference(z_, z_conv))
+
+ # Regularizer
+ regularizer = tf.contrib.layers.l2_regularizer(scale=0.0001)
+ reg_variables = tf.get_collection(tf.GraphKeys.WEIGHTS)
+ reg_term = \
+ tf.contrib.layers.apply_regularization(regularizer, reg_variables)
+
+ # For training from a (smaller) dataset of strong players, you will
+ # want to reduce the factor in front of self.mse_loss here.
+ loss = 1.0 * policy_loss + 1.0 * mse_loss + reg_term
+
+ return loss, policy_loss, mse_loss, reg_term, y_conv
+
def assign(self, var, values):
try:
self.session.run(tf.assign(var, values))
@@ -432,6 +496,14 @@ def get_batchnorm_key(self):
self.batch_norm_count += 1
return result
+ def reset_batchnorm_key(self):
+ self.batch_norm_count = 0
+ self.reuse_var = True
+
+ def add_weights(self, variable):
+ if self.reuse_var is None:
+ self.weights.append(variable)
+
def batch_norm(self, net):
# The weights are internal to the batchnorm layer, so apply
# a unique scope that we can store, and use to look them back up
@@ -442,20 +514,22 @@ def batch_norm(self, net):
net,
epsilon=1e-5, axis=1, fused=True,
center=True, scale=False,
- training=self.training)
+ training=self.training,
+ reuse=self.reuse_var)
for v in ['beta', 'moving_mean', 'moving_variance' ]:
name = scope + '/batch_normalization/' + v + ':0'
var = tf.get_default_graph().get_tensor_by_name(name)
- self.weights.append(var)
+ self.add_weights(var)
return net
+ def conv_block(self, inputs, filter_size, input_channels, output_channels, name):
+ W_conv = weight_variable(
+ name,
+ [filter_size, filter_size, input_channels, output_channels])
- def conv_block(self, inputs, filter_size, input_channels, output_channels):
- W_conv = weight_variable([filter_size, filter_size,
- input_channels, output_channels])
- self.weights.append(W_conv)
+ self.add_weights(W_conv)
net = inputs
net = conv2d(net, W_conv)
@@ -463,21 +537,21 @@ def conv_block(self, inputs, filter_size, input_channels, output_channels):
net = tf.nn.relu(net)
return net
- def residual_block(self, inputs, channels):
+ def residual_block(self, inputs, channels, name):
net = inputs
orig = tf.identity(net)
# First convnet weights
- W_conv_1 = weight_variable([3, 3, channels, channels])
- self.weights.append(W_conv_1)
+ W_conv_1 = weight_variable(name + "_conv_1", [3, 3, channels, channels])
+ self.add_weights(W_conv_1)
net = conv2d(net, W_conv_1)
net = self.batch_norm(net)
net = tf.nn.relu(net)
# Second convnet weights
- W_conv_2 = weight_variable([3, 3, channels, channels])
- self.weights.append(W_conv_2)
+ W_conv_2 = weight_variable(name + "_conv_2", [3, 3, channels, channels])
+ self.add_weights(W_conv_2)
net = conv2d(net, W_conv_2)
net = self.batch_norm(net)
@@ -494,36 +568,41 @@ def construct_net(self, planes):
# Input convolution
flow = self.conv_block(x_planes, filter_size=3,
input_channels=18,
- output_channels=self.RESIDUAL_FILTERS)
+ output_channels=self.RESIDUAL_FILTERS,
+ name="first_conv")
# Residual tower
- for _ in range(0, self.RESIDUAL_BLOCKS):
- flow = self.residual_block(flow, self.RESIDUAL_FILTERS)
+ for i in range(0, self.RESIDUAL_BLOCKS):
+ block_name = "res_" + str(i)
+ flow = self.residual_block(flow, self.RESIDUAL_FILTERS,
+ name=block_name)
# Policy head
conv_pol = self.conv_block(flow, filter_size=1,
input_channels=self.RESIDUAL_FILTERS,
- output_channels=2)
- h_conv_pol_flat = tf.reshape(conv_pol, [-1, 2*19*19])
- W_fc1 = weight_variable([2 * 19 * 19, (19 * 19) + 1])
- b_fc1 = bias_variable([(19 * 19) + 1])
- self.weights.append(W_fc1)
- self.weights.append(b_fc1)
+ output_channels=2,
+ name="policy_head")
+ h_conv_pol_flat = tf.reshape(conv_pol, [-1, 2 * 19 * 19])
+ W_fc1 = weight_variable("w_fc_1", [2 * 19 * 19, (19 * 19) + 1])
+ b_fc1 = bias_variable("b_fc_1", [(19 * 19) + 1])
+ self.add_weights(W_fc1)
+ self.add_weights(b_fc1)
h_fc1 = tf.add(tf.matmul(h_conv_pol_flat, W_fc1), b_fc1)
# Value head
conv_val = self.conv_block(flow, filter_size=1,
input_channels=self.RESIDUAL_FILTERS,
- output_channels=1)
- h_conv_val_flat = tf.reshape(conv_val, [-1, 19*19])
- W_fc2 = weight_variable([19 * 19, 256])
- b_fc2 = bias_variable([256])
- self.weights.append(W_fc2)
- self.weights.append(b_fc2)
+ output_channels=1,
+ name="value_head")
+ h_conv_val_flat = tf.reshape(conv_val, [-1, 19 * 19])
+ W_fc2 = weight_variable("w_fc_2", [19 * 19, 256])
+ b_fc2 = bias_variable("b_fc_2", [256])
+ self.add_weights(W_fc2)
+ self.add_weights(b_fc2)
h_fc2 = tf.nn.relu(tf.add(tf.matmul(h_conv_val_flat, W_fc2), b_fc2))
- W_fc3 = weight_variable([256, 1])
- b_fc3 = bias_variable([1])
- self.weights.append(W_fc3)
- self.weights.append(b_fc3)
+ W_fc3 = weight_variable("w_fc_3", [256, 1])
+ b_fc3 = bias_variable("b_fc_3", [1])
+ self.add_weights(W_fc3)
+ self.add_weights(b_fc3)
h_fc3 = tf.nn.tanh(tf.add(tf.matmul(h_fc2, W_fc3), b_fc3))
return h_fc1, h_fc3
From 237b5785aed91faceb7defcc6070e80373c4fb0d Mon Sep 17 00:00:00 2001
From: bittsitt <38073282+bittsitt@users.noreply.github.com>
Date: Mon, 14 May 2018 11:40:13 +0200
Subject: [PATCH 02/18] Extend GTP to support real time search info.
* Extend GTP to add support for displaying winrates and variations
from LZ while LZ is thinking.
* Use UCI format for lz-analyze and lz-genmove-analyze.
* Don't sort gtp lz-analyze output because it is not thread-safe.
Pull request #1388.
---
src/GTP.cpp | 56 +++++++++++++++++++++++++++++++++++++++++++++--
src/GTP.h | 1 +
src/UCTSearch.cpp | 45 +++++++++++++++++++++++++++++++++++--
src/UCTSearch.h | 1 +
src/Utils.cpp | 27 +++++++++++++----------
src/Utils.h | 1 +
6 files changed, 115 insertions(+), 16 deletions(-)
diff --git a/src/GTP.cpp b/src/GTP.cpp
index 6969fabf6..efb2a402f 100644
--- a/src/GTP.cpp
+++ b/src/GTP.cpp
@@ -74,6 +74,7 @@ FILE* cfg_logfile_handle;
bool cfg_quiet;
std::string cfg_options_str;
bool cfg_benchmark;
+int cfg_analyze_interval_centis;
void GTP::setup_default_parameters() {
cfg_gtp_mode = false;
@@ -107,6 +108,7 @@ void GTP::setup_default_parameters() {
cfg_logfile_handle = nullptr;
cfg_quiet = false;
cfg_benchmark = false;
+ cfg_analyze_interval_centis = 0;
// C++11 doesn't guarantee *anything* about how random this is,
// and in MinGW it isn't random at all. But we can mix it in, which
@@ -147,6 +149,8 @@ const std::string GTP::s_commands[] = {
"kgs-time_settings",
"kgs-game_over",
"heatmap",
+ "lz-analyze",
+ "lz-genmove_analyze",
""
};
@@ -345,12 +349,18 @@ bool GTP::execute(GameState & game, std::string xinput) {
}
}
return true;
- } else if (command.find("genmove") == 0) {
+ } else if (command.find("genmove") == 0 || command.find("lz-genmove_analyze") == 0) {
+ auto analysis_output = command.find("lz-genmove_analyze") == 0;
+ auto interval = 0;
+
std::istringstream cmdstream(command);
std::string tmp;
cmdstream >> tmp; // eat genmove
cmdstream >> tmp;
+ if (analysis_output) {
+ cmdstream >> interval;
+ }
if (!cmdstream.fail()) {
int who;
@@ -362,24 +372,66 @@ bool GTP::execute(GameState & game, std::string xinput) {
gtp_fail_printf(id, "syntax error");
return 1;
}
+ if (analysis_output) {
+ // Start of multi-line response
+ cfg_analyze_interval_centis = interval;
+ if (id != -1) gtp_printf_raw("=%d\n", id);
+ else gtp_printf_raw("=\n");
+ }
// start thinking
{
game.set_to_move(who);
+ // Outputs winrate and pvs for lz-genmove_analyze
int move = search->think(who);
game.play_move(move);
std::string vertex = game.move_to_text(move);
- gtp_printf(id, "%s", vertex.c_str());
+ if (!analysis_output) {
+ gtp_printf(id, "%s", vertex.c_str());
+ } else {
+ gtp_printf_raw("play %s\n", vertex.c_str());
+ }
}
if (cfg_allow_pondering) {
// now start pondering
if (!game.has_resigned()) {
+ // Outputs winrate and pvs through gtp for lz-genmove_analyze
search->ponder();
}
}
+ if (analysis_output) {
+ // Terminate multi-line response
+ gtp_printf_raw("\n");
+ }
} else {
gtp_fail_printf(id, "syntax not understood");
}
+ analysis_output = false;
+ return true;
+ } else if (command.find("lz-analyze") == 0) {
+ std::istringstream cmdstream(command);
+ std::string tmp;
+ int interval;
+
+ cmdstream >> tmp; // eat lz-analyze
+ cmdstream >> interval;
+ if (!cmdstream.fail()) {
+ cfg_analyze_interval_centis = interval;
+ } else {
+ gtp_fail_printf(id, "syntax not understood");
+ return true;
+ }
+ // Start multi-line response
+ if (id != -1) gtp_printf_raw("=%d\n", id);
+ else gtp_printf_raw("=\n");
+ // now start pondering
+ if (!game.has_resigned()) {
+ // Outputs winrate and pvs through gtp
+ search->ponder();
+ }
+ cfg_analyze_interval_centis = 0;
+ // Terminate multi-line response
+ gtp_printf_raw("\n");
return true;
} else if (command.find("kgs-genmove_cleanup") == 0) {
std::istringstream cmdstream(command);
diff --git a/src/GTP.h b/src/GTP.h
index 92985fd6c..2e5c00d1e 100644
--- a/src/GTP.h
+++ b/src/GTP.h
@@ -57,6 +57,7 @@ extern FILE* cfg_logfile_handle;
extern bool cfg_quiet;
extern std::string cfg_options_str;
extern bool cfg_benchmark;
+extern int cfg_analyze_interval_centis;
/*
A list of all valid GTP2 commands is defined here:
diff --git a/src/UCTSearch.cpp b/src/UCTSearch.cpp
index accb3352f..b3f4eb0f3 100644
--- a/src/UCTSearch.cpp
+++ b/src/UCTSearch.cpp
@@ -235,6 +235,30 @@ void UCTSearch::dump_stats(FastState & state, UCTNode & parent) {
tree_stats(parent);
}
+void UCTSearch::output_analysis(FastState & state, UCTNode & parent) {
+ if (!parent.has_children()) {
+ return;
+ }
+
+ const int color = state.get_to_move();
+
+ std::string separator = "info";
+ for (const auto& node : parent.get_children()) {
+ // Only send variations with visits
+ if (!node->get_visits()) continue;
+
+ std::string move = state.move_to_text(node->get_move());
+ FastState tmpstate = state;
+ tmpstate.play_move(node->get_move());
+ std::string pv = move + " " + get_pv(tmpstate, *node);
+ gtp_printf_raw("%s %s %s %s %d %s %d %s %s", separator.c_str(), "move", move.c_str(), "visits", node->get_visits(),
+ "winrate", node->get_visits() ? (int)(node->get_eval(color)*10000) : 0, "pv", pv.c_str());
+ separator = " info";
+ }
+ gtp_printf_raw("\n");
+
+}
+
void tree_stats_helper(const UCTNode& node, size_t depth,
size_t& nodes, size_t& non_leaf_nodes,
size_t& depth_sum, size_t& max_depth,
@@ -600,8 +624,9 @@ int UCTSearch::think(int color, passflag_t passflag) {
tg.add_task(UCTWorker(m_rootstate, this, m_root.get()));
}
- bool keeprunning = true;
- int last_update = 0;
+ auto keeprunning = true;
+ auto last_update = 0;
+ auto last_output = 0;
do {
auto currstate = std::make_unique<GameState>(m_rootstate);
@@ -613,6 +638,12 @@ int UCTSearch::think(int color, passflag_t passflag) {
Time elapsed;
int elapsed_centis = Time::timediff_centis(start, elapsed);
+ if (cfg_analyze_interval_centis &&
+ elapsed_centis - last_output > cfg_analyze_interval_centis) {
+ last_output = elapsed_centis;
+ output_analysis(m_rootstate, *m_root);
+ }
+
// output some stats every few seconds
// check if we should still search
if (elapsed_centis - last_update > 250) {
@@ -670,13 +701,23 @@ void UCTSearch::ponder() {
for (int i = 1; i < cfg_num_threads; i++) {
tg.add_task(UCTWorker(m_rootstate, this, m_root.get()));
}
+ Time start;
auto keeprunning = true;
+ auto last_output = 0;
do {
auto currstate = std::make_unique<GameState>(m_rootstate);
auto result = play_simulation(*currstate, m_root.get());
if (result.valid()) {
increment_playouts();
}
+ if (cfg_analyze_interval_centis) {
+ Time elapsed;
+ int elapsed_centis = Time::timediff_centis(start, elapsed);
+ if (elapsed_centis - last_output > cfg_analyze_interval_centis) {
+ last_output = elapsed_centis;
+ output_analysis(m_rootstate, *m_root);
+ }
+ }
keeprunning = is_running();
keeprunning &= !stop_thinking(0, 1);
} while (!Utils::input_pending() && keeprunning);
diff --git a/src/UCTSearch.h b/src/UCTSearch.h
index a99e10f91..f2425039b 100644
--- a/src/UCTSearch.h
+++ b/src/UCTSearch.h
@@ -114,6 +114,7 @@ class UCTSearch {
int get_best_move(passflag_t passflag);
void update_root();
bool advance_to_new_rootstate();
+ void output_analysis(FastState & state, UCTNode & parent);
GameState & m_rootstate;
std::unique_ptr<GameState> m_last_rootstate;
diff --git a/src/Utils.cpp b/src/Utils.cpp
index 84d291362..d0bebb1f1 100644
--- a/src/Utils.cpp
+++ b/src/Utils.cpp
@@ -94,25 +94,17 @@ void Utils::myprintf(const char *fmt, ...) {
}
}
-static void gtp_fprintf(FILE* file, const std::string& prefix,
- const char *fmt, va_list ap) {
- fprintf(file, "%s ", prefix.c_str());
- vfprintf(file, fmt, ap);
- fprintf(file, "\n\n");
-}
-
static void gtp_base_printf(int id, std::string prefix,
const char *fmt, va_list ap) {
if (id != -1) {
prefix += std::to_string(id);
}
+ prefix += " ";
- gtp_fprintf(stdout, prefix, fmt, ap);
+ Utils::gtp_printf_raw(prefix.c_str());
+ Utils::gtp_printf_raw(fmt, ap);
+ Utils::gtp_printf_raw("\n\n");
- if (cfg_logfile_handle) {
- std::lock_guard<std::mutex> lock(IOmutex);
- gtp_fprintf(cfg_logfile_handle, prefix, fmt, ap);
- }
}
void Utils::gtp_printf(int id, const char *fmt, ...) {
@@ -122,6 +114,17 @@ void Utils::gtp_printf(int id, const char *fmt, ...) {
va_end(ap);
}
+void Utils::gtp_printf_raw(const char *fmt, ...) {
+ va_list ap;
+ va_start(ap, fmt);
+ vfprintf(stdout, fmt, ap);
+ if (cfg_logfile_handle) {
+ std::lock_guard<std::mutex> lock(IOmutex);
+ vfprintf(cfg_logfile_handle, fmt, ap);
+ }
+ va_end(ap);
+}
+
void Utils::gtp_fail_printf(int id, const char *fmt, ...) {
va_list ap;
va_start(ap, fmt);
diff --git a/src/Utils.h b/src/Utils.h
index 33a10aef7..ae8c02704 100644
--- a/src/Utils.h
+++ b/src/Utils.h
@@ -32,6 +32,7 @@ extern Utils::ThreadPool thread_pool;
namespace Utils {
void myprintf(const char *fmt, ...);
void gtp_printf(int id, const char *fmt, ...);
+ void gtp_printf_raw(const char *fmt, ...);
void gtp_fail_printf(int id, const char *fmt, ...);
void log_input(const std::string& input);
bool input_pending();
From 6e847e134df63bc15fe1fa4c2c1989f37bf225a7 Mon Sep 17 00:00:00 2001
From: Gian-Carlo Pascutto
Date: Mon, 14 May 2018 13:08:04 +0200
Subject: [PATCH 03/18] Remove virtual loss from eval for live stats.
For discussion see pull request #1412.
---
src/UCTNode.cpp | 12 ++++++++++++
src/UCTNode.h | 1 +
src/UCTSearch.cpp | 12 ++++++++----
3 files changed, 21 insertions(+), 4 deletions(-)
diff --git a/src/UCTNode.cpp b/src/UCTNode.cpp
index f5649998c..1fffe16fa 100644
--- a/src/UCTNode.cpp
+++ b/src/UCTNode.cpp
@@ -204,6 +204,18 @@ int UCTNode::get_visits() const {
return m_visits;
}
+// Return the true score, without taking into account virtual losses.
+float UCTNode::get_pure_eval(int tomove) const {
+ auto visits = get_visits();
+ assert(visits > 0);
+ auto blackeval = get_blackevals();
+ auto score = static_cast<float>(blackeval / double(visits));
+ if (tomove == FastBoard::WHITE) {
+ score = 1.0f - score;
+ }
+ return score;
+}
+
float UCTNode::get_eval(int tomove) const {
// Due to the use of atomic updates and virtual losses, it is
// possible for the visit count to change underneath us. Make sure
diff --git a/src/UCTNode.h b/src/UCTNode.h
index 9c88961e7..44a54a809 100644
--- a/src/UCTNode.h
+++ b/src/UCTNode.h
@@ -66,6 +66,7 @@ class UCTNode {
float get_score() const;
void set_score(float score);
float get_eval(int tomove) const;
+ float get_pure_eval(int tomove) const;
float get_net_eval(int tomove) const;
void virtual_loss(void);
void virtual_loss_undo(void);
diff --git a/src/UCTSearch.cpp b/src/UCTSearch.cpp
index b3f4eb0f3..cf6e9de90 100644
--- a/src/UCTSearch.cpp
+++ b/src/UCTSearch.cpp
@@ -228,7 +228,7 @@ void UCTSearch::dump_stats(FastState & state, UCTNode & parent) {
myprintf("%4s -> %7d (V: %5.2f%%) (N: %5.2f%%) PV: %s\n",
move.c_str(),
node->get_visits(),
- node->get_visits() ? node->get_eval(color)*100.0f : 0.0f,
+ node->get_visits() ? node->get_pure_eval(color)*100.0f : 0.0f,
node->get_score() * 100.0f,
pv.c_str());
}
@@ -251,8 +251,12 @@ void UCTSearch::output_analysis(FastState & state, UCTNode & parent) {
FastState tmpstate = state;
tmpstate.play_move(node->get_move());
std::string pv = move + " " + get_pv(tmpstate, *node);
- gtp_printf_raw("%s %s %s %s %d %s %d %s %s", separator.c_str(), "move", move.c_str(), "visits", node->get_visits(),
- "winrate", node->get_visits() ? (int)(node->get_eval(color)*10000) : 0, "pv", pv.c_str());
+ auto move_eval = node->get_visits() ?
+ static_cast<int>(node->get_pure_eval(color) * 10000) : 0;
+ gtp_printf_raw("%s %s %s %s %d %s %d %s %s", separator.c_str(),
+ "move", move.c_str(), "visits", node->get_visits(),
+ "winrate", move_eval,
+ "pv", pv.c_str());
separator = " info";
}
gtp_printf_raw("\n");
@@ -489,7 +493,7 @@ void UCTSearch::dump_analysis(int playouts) {
int color = tempstate.board.get_to_move();
std::string pvstring = get_pv(tempstate, *m_root);
- float winrate = 100.0f * m_root->get_eval(color);
+ float winrate = 100.0f * m_root->get_pure_eval(color);
myprintf("Playouts: %d, Win: %5.2f%%, PV: %s\n",
playouts, winrate, pvstring.c_str());
}
From 9fd7542d7b918f432c9ab985433ea42565edf1f1 Mon Sep 17 00:00:00 2001
From: Gian-Carlo Pascutto
Date: Mon, 14 May 2018 13:14:43 +0200
Subject: [PATCH 04/18] Make analysis output use one move per line.
More in line with UCI, cleaner, easier to parse, smaller code.
---
src/UCTSearch.cpp | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/src/UCTSearch.cpp b/src/UCTSearch.cpp
index cf6e9de90..63dfe254b 100644
--- a/src/UCTSearch.cpp
+++ b/src/UCTSearch.cpp
@@ -242,7 +242,6 @@ void UCTSearch::output_analysis(FastState & state, UCTNode & parent) {
const int color = state.get_to_move();
- std::string separator = "info";
for (const auto& node : parent.get_children()) {
// Only send variations with visits
if (!node->get_visits()) continue;
@@ -253,14 +252,12 @@ void UCTSearch::output_analysis(FastState & state, UCTNode & parent) {
std::string pv = move + " " + get_pv(tmpstate, *node);
auto move_eval = node->get_visits() ?
static_cast<int>(node->get_pure_eval(color) * 10000) : 0;
- gtp_printf_raw("%s %s %s %s %d %s %d %s %s", separator.c_str(),
- "move", move.c_str(), "visits", node->get_visits(),
+ gtp_printf_raw("info %s %s %s %d %s %d %s %s\n",
+ "move", move.c_str(),
+ "visits", node->get_visits(),
"winrate", move_eval,
"pv", pv.c_str());
- separator = " info";
}
- gtp_printf_raw("\n");
-
}
void tree_stats_helper(const UCTNode& node, size_t depth,
From 1b64435d395ffccbe2032652a73b0da50b93eca1 Mon Sep 17 00:00:00 2001
From: Gian-Carlo Pascutto
Date: Mon, 14 May 2018 16:52:37 +0200
Subject: [PATCH 05/18] Remove versioned clang from Makefile.
Don't hardcode the clang version in the Makefile.
---
src/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/Makefile b/src/Makefile
index b9b9cf4e5..f3890836d 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -16,7 +16,7 @@ debug:
clang:
@echo "Detected OS: ${THE_OS}"
- $(MAKE) CC=clang-5.0 CXX=clang++-5.0 \
+ $(MAKE) CC=clang CXX=clang++ \
CXXFLAGS='$(CXXFLAGS) -Wall -Wextra -Wno-missing-braces -O3 -ffast-math -flto -march=native -std=c++14 -DNDEBUG' \
LDFLAGS='$(LDFLAGS) -flto -fuse-linker-plugin' \
leelaz
From 62ddf58027da1b2952f0934eddd820a11d3879b8 Mon Sep 17 00:00:00 2001
From: Gian-Carlo Pascutto
Date: Mon, 14 May 2018 17:42:29 +0200
Subject: [PATCH 06/18] Fix varargs usage.
Regression from #1388. Fixes issue #1424.
---
src/Utils.cpp | 23 ++++++++++++++++-------
1 file changed, 16 insertions(+), 7 deletions(-)
diff --git a/src/Utils.cpp b/src/Utils.cpp
index d0bebb1f1..735d4af50 100644
--- a/src/Utils.cpp
+++ b/src/Utils.cpp
@@ -94,17 +94,23 @@ void Utils::myprintf(const char *fmt, ...) {
}
}
+static void gtp_fprintf(FILE* file, const std::string& prefix,
+ const char *fmt, va_list ap) {
+ fprintf(file, "%s ", prefix.c_str());
+ vfprintf(file, fmt, ap);
+ fprintf(file, "\n\n");
+}
+
static void gtp_base_printf(int id, std::string prefix,
const char *fmt, va_list ap) {
if (id != -1) {
prefix += std::to_string(id);
}
- prefix += " ";
-
- Utils::gtp_printf_raw(prefix.c_str());
- Utils::gtp_printf_raw(fmt, ap);
- Utils::gtp_printf_raw("\n\n");
-
+ gtp_fprintf(stdout, prefix, fmt, ap);
+ if (cfg_logfile_handle) {
+ std::lock_guard<std::mutex> lock(IOmutex);
+ gtp_fprintf(cfg_logfile_handle, prefix, fmt, ap);
+ }
}
void Utils::gtp_printf(int id, const char *fmt, ...) {
@@ -118,11 +124,14 @@ void Utils::gtp_printf_raw(const char *fmt, ...) {
va_list ap;
va_start(ap, fmt);
vfprintf(stdout, fmt, ap);
+ va_end(ap);
+
if (cfg_logfile_handle) {
std::lock_guard<std::mutex> lock(IOmutex);
+ va_start(ap, fmt);
vfprintf(cfg_logfile_handle, fmt, ap);
+ va_end(ap);
}
- va_end(ap);
}
void Utils::gtp_fail_printf(int id, const char *fmt, ...) {
From 4d3f6444cffb8354501f2806e98caf430cd73f9f Mon Sep 17 00:00:00 2001
From: Junyan Xu
Date: Wed, 16 May 2018 03:59:19 -0400
Subject: [PATCH 07/18] make c_puct proportional to winrate
---
src/UCTNode.cpp | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/src/UCTNode.cpp b/src/UCTNode.cpp
index 1fffe16fa..ff5d14fe8 100644
--- a/src/UCTNode.cpp
+++ b/src/UCTNode.cpp
@@ -269,11 +269,13 @@ UCTNode* UCTNode::uct_select_child(int color, bool is_root) {
// Lower the expected eval for moves that are likely not the best.
// Do not do this if we have introduced noise at this node exactly
// to explore more.
+
+ auto pure_eval = get_pure_eval(color);
if (!is_root || !cfg_noise) {
- fpu_reduction = cfg_fpu_reduction * std::sqrt(total_visited_policy);
+ fpu_reduction = cfg_fpu_reduction * std::sqrt(total_visited_policy) * pure_eval / 0.5;
}
// Estimated eval for unknown nodes = original parent NN eval - reduction
- auto fpu_eval = get_net_eval(color) - fpu_reduction;
+ auto fpu_eval = pure_eval - fpu_reduction;
auto best = static_cast<UCTNodePointer*>(nullptr);
auto best_value = std::numeric_limits<double>::lowest();
@@ -289,7 +291,7 @@ UCTNode* UCTNode::uct_select_child(int color, bool is_root) {
}
auto psa = child.get_score();
auto denom = 1.0 + child.get_visits();
- auto puct = cfg_puct * psa * (numerator / denom);
+ auto puct = cfg_puct * psa * (numerator / denom) * pure_eval / 0.5;
auto value = winrate + puct;
assert(value > std::numeric_limits<double>::lowest());
From c165c0514a934a1a8eb1a6384b1914dc664fa085 Mon Sep 17 00:00:00 2001
From: Junyan Xu
Date: Wed, 16 May 2018 22:35:14 -0400
Subject: [PATCH 08/18] revise comment about fpu
---
src/UCTNode.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/UCTNode.cpp b/src/UCTNode.cpp
index ff5d14fe8..6d35b95f6 100644
--- a/src/UCTNode.cpp
+++ b/src/UCTNode.cpp
@@ -274,7 +274,7 @@ UCTNode* UCTNode::uct_select_child(int color, bool is_root) {
if (!is_root || !cfg_noise) {
fpu_reduction = cfg_fpu_reduction * std::sqrt(total_visited_policy) * pure_eval / 0.5;
}
- // Estimated eval for unknown nodes = original parent NN eval - reduction
+ // Estimated eval for unknown nodes = current parent winrate - reduction
auto fpu_eval = pure_eval - fpu_reduction;
auto best = static_cast<UCTNodePointer*>(nullptr);
From a63ee5fe68d7a2cf109ff97b7610deb233db04a1 Mon Sep 17 00:00:00 2001
From: Junyan Xu
Date: Wed, 23 May 2018 16:43:05 -0400
Subject: [PATCH 09/18] fix division by zero with multi threads
---
src/UCTNode.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/UCTNode.cpp b/src/UCTNode.cpp
index 6d35b95f6..e50dacdcd 100644
--- a/src/UCTNode.cpp
+++ b/src/UCTNode.cpp
@@ -75,7 +75,6 @@ bool UCTNode::create_children(std::atomic<int>& nodecount,
}
// We'll be the one queueing this node for expansion, stop others
m_is_expanding = true;
- lock.unlock();
const auto raw_netlist = Network::get_scored_moves(
&state, Network::Ensemble::RANDOM_SYMMETRY);
@@ -87,6 +86,8 @@ bool UCTNode::create_children(std::atomic& nodecount,
if (state.board.white_to_move()) {
m_net_eval = 1.0f - m_net_eval;
}
+ update(m_net_eval);
+ lock.unlock();
eval = m_net_eval;
std::vector nodelist;
From ae4d04d69211f78b88afee377a8fd2a50ff42581 Mon Sep 17 00:00:00 2001
From: Junyan Xu
Date: Wed, 23 May 2018 16:43:59 -0400
Subject: [PATCH 10/18] fix division by zero with multi threads
---
src/UCTSearch.cpp | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/src/UCTSearch.cpp b/src/UCTSearch.cpp
index 63dfe254b..9ccb9639c 100644
--- a/src/UCTSearch.cpp
+++ b/src/UCTSearch.cpp
@@ -168,6 +168,7 @@ SearchResult UCTSearch::play_simulation(GameState & currstate,
if (currstate.get_passes() >= 2) {
auto score = currstate.final_score();
result = SearchResult::from_score(score);
+ node->update(result.eval());
} else if (m_nodes < MAX_TREE_SIZE) {
float eval;
const auto had_children = node->has_children();
@@ -189,12 +190,13 @@ SearchResult UCTSearch::play_simulation(GameState & currstate,
next->invalidate();
} else {
result = play_simulation(currstate, next);
+ if (result.valid()) {
+ node->update(result.eval());
+ }
}
}
- if (result.valid()) {
- node->update(result.eval());
- }
+
node->virtual_loss_undo();
return result;
From bbfb44597a13d1ca3a514950d50380ca47662f6c Mon Sep 17 00:00:00 2001
From: Junyan Xu
Date: Wed, 23 May 2018 20:01:41 -0400
Subject: [PATCH 11/18] revert unnecessarily extending lock
---
src/UCTNode.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/UCTNode.cpp b/src/UCTNode.cpp
index e50dacdcd..961dfa22f 100644
--- a/src/UCTNode.cpp
+++ b/src/UCTNode.cpp
@@ -75,6 +75,7 @@ bool UCTNode::create_children(std::atomic<int>& nodecount,
}
// We'll be the one queueing this node for expansion, stop others
m_is_expanding = true;
+ lock.unlock();
const auto raw_netlist = Network::get_scored_moves(
&state, Network::Ensemble::RANDOM_SYMMETRY);
@@ -87,7 +88,6 @@ bool UCTNode::create_children(std::atomic& nodecount,
m_net_eval = 1.0f - m_net_eval;
}
update(m_net_eval);
- lock.unlock();
eval = m_net_eval;
std::vector nodelist;
From 05380eb85f61caa263deedd7116fb1273bdaeaac Mon Sep 17 00:00:00 2001
From: Junyan Xu
Date: Tue, 12 Jun 2018 00:49:43 -0400
Subject: [PATCH 12/18] Update UCTNode.cpp
---
src/UCTNode.cpp | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/src/UCTNode.cpp b/src/UCTNode.cpp
index 961dfa22f..d9e172371 100644
--- a/src/UCTNode.cpp
+++ b/src/UCTNode.cpp
@@ -271,12 +271,11 @@ UCTNode* UCTNode::uct_select_child(int color, bool is_root) {
// Do not do this if we have introduced noise at this node exactly
// to explore more.
- auto pure_eval = get_pure_eval(color);
if (!is_root || !cfg_noise) {
- fpu_reduction = cfg_fpu_reduction * std::sqrt(total_visited_policy) * pure_eval / 0.5;
+ fpu_reduction = cfg_fpu_reduction * std::sqrt(total_visited_policy);
}
// Estimated eval for unknown nodes = current parent winrate - reduction
- auto fpu_eval = pure_eval - fpu_reduction;
+ auto fpu_eval = get_pure_eval(color) - fpu_reduction;
auto best = static_cast(nullptr);
auto best_value = std::numeric_limits::lowest();
@@ -292,7 +291,7 @@ UCTNode* UCTNode::uct_select_child(int color, bool is_root) {
}
auto psa = child.get_score();
auto denom = 1.0 + child.get_visits();
- auto puct = cfg_puct * psa * (numerator / denom) * pure_eval / 0.5;
+ auto puct = cfg_puct * psa * (numerator / denom);
auto value = winrate + puct;
assert(value > std::numeric_limits::lowest());
From 2c3e50fc316d5196486b91779beec08788530de8 Mon Sep 17 00:00:00 2001
From: Junyan Xu
Date: Tue, 12 Jun 2018 00:50:32 -0400
Subject: [PATCH 13/18] Update config.h: enable USE_TUNER
---
src/config.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/config.h b/src/config.h
index 100e3e998..1e8d3a4d2 100644
--- a/src/config.h
+++ b/src/config.h
@@ -78,7 +78,7 @@
* USE_TUNER: Expose some extra command line parameters that allow tuning the
* search algorithm.
*/
-//#define USE_TUNER
+#define USE_TUNER
#define PROGRAM_NAME "Leela Zero"
#define PROGRAM_VERSION "0.15"
From 62533d2009c16323e4b72948c59679be92bc6b70 Mon Sep 17 00:00:00 2001
From: Junyan Xu
Date: Mon, 18 Jun 2018 00:08:38 -0400
Subject: [PATCH 14/18] Update config.h: disable USE_TUNER again
---
src/config.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/config.h b/src/config.h
index 1e8d3a4d2..100e3e998 100644
--- a/src/config.h
+++ b/src/config.h
@@ -78,7 +78,7 @@
* USE_TUNER: Expose some extra command line parameters that allow tuning the
* search algorithm.
*/
-#define USE_TUNER
+//#define USE_TUNER
#define PROGRAM_NAME "Leela Zero"
#define PROGRAM_VERSION "0.15"
From fe75facff3382d9e71df85590b47162b5da46729 Mon Sep 17 00:00:00 2001
From: Junyan Xu
Date: Mon, 18 Jun 2018 00:11:14 -0400
Subject: [PATCH 15/18] Update UCTNode.cpp: base FPU eval on get_net_eval
---
src/UCTNode.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/UCTNode.cpp b/src/UCTNode.cpp
index d9e172371..aaa8a6b5d 100644
--- a/src/UCTNode.cpp
+++ b/src/UCTNode.cpp
@@ -275,7 +275,7 @@ UCTNode* UCTNode::uct_select_child(int color, bool is_root) {
fpu_reduction = cfg_fpu_reduction * std::sqrt(total_visited_policy);
}
// Estimated eval for unknown nodes = current parent winrate - reduction
- auto fpu_eval = get_pure_eval(color) - fpu_reduction;
+ auto fpu_eval = get_net_eval(color) - fpu_reduction;
auto best = static_cast(nullptr);
auto best_value = std::numeric_limits::lowest();
From 4758de09209ebf899e2b1b7f2590f45dc806a2e5 Mon Sep 17 00:00:00 2001
From: Junyan Xu
Date: Mon, 18 Jun 2018 00:13:14 -0400
Subject: [PATCH 16/18] Update UCTNode.cpp: correct fpu_eval comment
---
src/UCTNode.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/UCTNode.cpp b/src/UCTNode.cpp
index aaa8a6b5d..c4f5b33dd 100644
--- a/src/UCTNode.cpp
+++ b/src/UCTNode.cpp
@@ -274,7 +274,7 @@ UCTNode* UCTNode::uct_select_child(int color, bool is_root) {
if (!is_root || !cfg_noise) {
fpu_reduction = cfg_fpu_reduction * std::sqrt(total_visited_policy);
}
- // Estimated eval for unknown nodes = current parent winrate - reduction
+ // Estimated eval for unknown nodes = original parent NN eval - reduction
auto fpu_eval = get_net_eval(color) - fpu_reduction;
auto best = static_cast(nullptr);
From c5986b063ec7b11f193361c21f1bb2300f9dde3c Mon Sep 17 00:00:00 2001
From: Junyan Xu
Date: Mon, 18 Jun 2018 00:16:44 -0400
Subject: [PATCH 17/18] Update UCTNode.cpp: remove stray blank line
---
src/UCTNode.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/src/UCTNode.cpp b/src/UCTNode.cpp
index c4f5b33dd..3f008e4f1 100644
--- a/src/UCTNode.cpp
+++ b/src/UCTNode.cpp
@@ -270,7 +270,6 @@ UCTNode* UCTNode::uct_select_child(int color, bool is_root) {
// Lower the expected eval for moves that are likely not the best.
// Do not do this if we have introduced noise at this node exactly
// to explore more.
-
if (!is_root || !cfg_noise) {
fpu_reduction = cfg_fpu_reduction * std::sqrt(total_visited_policy);
}
From d25f752a3315aa99ce4e44c10220c0f01e4c41c7 Mon Sep 17 00:00:00 2001
From: Junyan Xu
Date: Mon, 18 Jun 2018 00:18:13 -0400
Subject: [PATCH 18/18] Update UCTSearch.cpp: remove stray blank line
---
src/UCTSearch.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/src/UCTSearch.cpp b/src/UCTSearch.cpp
index 9ccb9639c..ae67077ff 100644
--- a/src/UCTSearch.cpp
+++ b/src/UCTSearch.cpp
@@ -196,7 +196,6 @@ SearchResult UCTSearch::play_simulation(GameState & currstate,
}
}
-
node->virtual_loss_undo();
return result;