From 648b230494c72a6ab39f85c9ddb73a703b184f43 Mon Sep 17 00:00:00 2001 From: Mankit Pong <2994070164@qq.com> Date: Mon, 14 May 2018 17:36:49 +0800 Subject: [PATCH 01/18] Add multi GPU training support. Pull request #1386. --- training/tf/tfprocess.py | 213 +++++++++++++++++++++++++++------------ 1 file changed, 146 insertions(+), 67 deletions(-) diff --git a/training/tf/tfprocess.py b/training/tf/tfprocess.py index 7542ce3bc..0dba54c7a 100644 --- a/training/tf/tfprocess.py +++ b/training/tf/tfprocess.py @@ -16,27 +16,30 @@ # You should have received a copy of the GNU General Public License # along with Leela Zero. If not, see . +import math import numpy as np import os -import random import tensorflow as tf import time import unittest -def weight_variable(shape): + +def weight_variable(name, shape): """Xavier initialization""" stddev = np.sqrt(2.0 / (sum(shape))) initial = tf.truncated_normal(shape, stddev=stddev) - weights = tf.Variable(initial) + weights = tf.get_variable(name, initializer=initial) tf.add_to_collection(tf.GraphKeys.WEIGHTS, weights) return weights # Bias weights for layers not followed by BatchNorm # We do not regularlize biases, so they are not # added to the regularlizer collection -def bias_variable(shape): +def bias_variable(name, shape): initial = tf.constant(0.0, shape=shape) - return tf.Variable(initial) + bias = tf.get_variable(name, initializer=initial) + return bias + def conv2d(x, W): return tf.nn.conv2d(x, W, data_format='NCHW', @@ -108,6 +111,9 @@ def __init__(self): self.RESIDUAL_FILTERS = 128 self.RESIDUAL_BLOCKS = 6 + # Set number of GPUs for training + self.gpus_num = 1 + # For exporting self.weights = [] @@ -156,13 +162,55 @@ def init(self, batch_size, macrobatch=1, logbase='leelalogs'): self.init_net(planes, probs, winner) def init_net(self, planes, probs, winner): - self.x = planes # (tf.float32, [None, 18, 19 * 19]) - self.y_ = probs # (tf.float32, [None, 362]) - self.z_ = winner # (tf.float32, [None, 1]) + self.y_ = probs # 
(tf.float32, [None, 362]) + self.sx = tf.split(planes, self.gpus_num) + self.sy_ = tf.split(probs, self.gpus_num) + self.sz_ = tf.split(winner, self.gpus_num) self.batch_norm_count = 0 - self.y_conv, self.z_conv = self.construct_net(self.x) + self.reuse_var = None - if self.swa_enabled == True: + # You need to change the learning rate here if you are training + # from a self-play training set, for example start with 0.005 instead. + opt = tf.train.MomentumOptimizer( + learning_rate=0.05, momentum=0.9, use_nesterov=True) + + # Construct net here. + tower_grads = [] + tower_loss = [] + tower_policy_loss = [] + tower_mse_loss = [] + tower_reg_term = [] + tower_y_conv = [] + with tf.variable_scope(tf.get_variable_scope()): + for i in range(self.gpus_num): + with tf.device("/gpu:%d" % i): + with tf.name_scope("tower_%d" % i): + loss, policy_loss, mse_loss, reg_term, y_conv = self.tower_loss( + self.sx[i], self.sy_[i], self.sz_[i]) + + # Reset batchnorm key to 0. + self.reset_batchnorm_key() + + tf.get_variable_scope().reuse_variables() + grads = opt.compute_gradients(loss) + + tower_grads.append(grads) + tower_loss.append(loss) + tower_policy_loss.append(policy_loss) + tower_mse_loss.append(mse_loss) + tower_reg_term.append(reg_term) + tower_y_conv.append(y_conv) + + # Average gradients from different GPUs + self.loss = tf.reduce_mean(tower_loss) + self.policy_loss = tf.reduce_mean(tower_policy_loss) + self.mse_loss = tf.reduce_mean(tower_mse_loss) + self.reg_term = tf.reduce_mean(tower_reg_term) + self.y_conv = tf.concat(tower_y_conv, axis=0) + self.mean_grads = self.average_gradients(tower_grads) + + # Do swa after we construct the net + if self.swa_enabled is True: # Count of networks accumulated into SWA self.swa_count = tf.Variable(0., name='swa_count', trainable=False) # Count of networks to skip @@ -183,38 +231,13 @@ def init_net(self, planes, probs, winner): self.swa_accum_op = tf.assign_add(n, 1.) 
self.swa_load_op = tf.group(*load) - # Calculate loss on policy head - cross_entropy = \ - tf.nn.softmax_cross_entropy_with_logits(labels=self.y_, - logits=self.y_conv) - self.policy_loss = tf.reduce_mean(cross_entropy) - - # Loss on value head - self.mse_loss = \ - tf.reduce_mean(tf.squared_difference(self.z_, self.z_conv)) - - # Regularizer - regularizer = tf.contrib.layers.l2_regularizer(scale=0.0001) - reg_variables = tf.get_collection(tf.GraphKeys.WEIGHTS) - self.reg_term = \ - tf.contrib.layers.apply_regularization(regularizer, reg_variables) - - # For training from a (smaller) dataset of strong players, you will - # want to reduce the factor in front of self.mse_loss here. - self.loss = 1.0 * self.policy_loss + 1.0 * self.mse_loss + self.reg_term - - # You need to change the learning rate here if you are training - # from a self-play training set, for example start with 0.005 instead. - opt = tf.train.MomentumOptimizer( - learning_rate=0.05, momentum=0.9, use_nesterov=True) - - # Compute and accumulate gradients + # Accumulate gradients self.update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) total_grad=[] grad_ops=[] clear_var=[] with tf.control_dependencies(self.update_ops): - self.grad_op_real = opt.compute_gradients(self.loss) + self.grad_op_real = self.mean_grads for (g, v) in self.grad_op_real: if g is None: total_grad.append((g,v)) @@ -261,6 +284,47 @@ def init_net(self, planes, probs, winner): # Initialize all variables self.session.run(tf.global_variables_initializer()) + def average_gradients(self, tower_grads): + # Average gradients from different GPUs + average_grads = [] + for grad_and_vars in zip(*tower_grads): + grads = [] + for g, _ in grad_and_vars: + expanded_g = tf.expand_dims(g, dim=0) + grads.append(expanded_g) + + grad = tf.concat(grads, axis=0) + grad = tf.reduce_mean(grad, reduction_indices=0) + + v = grad_and_vars[0][1] + grad_and_var = (grad, v) + average_grads.append(grad_and_var) + return average_grads + + def tower_loss(self, 
x, y_, z_): + y_conv, z_conv = self.construct_net(x) + # Calculate loss on policy head + cross_entropy = \ + tf.nn.softmax_cross_entropy_with_logits(labels=y_, + logits=y_conv) + policy_loss = tf.reduce_mean(cross_entropy) + + # Loss on value head + mse_loss = \ + tf.reduce_mean(tf.squared_difference(z_, z_conv)) + + # Regularizer + regularizer = tf.contrib.layers.l2_regularizer(scale=0.0001) + reg_variables = tf.get_collection(tf.GraphKeys.WEIGHTS) + reg_term = \ + tf.contrib.layers.apply_regularization(regularizer, reg_variables) + + # For training from a (smaller) dataset of strong players, you will + # want to reduce the factor in front of self.mse_loss here. + loss = 1.0 * policy_loss + 1.0 * mse_loss + reg_term + + return loss, policy_loss, mse_loss, reg_term, y_conv + def assign(self, var, values): try: self.session.run(tf.assign(var, values)) @@ -432,6 +496,14 @@ def get_batchnorm_key(self): self.batch_norm_count += 1 return result + def reset_batchnorm_key(self): + self.batch_norm_count = 0 + self.reuse_var = True + + def add_weights(self, variable): + if self.reuse_var is None: + self.weights.append(variable) + def batch_norm(self, net): # The weights are internal to the batchnorm layer, so apply # a unique scope that we can store, and use to look them back up @@ -442,20 +514,22 @@ def batch_norm(self, net): net, epsilon=1e-5, axis=1, fused=True, center=True, scale=False, - training=self.training) + training=self.training, + reuse=self.reuse_var) for v in ['beta', 'moving_mean', 'moving_variance' ]: name = scope + '/batch_normalization/' + v + ':0' var = tf.get_default_graph().get_tensor_by_name(name) - self.weights.append(var) + self.add_weights(var) return net + def conv_block(self, inputs, filter_size, input_channels, output_channels, name): + W_conv = weight_variable( + name, + [filter_size, filter_size, input_channels, output_channels]) - def conv_block(self, inputs, filter_size, input_channels, output_channels): - W_conv = 
weight_variable([filter_size, filter_size, - input_channels, output_channels]) - self.weights.append(W_conv) + self.add_weights(W_conv) net = inputs net = conv2d(net, W_conv) @@ -463,21 +537,21 @@ def conv_block(self, inputs, filter_size, input_channels, output_channels): net = tf.nn.relu(net) return net - def residual_block(self, inputs, channels): + def residual_block(self, inputs, channels, name): net = inputs orig = tf.identity(net) # First convnet weights - W_conv_1 = weight_variable([3, 3, channels, channels]) - self.weights.append(W_conv_1) + W_conv_1 = weight_variable(name + "_conv_1", [3, 3, channels, channels]) + self.add_weights(W_conv_1) net = conv2d(net, W_conv_1) net = self.batch_norm(net) net = tf.nn.relu(net) # Second convnet weights - W_conv_2 = weight_variable([3, 3, channels, channels]) - self.weights.append(W_conv_2) + W_conv_2 = weight_variable(name + "_conv_2", [3, 3, channels, channels]) + self.add_weights(W_conv_2) net = conv2d(net, W_conv_2) net = self.batch_norm(net) @@ -494,36 +568,41 @@ def construct_net(self, planes): # Input convolution flow = self.conv_block(x_planes, filter_size=3, input_channels=18, - output_channels=self.RESIDUAL_FILTERS) + output_channels=self.RESIDUAL_FILTERS, + name="first_conv") # Residual tower - for _ in range(0, self.RESIDUAL_BLOCKS): - flow = self.residual_block(flow, self.RESIDUAL_FILTERS) + for i in range(0, self.RESIDUAL_BLOCKS): + block_name = "res_" + str(i) + flow = self.residual_block(flow, self.RESIDUAL_FILTERS, + name=block_name) # Policy head conv_pol = self.conv_block(flow, filter_size=1, input_channels=self.RESIDUAL_FILTERS, - output_channels=2) - h_conv_pol_flat = tf.reshape(conv_pol, [-1, 2*19*19]) - W_fc1 = weight_variable([2 * 19 * 19, (19 * 19) + 1]) - b_fc1 = bias_variable([(19 * 19) + 1]) - self.weights.append(W_fc1) - self.weights.append(b_fc1) + output_channels=2, + name="policy_head") + h_conv_pol_flat = tf.reshape(conv_pol, [-1, 2 * 19 * 19]) + W_fc1 = weight_variable("w_fc_1", [2 * 
19 * 19, (19 * 19) + 1]) + b_fc1 = bias_variable("b_fc_1", [(19 * 19) + 1]) + self.add_weights(W_fc1) + self.add_weights(b_fc1) h_fc1 = tf.add(tf.matmul(h_conv_pol_flat, W_fc1), b_fc1) # Value head conv_val = self.conv_block(flow, filter_size=1, input_channels=self.RESIDUAL_FILTERS, - output_channels=1) - h_conv_val_flat = tf.reshape(conv_val, [-1, 19*19]) - W_fc2 = weight_variable([19 * 19, 256]) - b_fc2 = bias_variable([256]) - self.weights.append(W_fc2) - self.weights.append(b_fc2) + output_channels=1, + name="value_head") + h_conv_val_flat = tf.reshape(conv_val, [-1, 19 * 19]) + W_fc2 = weight_variable("w_fc_2", [19 * 19, 256]) + b_fc2 = bias_variable("b_fc_2", [256]) + self.add_weights(W_fc2) + self.add_weights(b_fc2) h_fc2 = tf.nn.relu(tf.add(tf.matmul(h_conv_val_flat, W_fc2), b_fc2)) - W_fc3 = weight_variable([256, 1]) - b_fc3 = bias_variable([1]) - self.weights.append(W_fc3) - self.weights.append(b_fc3) + W_fc3 = weight_variable("w_fc_3", [256, 1]) + b_fc3 = bias_variable("b_fc_3", [1]) + self.add_weights(W_fc3) + self.add_weights(b_fc3) h_fc3 = tf.nn.tanh(tf.add(tf.matmul(h_fc2, W_fc3), b_fc3)) return h_fc1, h_fc3 From 237b5785aed91faceb7defcc6070e80373c4fb0d Mon Sep 17 00:00:00 2001 From: bittsitt <38073282+bittsitt@users.noreply.github.com> Date: Mon, 14 May 2018 11:40:13 +0200 Subject: [PATCH 02/18] Extend GTP to support real time search info. * Extend GTP to add support for displaying winrates and variations from LZ while LZ is thinking. * Use UCI format for lz-analyze and lz-genmove-analyze. * Don't sort gtp lz-analyze output because it is not thread-safe. Pull request #1388. 
--- src/GTP.cpp | 56 +++++++++++++++++++++++++++++++++++++++++++++-- src/GTP.h | 1 + src/UCTSearch.cpp | 45 +++++++++++++++++++++++++++++++++++-- src/UCTSearch.h | 1 + src/Utils.cpp | 27 +++++++++++++---------- src/Utils.h | 1 + 6 files changed, 115 insertions(+), 16 deletions(-) diff --git a/src/GTP.cpp b/src/GTP.cpp index 6969fabf6..efb2a402f 100644 --- a/src/GTP.cpp +++ b/src/GTP.cpp @@ -74,6 +74,7 @@ FILE* cfg_logfile_handle; bool cfg_quiet; std::string cfg_options_str; bool cfg_benchmark; +int cfg_analyze_interval_centis; void GTP::setup_default_parameters() { cfg_gtp_mode = false; @@ -107,6 +108,7 @@ void GTP::setup_default_parameters() { cfg_logfile_handle = nullptr; cfg_quiet = false; cfg_benchmark = false; + cfg_analyze_interval_centis = 0; // C++11 doesn't guarantee *anything* about how random this is, // and in MinGW it isn't random at all. But we can mix it in, which @@ -147,6 +149,8 @@ const std::string GTP::s_commands[] = { "kgs-time_settings", "kgs-game_over", "heatmap", + "lz-analyze", + "lz-genmove_analyze", "" }; @@ -345,12 +349,18 @@ bool GTP::execute(GameState & game, std::string xinput) { } } return true; - } else if (command.find("genmove") == 0) { + } else if (command.find("genmove") == 0 || command.find("lz-genmove_analyze") == 0) { + auto analysis_output = command.find("lz-genmove_analyze") == 0; + auto interval = 0; + std::istringstream cmdstream(command); std::string tmp; cmdstream >> tmp; // eat genmove cmdstream >> tmp; + if (analysis_output) { + cmdstream >> interval; + } if (!cmdstream.fail()) { int who; @@ -362,24 +372,66 @@ bool GTP::execute(GameState & game, std::string xinput) { gtp_fail_printf(id, "syntax error"); return 1; } + if (analysis_output) { + // Start of multi-line response + cfg_analyze_interval_centis = interval; + if (id != -1) gtp_printf_raw("=%d\n", id); + else gtp_printf_raw("=\n"); + } // start thinking { game.set_to_move(who); + // Outputs winrate and pvs for lz-genmove_analyze int move = search->think(who); 
game.play_move(move); std::string vertex = game.move_to_text(move); - gtp_printf(id, "%s", vertex.c_str()); + if (!analysis_output) { + gtp_printf(id, "%s", vertex.c_str()); + } else { + gtp_printf_raw("play %s\n", vertex.c_str()); + } } if (cfg_allow_pondering) { // now start pondering if (!game.has_resigned()) { + // Outputs winrate and pvs through gtp for lz-genmove_analyze search->ponder(); } } + if (analysis_output) { + // Terminate multi-line response + gtp_printf_raw("\n"); + } } else { gtp_fail_printf(id, "syntax not understood"); } + analysis_output = false; + return true; + } else if (command.find("lz-analyze") == 0) { + std::istringstream cmdstream(command); + std::string tmp; + int interval; + + cmdstream >> tmp; // eat lz-analyze + cmdstream >> interval; + if (!cmdstream.fail()) { + cfg_analyze_interval_centis = interval; + } else { + gtp_fail_printf(id, "syntax not understood"); + return true; + } + // Start multi-line response + if (id != -1) gtp_printf_raw("=%d\n", id); + else gtp_printf_raw("=\n"); + // now start pondering + if (!game.has_resigned()) { + // Outputs winrate and pvs through gtp + search->ponder(); + } + cfg_analyze_interval_centis = 0; + // Terminate multi-line response + gtp_printf_raw("\n"); return true; } else if (command.find("kgs-genmove_cleanup") == 0) { std::istringstream cmdstream(command); diff --git a/src/GTP.h b/src/GTP.h index 92985fd6c..2e5c00d1e 100644 --- a/src/GTP.h +++ b/src/GTP.h @@ -57,6 +57,7 @@ extern FILE* cfg_logfile_handle; extern bool cfg_quiet; extern std::string cfg_options_str; extern bool cfg_benchmark; +extern int cfg_analyze_interval_centis; /* A list of all valid GTP2 commands is defined here: diff --git a/src/UCTSearch.cpp b/src/UCTSearch.cpp index accb3352f..b3f4eb0f3 100644 --- a/src/UCTSearch.cpp +++ b/src/UCTSearch.cpp @@ -235,6 +235,30 @@ void UCTSearch::dump_stats(FastState & state, UCTNode & parent) { tree_stats(parent); } +void UCTSearch::output_analysis(FastState & state, UCTNode & parent) { 
+ if (!parent.has_children()) { + return; + } + + const int color = state.get_to_move(); + + std::string separator = "info"; + for (const auto& node : parent.get_children()) { + // Only send variations with visits + if (!node->get_visits()) continue; + + std::string move = state.move_to_text(node->get_move()); + FastState tmpstate = state; + tmpstate.play_move(node->get_move()); + std::string pv = move + " " + get_pv(tmpstate, *node); + gtp_printf_raw("%s %s %s %s %d %s %d %s %s", separator.c_str(), "move", move.c_str(), "visits", node->get_visits(), + "winrate", node->get_visits() ? (int)(node->get_eval(color)*10000) : 0, "pv", pv.c_str()); + separator = " info"; + } + gtp_printf_raw("\n"); + +} + void tree_stats_helper(const UCTNode& node, size_t depth, size_t& nodes, size_t& non_leaf_nodes, size_t& depth_sum, size_t& max_depth, @@ -600,8 +624,9 @@ int UCTSearch::think(int color, passflag_t passflag) { tg.add_task(UCTWorker(m_rootstate, this, m_root.get())); } - bool keeprunning = true; - int last_update = 0; + auto keeprunning = true; + auto last_update = 0; + auto last_output = 0; do { auto currstate = std::make_unique(m_rootstate); @@ -613,6 +638,12 @@ int UCTSearch::think(int color, passflag_t passflag) { Time elapsed; int elapsed_centis = Time::timediff_centis(start, elapsed); + if (cfg_analyze_interval_centis && + elapsed_centis - last_output > cfg_analyze_interval_centis) { + last_output = elapsed_centis; + output_analysis(m_rootstate, *m_root); + } + // output some stats every few seconds // check if we should still search if (elapsed_centis - last_update > 250) { @@ -670,13 +701,23 @@ void UCTSearch::ponder() { for (int i = 1; i < cfg_num_threads; i++) { tg.add_task(UCTWorker(m_rootstate, this, m_root.get())); } + Time start; auto keeprunning = true; + auto last_output = 0; do { auto currstate = std::make_unique(m_rootstate); auto result = play_simulation(*currstate, m_root.get()); if (result.valid()) { increment_playouts(); } + if 
(cfg_analyze_interval_centis) { + Time elapsed; + int elapsed_centis = Time::timediff_centis(start, elapsed); + if (elapsed_centis - last_output > cfg_analyze_interval_centis) { + last_output = elapsed_centis; + output_analysis(m_rootstate, *m_root); + } + } keeprunning = is_running(); keeprunning &= !stop_thinking(0, 1); } while (!Utils::input_pending() && keeprunning); diff --git a/src/UCTSearch.h b/src/UCTSearch.h index a99e10f91..f2425039b 100644 --- a/src/UCTSearch.h +++ b/src/UCTSearch.h @@ -114,6 +114,7 @@ class UCTSearch { int get_best_move(passflag_t passflag); void update_root(); bool advance_to_new_rootstate(); + void output_analysis(FastState & state, UCTNode & parent); GameState & m_rootstate; std::unique_ptr m_last_rootstate; diff --git a/src/Utils.cpp b/src/Utils.cpp index 84d291362..d0bebb1f1 100644 --- a/src/Utils.cpp +++ b/src/Utils.cpp @@ -94,25 +94,17 @@ void Utils::myprintf(const char *fmt, ...) { } } -static void gtp_fprintf(FILE* file, const std::string& prefix, - const char *fmt, va_list ap) { - fprintf(file, "%s ", prefix.c_str()); - vfprintf(file, fmt, ap); - fprintf(file, "\n\n"); -} - static void gtp_base_printf(int id, std::string prefix, const char *fmt, va_list ap) { if (id != -1) { prefix += std::to_string(id); } + prefix += " "; - gtp_fprintf(stdout, prefix, fmt, ap); + Utils::gtp_printf_raw(prefix.c_str()); + Utils::gtp_printf_raw(fmt, ap); + Utils::gtp_printf_raw("\n\n"); - if (cfg_logfile_handle) { - std::lock_guard lock(IOmutex); - gtp_fprintf(cfg_logfile_handle, prefix, fmt, ap); - } } void Utils::gtp_printf(int id, const char *fmt, ...) { @@ -122,6 +114,17 @@ void Utils::gtp_printf(int id, const char *fmt, ...) { va_end(ap); } +void Utils::gtp_printf_raw(const char *fmt, ...) { + va_list ap; + va_start(ap, fmt); + vfprintf(stdout, fmt, ap); + if (cfg_logfile_handle) { + std::lock_guard lock(IOmutex); + vfprintf(cfg_logfile_handle, fmt, ap); + } + va_end(ap); +} + void Utils::gtp_fail_printf(int id, const char *fmt, ...) 
{ va_list ap; va_start(ap, fmt); diff --git a/src/Utils.h b/src/Utils.h index 33a10aef7..ae8c02704 100644 --- a/src/Utils.h +++ b/src/Utils.h @@ -32,6 +32,7 @@ extern Utils::ThreadPool thread_pool; namespace Utils { void myprintf(const char *fmt, ...); void gtp_printf(int id, const char *fmt, ...); + void gtp_printf_raw(const char *fmt, ...); void gtp_fail_printf(int id, const char *fmt, ...); void log_input(const std::string& input); bool input_pending(); From 6e847e134df63bc15fe1fa4c2c1989f37bf225a7 Mon Sep 17 00:00:00 2001 From: Gian-Carlo Pascutto Date: Mon, 14 May 2018 13:08:04 +0200 Subject: [PATCH 03/18] Remove virtual loss from eval for live stats. For discussion see pull request #1412. --- src/UCTNode.cpp | 12 ++++++++++++ src/UCTNode.h | 1 + src/UCTSearch.cpp | 12 ++++++++---- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/src/UCTNode.cpp b/src/UCTNode.cpp index f5649998c..1fffe16fa 100644 --- a/src/UCTNode.cpp +++ b/src/UCTNode.cpp @@ -204,6 +204,18 @@ int UCTNode::get_visits() const { return m_visits; } +// Return the true score, without taking into account virtual losses. +float UCTNode::get_pure_eval(int tomove) const { + auto visits = get_visits(); + assert(visits > 0); + auto blackeval = get_blackevals(); + auto score = static_cast(blackeval / double(visits)); + if (tomove == FastBoard::WHITE) { + score = 1.0f - score; + } + return score; +} + float UCTNode::get_eval(int tomove) const { // Due to the use of atomic updates and virtual losses, it is // possible for the visit count to change underneath us. 
Make sure diff --git a/src/UCTNode.h b/src/UCTNode.h index 9c88961e7..44a54a809 100644 --- a/src/UCTNode.h +++ b/src/UCTNode.h @@ -66,6 +66,7 @@ class UCTNode { float get_score() const; void set_score(float score); float get_eval(int tomove) const; + float get_pure_eval(int tomove) const; float get_net_eval(int tomove) const; void virtual_loss(void); void virtual_loss_undo(void); diff --git a/src/UCTSearch.cpp b/src/UCTSearch.cpp index b3f4eb0f3..cf6e9de90 100644 --- a/src/UCTSearch.cpp +++ b/src/UCTSearch.cpp @@ -228,7 +228,7 @@ void UCTSearch::dump_stats(FastState & state, UCTNode & parent) { myprintf("%4s -> %7d (V: %5.2f%%) (N: %5.2f%%) PV: %s\n", move.c_str(), node->get_visits(), - node->get_visits() ? node->get_eval(color)*100.0f : 0.0f, + node->get_visits() ? node->get_pure_eval(color)*100.0f : 0.0f, node->get_score() * 100.0f, pv.c_str()); } @@ -251,8 +251,12 @@ void UCTSearch::output_analysis(FastState & state, UCTNode & parent) { FastState tmpstate = state; tmpstate.play_move(node->get_move()); std::string pv = move + " " + get_pv(tmpstate, *node); - gtp_printf_raw("%s %s %s %s %d %s %d %s %s", separator.c_str(), "move", move.c_str(), "visits", node->get_visits(), - "winrate", node->get_visits() ? (int)(node->get_eval(color)*10000) : 0, "pv", pv.c_str()); + auto move_eval = node->get_visits() ? 
+ static_cast(node->get_pure_eval(color) * 10000) : 0; + gtp_printf_raw("%s %s %s %s %d %s %d %s %s", separator.c_str(), + "move", move.c_str(), "visits", node->get_visits(), + "winrate", move_eval, + "pv", pv.c_str()); separator = " info"; } gtp_printf_raw("\n"); @@ -489,7 +493,7 @@ void UCTSearch::dump_analysis(int playouts) { int color = tempstate.board.get_to_move(); std::string pvstring = get_pv(tempstate, *m_root); - float winrate = 100.0f * m_root->get_eval(color); + float winrate = 100.0f * m_root->get_pure_eval(color); myprintf("Playouts: %d, Win: %5.2f%%, PV: %s\n", playouts, winrate, pvstring.c_str()); } From 9fd7542d7b918f432c9ab985433ea42565edf1f1 Mon Sep 17 00:00:00 2001 From: Gian-Carlo Pascutto Date: Mon, 14 May 2018 13:14:43 +0200 Subject: [PATCH 04/18] Make analysis output use one move per line. More in line with UCI, cleaner, easier to parse, smaller code. --- src/UCTSearch.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/UCTSearch.cpp b/src/UCTSearch.cpp index cf6e9de90..63dfe254b 100644 --- a/src/UCTSearch.cpp +++ b/src/UCTSearch.cpp @@ -242,7 +242,6 @@ void UCTSearch::output_analysis(FastState & state, UCTNode & parent) { const int color = state.get_to_move(); - std::string separator = "info"; for (const auto& node : parent.get_children()) { // Only send variations with visits if (!node->get_visits()) continue; @@ -253,14 +252,12 @@ void UCTSearch::output_analysis(FastState & state, UCTNode & parent) { std::string pv = move + " " + get_pv(tmpstate, *node); auto move_eval = node->get_visits() ? 
static_cast(node->get_pure_eval(color) * 10000) : 0; - gtp_printf_raw("%s %s %s %s %d %s %d %s %s", separator.c_str(), - "move", move.c_str(), "visits", node->get_visits(), + gtp_printf_raw("info %s %s %s %d %s %d %s %s\n", + "move", move.c_str(), + "visits", node->get_visits(), "winrate", move_eval, "pv", pv.c_str()); - separator = " info"; } - gtp_printf_raw("\n"); - } void tree_stats_helper(const UCTNode& node, size_t depth, From 1b64435d395ffccbe2032652a73b0da50b93eca1 Mon Sep 17 00:00:00 2001 From: Gian-Carlo Pascutto Date: Mon, 14 May 2018 16:52:37 +0200 Subject: [PATCH 05/18] Remove versioned clang from Makefile. Don't hardcode the clang version in the Makefile. --- src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index b9b9cf4e5..f3890836d 100644 --- a/src/Makefile +++ b/src/Makefile @@ -16,7 +16,7 @@ debug: clang: @echo "Detected OS: ${THE_OS}" - $(MAKE) CC=clang-5.0 CXX=clang++-5.0 \ + $(MAKE) CC=clang CXX=clang++ \ CXXFLAGS='$(CXXFLAGS) -Wall -Wextra -Wno-missing-braces -O3 -ffast-math -flto -march=native -std=c++14 -DNDEBUG' \ LDFLAGS='$(LDFLAGS) -flto -fuse-linker-plugin' \ leelaz From 62ddf58027da1b2952f0934eddd820a11d3879b8 Mon Sep 17 00:00:00 2001 From: Gian-Carlo Pascutto Date: Mon, 14 May 2018 17:42:29 +0200 Subject: [PATCH 06/18] Fix varargs usage. Regression from #1388. Fixes issue #1424. --- src/Utils.cpp | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/Utils.cpp b/src/Utils.cpp index d0bebb1f1..735d4af50 100644 --- a/src/Utils.cpp +++ b/src/Utils.cpp @@ -94,17 +94,23 @@ void Utils::myprintf(const char *fmt, ...) 
{ } } +static void gtp_fprintf(FILE* file, const std::string& prefix, + const char *fmt, va_list ap) { + fprintf(file, "%s ", prefix.c_str()); + vfprintf(file, fmt, ap); + fprintf(file, "\n\n"); +} + static void gtp_base_printf(int id, std::string prefix, const char *fmt, va_list ap) { if (id != -1) { prefix += std::to_string(id); } - prefix += " "; - - Utils::gtp_printf_raw(prefix.c_str()); - Utils::gtp_printf_raw(fmt, ap); - Utils::gtp_printf_raw("\n\n"); - + gtp_fprintf(stdout, prefix, fmt, ap); + if (cfg_logfile_handle) { + std::lock_guard lock(IOmutex); + gtp_fprintf(cfg_logfile_handle, prefix, fmt, ap); + } } void Utils::gtp_printf(int id, const char *fmt, ...) { @@ -118,11 +124,14 @@ void Utils::gtp_printf_raw(const char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stdout, fmt, ap); + va_end(ap); + if (cfg_logfile_handle) { std::lock_guard lock(IOmutex); + va_start(ap, fmt); vfprintf(cfg_logfile_handle, fmt, ap); + va_end(ap); } - va_end(ap); } void Utils::gtp_fail_printf(int id, const char *fmt, ...) { From 4d3f6444cffb8354501f2806e98caf430cd73f9f Mon Sep 17 00:00:00 2001 From: Junyan Xu Date: Wed, 16 May 2018 03:59:19 -0400 Subject: [PATCH 07/18] make c_puct proportional to winrate --- src/UCTNode.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/UCTNode.cpp b/src/UCTNode.cpp index 1fffe16fa..ff5d14fe8 100644 --- a/src/UCTNode.cpp +++ b/src/UCTNode.cpp @@ -269,11 +269,13 @@ UCTNode* UCTNode::uct_select_child(int color, bool is_root) { // Lower the expected eval for moves that are likely not the best. // Do not do this if we have introduced noise at this node exactly // to explore more. 
+ + auto pure_eval = get_pure_eval(color); if (!is_root || !cfg_noise) { - fpu_reduction = cfg_fpu_reduction * std::sqrt(total_visited_policy); + fpu_reduction = cfg_fpu_reduction * std::sqrt(total_visited_policy) * pure_eval / 0.5; } // Estimated eval for unknown nodes = original parent NN eval - reduction - auto fpu_eval = get_net_eval(color) - fpu_reduction; + auto fpu_eval = pure_eval - fpu_reduction; auto best = static_cast(nullptr); auto best_value = std::numeric_limits::lowest(); @@ -289,7 +291,7 @@ UCTNode* UCTNode::uct_select_child(int color, bool is_root) { } auto psa = child.get_score(); auto denom = 1.0 + child.get_visits(); - auto puct = cfg_puct * psa * (numerator / denom); + auto puct = cfg_puct * psa * (numerator / denom) * pure_eval / 0.5; auto value = winrate + puct; assert(value > std::numeric_limits::lowest()); From c165c0514a934a1a8eb1a6384b1914dc664fa085 Mon Sep 17 00:00:00 2001 From: Junyan Xu Date: Wed, 16 May 2018 22:35:14 -0400 Subject: [PATCH 08/18] revise comment about fpu --- src/UCTNode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/UCTNode.cpp b/src/UCTNode.cpp index ff5d14fe8..6d35b95f6 100644 --- a/src/UCTNode.cpp +++ b/src/UCTNode.cpp @@ -274,7 +274,7 @@ UCTNode* UCTNode::uct_select_child(int color, bool is_root) { if (!is_root || !cfg_noise) { fpu_reduction = cfg_fpu_reduction * std::sqrt(total_visited_policy) * pure_eval / 0.5; } - // Estimated eval for unknown nodes = original parent NN eval - reduction + // Estimated eval for unknown nodes = current parent winrate - reduction auto fpu_eval = pure_eval - fpu_reduction; auto best = static_cast(nullptr); From a63ee5fe68d7a2cf109ff97b7610deb233db04a1 Mon Sep 17 00:00:00 2001 From: Junyan Xu Date: Wed, 23 May 2018 16:43:05 -0400 Subject: [PATCH 09/18] fix division by zero with multi threads --- src/UCTNode.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/UCTNode.cpp b/src/UCTNode.cpp index 6d35b95f6..e50dacdcd 100644 --- 
a/src/UCTNode.cpp +++ b/src/UCTNode.cpp @@ -75,7 +75,6 @@ bool UCTNode::create_children(std::atomic& nodecount, } // We'll be the one queueing this node for expansion, stop others m_is_expanding = true; - lock.unlock(); const auto raw_netlist = Network::get_scored_moves( &state, Network::Ensemble::RANDOM_SYMMETRY); @@ -87,6 +86,8 @@ bool UCTNode::create_children(std::atomic& nodecount, if (state.board.white_to_move()) { m_net_eval = 1.0f - m_net_eval; } + update(m_net_eval); + lock.unlock(); eval = m_net_eval; std::vector nodelist; From ae4d04d69211f78b88afee377a8fd2a50ff42581 Mon Sep 17 00:00:00 2001 From: Junyan Xu Date: Wed, 23 May 2018 16:43:59 -0400 Subject: [PATCH 10/18] fix division by zero with multi threads --- src/UCTSearch.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/UCTSearch.cpp b/src/UCTSearch.cpp index 63dfe254b..9ccb9639c 100644 --- a/src/UCTSearch.cpp +++ b/src/UCTSearch.cpp @@ -168,6 +168,7 @@ SearchResult UCTSearch::play_simulation(GameState & currstate, if (currstate.get_passes() >= 2) { auto score = currstate.final_score(); result = SearchResult::from_score(score); + node->update(result.eval()); } else if (m_nodes < MAX_TREE_SIZE) { float eval; const auto had_children = node->has_children(); @@ -189,12 +190,13 @@ SearchResult UCTSearch::play_simulation(GameState & currstate, next->invalidate(); } else { result = play_simulation(currstate, next); + if (result.valid()) { + node->update(result.eval()); + } } } - if (result.valid()) { - node->update(result.eval()); - } + node->virtual_loss_undo(); return result; From bbfb44597a13d1ca3a514950d50380ca47662f6c Mon Sep 17 00:00:00 2001 From: Junyan Xu Date: Wed, 23 May 2018 20:01:41 -0400 Subject: [PATCH 11/18] revert unnecessarily extending lock --- src/UCTNode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/UCTNode.cpp b/src/UCTNode.cpp index e50dacdcd..961dfa22f 100644 --- a/src/UCTNode.cpp +++ b/src/UCTNode.cpp @@ -75,6 +75,7 @@ bool 
UCTNode::create_children(std::atomic& nodecount, } // We'll be the one queueing this node for expansion, stop others m_is_expanding = true; + lock.unlock(); const auto raw_netlist = Network::get_scored_moves( &state, Network::Ensemble::RANDOM_SYMMETRY); @@ -87,7 +88,6 @@ bool UCTNode::create_children(std::atomic& nodecount, m_net_eval = 1.0f - m_net_eval; } update(m_net_eval); - lock.unlock(); eval = m_net_eval; std::vector nodelist; From 05380eb85f61caa263deedd7116fb1273bdaeaac Mon Sep 17 00:00:00 2001 From: Junyan Xu Date: Tue, 12 Jun 2018 00:49:43 -0400 Subject: [PATCH 12/18] Update UCTNode.cpp --- src/UCTNode.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/UCTNode.cpp b/src/UCTNode.cpp index 961dfa22f..d9e172371 100644 --- a/src/UCTNode.cpp +++ b/src/UCTNode.cpp @@ -271,12 +271,11 @@ UCTNode* UCTNode::uct_select_child(int color, bool is_root) { // Do not do this if we have introduced noise at this node exactly // to explore more. - auto pure_eval = get_pure_eval(color); if (!is_root || !cfg_noise) { - fpu_reduction = cfg_fpu_reduction * std::sqrt(total_visited_policy) * pure_eval / 0.5; + fpu_reduction = cfg_fpu_reduction * std::sqrt(total_visited_policy); } // Estimated eval for unknown nodes = current parent winrate - reduction - auto fpu_eval = pure_eval - fpu_reduction; + auto fpu_eval = get_pure_eval(color) - fpu_reduction; auto best = static_cast(nullptr); auto best_value = std::numeric_limits::lowest(); @@ -292,7 +291,7 @@ UCTNode* UCTNode::uct_select_child(int color, bool is_root) { } auto psa = child.get_score(); auto denom = 1.0 + child.get_visits(); - auto puct = cfg_puct * psa * (numerator / denom) * pure_eval / 0.5; + auto puct = cfg_puct * psa * (numerator / denom); auto value = winrate + puct; assert(value > std::numeric_limits::lowest()); From 2c3e50fc316d5196486b91779beec08788530de8 Mon Sep 17 00:00:00 2001 From: Junyan Xu Date: Tue, 12 Jun 2018 00:50:32 -0400 Subject: [PATCH 13/18] Update config.h --- 
src/config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/config.h b/src/config.h index 100e3e998..1e8d3a4d2 100644 --- a/src/config.h +++ b/src/config.h @@ -78,7 +78,7 @@ * USE_TUNER: Expose some extra command line parameters that allow tuning the * search algorithm. */ -//#define USE_TUNER +#define USE_TUNER #define PROGRAM_NAME "Leela Zero" #define PROGRAM_VERSION "0.15" From 62533d2009c16323e4b72948c59679be92bc6b70 Mon Sep 17 00:00:00 2001 From: Junyan Xu Date: Mon, 18 Jun 2018 00:08:38 -0400 Subject: [PATCH 14/18] Update config.h --- src/config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/config.h b/src/config.h index 1e8d3a4d2..100e3e998 100644 --- a/src/config.h +++ b/src/config.h @@ -78,7 +78,7 @@ * USE_TUNER: Expose some extra command line parameters that allow tuning the * search algorithm. */ -#define USE_TUNER +//#define USE_TUNER #define PROGRAM_NAME "Leela Zero" #define PROGRAM_VERSION "0.15" From fe75facff3382d9e71df85590b47162b5da46729 Mon Sep 17 00:00:00 2001 From: Junyan Xu Date: Mon, 18 Jun 2018 00:11:14 -0400 Subject: [PATCH 15/18] Update UCTNode.cpp --- src/UCTNode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/UCTNode.cpp b/src/UCTNode.cpp index d9e172371..aaa8a6b5d 100644 --- a/src/UCTNode.cpp +++ b/src/UCTNode.cpp @@ -275,7 +275,7 @@ UCTNode* UCTNode::uct_select_child(int color, bool is_root) { fpu_reduction = cfg_fpu_reduction * std::sqrt(total_visited_policy); } // Estimated eval for unknown nodes = current parent winrate - reduction - auto fpu_eval = get_pure_eval(color) - fpu_reduction; + auto fpu_eval = get_net_eval(color) - fpu_reduction; auto best = static_cast(nullptr); auto best_value = std::numeric_limits::lowest(); From 4758de09209ebf899e2b1b7f2590f45dc806a2e5 Mon Sep 17 00:00:00 2001 From: Junyan Xu Date: Mon, 18 Jun 2018 00:13:14 -0400 Subject: [PATCH 16/18] Update UCTNode.cpp --- src/UCTNode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/src/UCTNode.cpp b/src/UCTNode.cpp index aaa8a6b5d..c4f5b33dd 100644 --- a/src/UCTNode.cpp +++ b/src/UCTNode.cpp @@ -274,7 +274,7 @@ UCTNode* UCTNode::uct_select_child(int color, bool is_root) { if (!is_root || !cfg_noise) { fpu_reduction = cfg_fpu_reduction * std::sqrt(total_visited_policy); } - // Estimated eval for unknown nodes = current parent winrate - reduction + // Estimated eval for unknown nodes = original parent NN eval - reduction auto fpu_eval = get_net_eval(color) - fpu_reduction; auto best = static_cast(nullptr); From c5986b063ec7b11f193361c21f1bb2300f9dde3c Mon Sep 17 00:00:00 2001 From: Junyan Xu Date: Mon, 18 Jun 2018 00:16:44 -0400 Subject: [PATCH 17/18] Update UCTNode.cpp --- src/UCTNode.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/UCTNode.cpp b/src/UCTNode.cpp index c4f5b33dd..3f008e4f1 100644 --- a/src/UCTNode.cpp +++ b/src/UCTNode.cpp @@ -270,7 +270,6 @@ UCTNode* UCTNode::uct_select_child(int color, bool is_root) { // Lower the expected eval for moves that are likely not the best. // Do not do this if we have introduced noise at this node exactly // to explore more. - if (!is_root || !cfg_noise) { fpu_reduction = cfg_fpu_reduction * std::sqrt(total_visited_policy); } From d25f752a3315aa99ce4e44c10220c0f01e4c41c7 Mon Sep 17 00:00:00 2001 From: Junyan Xu Date: Mon, 18 Jun 2018 00:18:13 -0400 Subject: [PATCH 18/18] Update UCTSearch.cpp --- src/UCTSearch.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/UCTSearch.cpp b/src/UCTSearch.cpp index 9ccb9639c..ae67077ff 100644 --- a/src/UCTSearch.cpp +++ b/src/UCTSearch.cpp @@ -196,7 +196,6 @@ SearchResult UCTSearch::play_simulation(GameState & currstate, } } - node->virtual_loss_undo(); return result;